feat: Inital commit
This commit is contained in:
+128
@@ -0,0 +1,128 @@
|
||||
// Copyright 2018 Adam Tauber
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package storage
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/cookiejar"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Storage is an interface which handles Collector's internal data,
|
||||
// like visited urls and cookies.
|
||||
// The default Storage of the Collector is the InMemoryStorage.
|
||||
// Collector's storage can be changed by calling Collector.SetStorage()
|
||||
// function.
|
||||
type Storage interface {
|
||||
// Init initializes the storage
|
||||
Init() error
|
||||
// Visited receives and stores a request ID that is visited by the Collector
|
||||
Visited(requestID uint64) error
|
||||
// IsVisited returns true if the request was visited before IsVisited
|
||||
// is called
|
||||
IsVisited(requestID uint64) (bool, error)
|
||||
// Cookies retrieves stored cookies for a given host
|
||||
Cookies(u *url.URL) string
|
||||
// SetCookies stores cookies for a given host
|
||||
SetCookies(u *url.URL, cookies string)
|
||||
}
|
||||
|
||||
// InMemoryStorage is the default storage backend of colly.
|
||||
// InMemoryStorage keeps cookies and visited urls in memory
|
||||
// without persisting data on the disk.
|
||||
type InMemoryStorage struct {
|
||||
visitedURLs map[uint64]bool
|
||||
lock *sync.RWMutex
|
||||
jar *cookiejar.Jar
|
||||
}
|
||||
|
||||
// Init initializes InMemoryStorage
|
||||
func (s *InMemoryStorage) Init() error {
|
||||
if s.visitedURLs == nil {
|
||||
s.visitedURLs = make(map[uint64]bool)
|
||||
}
|
||||
if s.lock == nil {
|
||||
s.lock = &sync.RWMutex{}
|
||||
}
|
||||
if s.jar == nil {
|
||||
var err error
|
||||
s.jar, err = cookiejar.New(nil)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Visited implements Storage.Visited()
|
||||
func (s *InMemoryStorage) Visited(requestID uint64) error {
|
||||
s.lock.Lock()
|
||||
s.visitedURLs[requestID] = true
|
||||
s.lock.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsVisited implements Storage.IsVisited()
|
||||
func (s *InMemoryStorage) IsVisited(requestID uint64) (bool, error) {
|
||||
s.lock.RLock()
|
||||
visited := s.visitedURLs[requestID]
|
||||
s.lock.RUnlock()
|
||||
return visited, nil
|
||||
}
|
||||
|
||||
// Cookies implements Storage.Cookies()
|
||||
func (s *InMemoryStorage) Cookies(u *url.URL) string {
|
||||
return StringifyCookies(s.jar.Cookies(u))
|
||||
}
|
||||
|
||||
// SetCookies implements Storage.SetCookies()
|
||||
func (s *InMemoryStorage) SetCookies(u *url.URL, cookies string) {
|
||||
s.jar.SetCookies(u, UnstringifyCookies(cookies))
|
||||
}
|
||||
|
||||
// Close implements Storage.Close()
|
||||
func (s *InMemoryStorage) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// StringifyCookies serializes list of http.Cookies to string
|
||||
func StringifyCookies(cookies []*http.Cookie) string {
|
||||
// Stringify cookies.
|
||||
cs := make([]string, len(cookies))
|
||||
for i, c := range cookies {
|
||||
cs[i] = c.String()
|
||||
}
|
||||
return strings.Join(cs, "\n")
|
||||
}
|
||||
|
||||
// UnstringifyCookies deserializes a cookie string to http.Cookies
|
||||
func UnstringifyCookies(s string) []*http.Cookie {
|
||||
h := http.Header{}
|
||||
for _, c := range strings.Split(s, "\n") {
|
||||
h.Add("Set-Cookie", c)
|
||||
}
|
||||
r := http.Response{Header: h}
|
||||
return r.Cookies()
|
||||
}
|
||||
|
||||
// ContainsCookie checks if a cookie name is represented in cookies
|
||||
func ContainsCookie(cookies []*http.Cookie, name string) bool {
|
||||
for _, c := range cookies {
|
||||
if c.Name == name {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
Reference in New Issue
Block a user