feat: Initial commit
This commit is contained in:
+932
@@ -0,0 +1,932 @@
|
||||
/*
|
||||
* Copyright 2020 National Library of Norway.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package url
|
||||
|
||||
import (
|
||||
goerrors "errors"
|
||||
u2 "net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/bits-and-blooms/bitset"
|
||||
|
||||
"github.com/nlnwa/whatwg-url/errors"
|
||||
)
|
||||
|
||||
func NewParser(opts ...ParserOption) Parser {
|
||||
p := &parser{opts: defaultParserOptions()}
|
||||
for _, opt := range opts {
|
||||
opt.apply(&p.opts)
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
type Parser interface {
|
||||
Parse(rawUrl string) (*Url, error)
|
||||
ParseRef(rawUrl, ref string) (*Url, error)
|
||||
BasicParser(urlOrRef string, base *Url, url *Url, stateOverride State) (*Url, error)
|
||||
PercentEncodeString(s string, tr *PercentEncodeSet) string
|
||||
NewUrl() *Url
|
||||
}
|
||||
|
||||
type parser struct {
|
||||
opts parserOptions
|
||||
}
|
||||
|
||||
func (p *parser) Parse(rawUrl string) (*Url, error) {
|
||||
return p.BasicParser(rawUrl, nil, nil, NoState)
|
||||
}
|
||||
|
||||
func (p *parser) ParseRef(rawUrl, ref string) (*Url, error) {
|
||||
if rawUrl == "" {
|
||||
return p.Parse(ref)
|
||||
}
|
||||
|
||||
b, err := p.Parse(rawUrl)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return p.BasicParser(ref, b, nil, NoState)
|
||||
}
|
||||
|
||||
func (u *Url) Parse(ref string) (*Url, error) {
|
||||
return u.parser.BasicParser(ref, u, nil, NoState)
|
||||
}
|
||||
|
||||
var defaultParser = NewParser()
|
||||
|
||||
func Parse(rawUrl string) (*Url, error) {
|
||||
return defaultParser.Parse(rawUrl)
|
||||
}
|
||||
|
||||
func ParseRef(rawUrl, ref string) (*Url, error) {
|
||||
return defaultParser.ParseRef(rawUrl, ref)
|
||||
}
|
||||
|
||||
// State identifies a state of the basic URL parser's state machine. It is
// also used as the stateOverride argument of BasicParser.
type State int

// Parser states. The numeric values follow declaration order, so the order
// below must not be changed.
const (
	// NoState means "no state override"; BasicParser treats every value
	// greater than NoState as an override.
	NoState State = iota

	// Scheme handling.
	StateSchemeStart
	StateScheme
	StateNoScheme

	StateOpaquePath

	// Authority handling.
	StateSpecialRelativeOrAuthority
	StateSpecialAuthoritySlashes
	StateSpecialAuthorityIgnoreSlashes
	StatePathOrAuthority
	StateAuthority
	StateHost
	StateHostname

	// file: URL handling.
	StateFile
	StateFileHost
	StateFileSlash

	StatePort

	// Path, query and fragment handling.
	StatePath
	StatePathStart
	StateQuery
	StateFragment

	// Relative reference handling.
	StateRelative
	StateRelativeSlash
)
|
||||
|
||||
// BasicParser implements WHATWG basic URL parser (https://url.spec.whatwg.org/#concept-basic-url-parser)
|
||||
// In most cases, when possible, prefer using the higher level Parse method.
|
||||
func (p *parser) BasicParser(urlOrRef string, baseUrl *Url, url *Url, stateOverride State) (*Url, error) {
|
||||
stateOverridden := stateOverride > NoState
|
||||
if url == nil {
|
||||
url = &Url{inputUrl: urlOrRef, path: &path{}}
|
||||
if i, changed := trim(url.inputUrl, C0OrSpacePercentEncodeSet); changed {
|
||||
if err := p.handleError(url, errors.InvalidURLUnit, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
url.inputUrl = i
|
||||
}
|
||||
} else {
|
||||
url.inputUrl = urlOrRef
|
||||
}
|
||||
url.parser = p
|
||||
|
||||
if i, changed := remove(url.inputUrl, ASCIITabOrNewline); changed {
|
||||
if err := p.handleError(url, errors.InvalidURLUnit, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
url.inputUrl = i
|
||||
}
|
||||
|
||||
input := newInputString(url.inputUrl)
|
||||
var state State
|
||||
if stateOverridden {
|
||||
state = stateOverride
|
||||
} else {
|
||||
state = StateSchemeStart
|
||||
}
|
||||
|
||||
var buffer strings.Builder
|
||||
atFlag := false
|
||||
bracketFlag := false
|
||||
passwordTokenSeenFlag := false
|
||||
|
||||
var base *Url
|
||||
if baseUrl != nil {
|
||||
base = baseUrl.Clone()
|
||||
}
|
||||
|
||||
for {
|
||||
r := input.nextCodePoint()
|
||||
|
||||
switch state {
|
||||
case StateSchemeStart:
|
||||
if ASCIIAlpha.Test(uint(r)) {
|
||||
buffer.WriteRune(unicode.ToLower(r))
|
||||
state = StateScheme
|
||||
} else if !stateOverridden {
|
||||
state = StateNoScheme
|
||||
input.rewindLast()
|
||||
} else {
|
||||
if err := p.handleError(url, errors.InvalidURLUnit, true); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
case StateScheme:
|
||||
tr := ASCIIAlphanumeric.Clone().Set(0x2b).Set(0x2d).Set(0x2e)
|
||||
if tr.Test(uint(r)) {
|
||||
buffer.WriteRune(unicode.ToLower(r))
|
||||
} else if r == ':' {
|
||||
if stateOverridden {
|
||||
// If url’s scheme is a special scheme and buffer is not a special scheme, then return.
|
||||
if url.isSpecialScheme(url.scheme) && !url.isSpecialScheme(buffer.String()) {
|
||||
return url, nil
|
||||
}
|
||||
// If url’s scheme is not a special scheme and buffer is a special scheme, then return.
|
||||
if !url.isSpecialScheme(url.scheme) && url.isSpecialScheme(buffer.String()) {
|
||||
return url, nil
|
||||
}
|
||||
// If url includes credentials or has a non-null port, and buffer is "file", then return.
|
||||
if (url.username != "" || url.password != "" || url.port != nil) && buffer.String() == "file" {
|
||||
return url, nil
|
||||
}
|
||||
// If url’s scheme is "file" and its host is an empty host or null, then return.
|
||||
if url.scheme == "file" && *url.host == "" {
|
||||
return url, nil
|
||||
}
|
||||
}
|
||||
url.scheme = buffer.String()
|
||||
if stateOverridden {
|
||||
url.cleanDefaultPort()
|
||||
return url, nil
|
||||
}
|
||||
buffer.Reset()
|
||||
if url.scheme == "file" {
|
||||
if !input.remainingStartsWith("//") {
|
||||
if err := p.handleError(url, errors.SpecialSchemeMissingFollowingSolidus, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
state = StateFile
|
||||
} else if url.IsSpecialScheme() && base != nil && base.scheme == url.scheme {
|
||||
state = StateSpecialRelativeOrAuthority
|
||||
} else if url.IsSpecialScheme() {
|
||||
state = StateSpecialAuthoritySlashes
|
||||
} else if input.remainingStartsWith("/") {
|
||||
state = StatePathOrAuthority
|
||||
input.nextCodePoint()
|
||||
} else {
|
||||
url.path.setOpaque("")
|
||||
state = StateOpaquePath
|
||||
}
|
||||
} else if !stateOverridden {
|
||||
buffer.Reset()
|
||||
state = StateNoScheme
|
||||
input.reset()
|
||||
} else {
|
||||
if err := p.handleError(url, errors.InvalidURLUnit, true); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
case StateNoScheme:
|
||||
if base == nil || (base.path.isOpaque() && r != '#') {
|
||||
if err := p.handleError(url, errors.MissingSchemeNonRelativeURL, true); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else if base.path.isOpaque() && r == '#' {
|
||||
url.scheme = base.scheme
|
||||
url.path = base.path
|
||||
url.query = base.query
|
||||
url.fragment = new(string)
|
||||
state = StateFragment
|
||||
} else if base.scheme != "file" {
|
||||
state = StateRelative
|
||||
input.rewindLast()
|
||||
} else {
|
||||
state = StateFile
|
||||
input.rewindLast()
|
||||
}
|
||||
case StateSpecialRelativeOrAuthority:
|
||||
if r == '/' && input.remainingStartsWith("/") {
|
||||
state = StateSpecialAuthorityIgnoreSlashes
|
||||
input.nextCodePoint()
|
||||
} else {
|
||||
if err := p.handleError(url, errors.SpecialSchemeMissingFollowingSolidus, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
state = StateRelative
|
||||
input.rewindLast()
|
||||
}
|
||||
case StatePathOrAuthority:
|
||||
if r == '/' {
|
||||
state = StateAuthority
|
||||
} else {
|
||||
state = StatePath
|
||||
input.rewindLast()
|
||||
}
|
||||
case StateRelative:
|
||||
url.scheme = base.scheme
|
||||
if r == '/' {
|
||||
state = StateRelativeSlash
|
||||
} else if url.isSpecialSchemeAndBackslash(r) {
|
||||
if err := p.handleError(url, errors.InvalidReverseSolidus, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
state = StateRelativeSlash
|
||||
} else {
|
||||
url.username = base.username
|
||||
url.password = base.password
|
||||
url.host = base.host
|
||||
url.port = base.port
|
||||
url.decodedPort = base.decodedPort
|
||||
url.path = base.path
|
||||
url.query = base.query
|
||||
if r == '?' {
|
||||
url.query = new(string)
|
||||
state = StateQuery
|
||||
} else if r == '#' {
|
||||
url.fragment = new(string)
|
||||
state = StateFragment
|
||||
} else if !input.eof {
|
||||
url.query = nil
|
||||
url.path.shortenPath(url.scheme)
|
||||
state = StatePath
|
||||
input.rewindLast()
|
||||
}
|
||||
}
|
||||
case StateRelativeSlash:
|
||||
if url.IsSpecialScheme() && (r == '/' || r == '\\') {
|
||||
if r == '\\' {
|
||||
if err := p.handleError(url, errors.InvalidReverseSolidus, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
state = StateSpecialAuthorityIgnoreSlashes
|
||||
} else if r == '/' {
|
||||
state = StateAuthority
|
||||
} else {
|
||||
url.username = base.username
|
||||
url.password = base.password
|
||||
url.host = base.host
|
||||
url.port = base.port
|
||||
url.decodedPort = base.decodedPort
|
||||
state = StatePath
|
||||
input.rewindLast()
|
||||
}
|
||||
case StateSpecialAuthoritySlashes:
|
||||
if r == '/' && input.remainingStartsWith("/") {
|
||||
state = StateSpecialAuthorityIgnoreSlashes
|
||||
input.nextCodePoint()
|
||||
} else {
|
||||
if err := p.handleError(url, errors.SpecialSchemeMissingFollowingSolidus, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
state = StateSpecialAuthorityIgnoreSlashes
|
||||
input.rewindLast()
|
||||
}
|
||||
case StateSpecialAuthorityIgnoreSlashes:
|
||||
if r != '/' && r != '\\' {
|
||||
state = StateAuthority
|
||||
input.rewindLast()
|
||||
} else {
|
||||
if err := p.handleError(url, errors.SpecialSchemeMissingFollowingSolidus, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
case StateAuthority:
|
||||
if r == '@' {
|
||||
if err := p.handleError(url, errors.InvalidCredentials, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if atFlag {
|
||||
// Prepend %40 to buffer
|
||||
tmp := buffer.String()
|
||||
buffer.Reset()
|
||||
buffer.WriteString("%40")
|
||||
buffer.WriteString(tmp)
|
||||
}
|
||||
atFlag = true
|
||||
bb := newInputString(buffer.String())
|
||||
c := bb.nextCodePoint()
|
||||
for !bb.eof {
|
||||
if c == ':' && !passwordTokenSeenFlag {
|
||||
passwordTokenSeenFlag = true
|
||||
c = bb.nextCodePoint()
|
||||
continue
|
||||
}
|
||||
encodedCodePoints := p.percentEncodeRune(c, UserInfoPercentEncodeSet)
|
||||
if passwordTokenSeenFlag {
|
||||
url.password += encodedCodePoints
|
||||
} else {
|
||||
url.username += encodedCodePoints
|
||||
}
|
||||
c = bb.nextCodePoint()
|
||||
}
|
||||
buffer.Reset()
|
||||
} else if (input.eof || r == '/' || r == '?' || r == '#') || url.isSpecialSchemeAndBackslash(r) {
|
||||
if atFlag && buffer.Len() == 0 {
|
||||
if err := p.handleError(url, errors.InvalidCredentials, true); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
input.rewind(len([]rune(buffer.String())) + 1)
|
||||
buffer.Reset()
|
||||
state = StateHost
|
||||
} else {
|
||||
buffer.WriteRune(r)
|
||||
}
|
||||
case StateHost:
|
||||
fallthrough
|
||||
case StateHostname:
|
||||
if stateOverridden && url.scheme == "file" {
|
||||
input.rewindLast()
|
||||
state = StateFileHost
|
||||
} else if r == ':' && !bracketFlag {
|
||||
if buffer.Len() == 0 {
|
||||
if err := p.handleError(url, errors.HostMissing, true); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if stateOverride == StateHostname {
|
||||
return url, nil
|
||||
}
|
||||
host, err := p.parseHost(url, p, buffer.String(), !url.IsSpecialScheme())
|
||||
if err != nil {
|
||||
return url, err
|
||||
}
|
||||
url.host = &host
|
||||
buffer.Reset()
|
||||
state = StatePort
|
||||
} else if input.eof || (r == '/' || r == '?' || r == '#' || url.isSpecialSchemeAndBackslash(r)) {
|
||||
input.rewindLast()
|
||||
if url.IsSpecialScheme() && buffer.Len() == 0 {
|
||||
if err := p.handleError(url, errors.HostMissing, true); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else if stateOverridden && buffer.Len() == 0 && (url.username != "" || url.password != "" || url.port != nil) {
|
||||
return url, nil
|
||||
} else {
|
||||
host, err := p.parseHost(url, p, buffer.String(), !url.IsSpecialScheme())
|
||||
if err != nil {
|
||||
return url, err
|
||||
}
|
||||
url.host = &host
|
||||
buffer.Reset()
|
||||
state = StatePathStart
|
||||
if stateOverridden {
|
||||
return url, nil
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if r == '[' {
|
||||
bracketFlag = true
|
||||
} else if r == ']' {
|
||||
bracketFlag = false
|
||||
}
|
||||
if input.currentIsInvalid() && p.opts.acceptInvalidCodepoints {
|
||||
buffer.WriteString(string([]byte{input.getCurrentAsByte()}))
|
||||
} else {
|
||||
buffer.WriteRune(r)
|
||||
}
|
||||
}
|
||||
case StatePort:
|
||||
if ASCIIDigit.Test(uint(r)) {
|
||||
buffer.WriteRune(r)
|
||||
} else if (input.eof || r == '/' || r == '?' || r == '#') || url.isSpecialSchemeAndBackslash(r) || stateOverridden {
|
||||
if buffer.Len() > 0 {
|
||||
port, err := strconv.Atoi(buffer.String())
|
||||
if port > 65535 || goerrors.Is(err, strconv.ErrRange) {
|
||||
if err := p.handleWrappedError(url, errors.PortOutOfRange, true, err); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
portString := strconv.Itoa(port)
|
||||
url.decodedPort = port
|
||||
url.port = &portString
|
||||
url.cleanDefaultPort()
|
||||
buffer.Reset()
|
||||
}
|
||||
if stateOverridden {
|
||||
return url, nil
|
||||
}
|
||||
state = StatePathStart
|
||||
input.rewindLast()
|
||||
} else {
|
||||
if err := p.handleError(url, errors.PortInvalid, true); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
case StateFile:
|
||||
url.scheme = "file"
|
||||
url.host = new(string)
|
||||
if r == '/' || r == '\\' {
|
||||
if r == '\\' {
|
||||
if err := p.handleError(url, errors.InvalidReverseSolidus, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
state = StateFileSlash
|
||||
} else if base != nil && base.scheme == "file" {
|
||||
url.host = base.host
|
||||
url.path = base.path
|
||||
url.query = base.query
|
||||
if r == '?' {
|
||||
url.query = new(string)
|
||||
state = StateQuery
|
||||
} else if r == '#' {
|
||||
url.fragment = new(string)
|
||||
state = StateFragment
|
||||
} else if !input.eof {
|
||||
url.query = nil
|
||||
if !startsWithAWindowsDriveLetter(input.remainingFromPointer()) {
|
||||
url.path.shortenPath(url.scheme)
|
||||
} else {
|
||||
if err := p.handleError(url, errors.FileInvalidWindowsDriveLetter, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
url.path.init()
|
||||
}
|
||||
state = StatePath
|
||||
input.rewindLast()
|
||||
}
|
||||
} else {
|
||||
state = StatePath
|
||||
input.rewindLast()
|
||||
}
|
||||
case StateFileSlash:
|
||||
if r == '/' || r == '\\' {
|
||||
if r == '\\' {
|
||||
if err := p.handleError(url, errors.InvalidReverseSolidus, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
state = StateFileHost
|
||||
} else {
|
||||
if base != nil && base.scheme == "file" {
|
||||
url.host = base.host
|
||||
if !startsWithAWindowsDriveLetter(input.remainingFromPointer()) && base.path != nil && isNormalizedWindowsDriveLetter(base.path.p[0]) {
|
||||
// This is a (platform-independent) Windows drive letter quirk. Both url’s and base’s host are null under these conditions and therefore not copied
|
||||
url.path.addSegment(base.path.p[0])
|
||||
}
|
||||
}
|
||||
state = StatePath
|
||||
input.rewindLast()
|
||||
}
|
||||
case StateFileHost:
|
||||
if input.eof || r == '/' || r == '\\' || r == '?' || r == '#' {
|
||||
input.rewindLast()
|
||||
if !stateOverridden && isWindowsDriveLetter(buffer.String()) {
|
||||
if err := p.handleError(url, errors.FileInvalidWindowsDriveLetterHost, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
state = StatePath
|
||||
} else if buffer.Len() == 0 {
|
||||
url.host = new(string)
|
||||
if stateOverridden {
|
||||
return nil, nil
|
||||
}
|
||||
state = StatePathStart
|
||||
} else {
|
||||
host, err := p.parseHost(url, p, buffer.String(), !url.IsSpecialScheme())
|
||||
if err != nil {
|
||||
return url, err
|
||||
}
|
||||
if host == "localhost" {
|
||||
host = ""
|
||||
}
|
||||
url.host = &host
|
||||
if stateOverridden {
|
||||
return url, nil
|
||||
}
|
||||
buffer.Reset()
|
||||
state = StatePathStart
|
||||
}
|
||||
} else {
|
||||
buffer.WriteRune(r)
|
||||
}
|
||||
case StatePathStart:
|
||||
if url.IsSpecialScheme() && !p.opts.skipTrailingSlashNormalization {
|
||||
if r == '\\' {
|
||||
if err := p.handleError(url, errors.InvalidReverseSolidus, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
state = StatePath
|
||||
if r != '/' && r != '\\' {
|
||||
input.rewindLast()
|
||||
}
|
||||
} else if !stateOverridden && r == '?' {
|
||||
url.query = new(string)
|
||||
state = StateQuery
|
||||
} else if !stateOverridden && r == '#' {
|
||||
url.fragment = new(string)
|
||||
state = StateFragment
|
||||
} else if !input.eof {
|
||||
state = StatePath
|
||||
if r != '/' {
|
||||
input.rewindLast()
|
||||
}
|
||||
} else if stateOverridden && url.host == nil {
|
||||
url.path.addSegment("")
|
||||
}
|
||||
case StatePath:
|
||||
if (input.eof || r == '/') ||
|
||||
url.isSpecialSchemeAndBackslash(r) ||
|
||||
(!stateOverridden && (r == '?' || r == '#')) {
|
||||
|
||||
if url.isSpecialSchemeAndBackslash(r) {
|
||||
if err := p.handleError(url, errors.InvalidReverseSolidus, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if isDoubleDotPathSegment(buffer.String()) {
|
||||
url.path.shortenPath(url.scheme)
|
||||
|
||||
if r != '/' && !url.isSpecialSchemeAndBackslash(r) {
|
||||
url.path.addSegment("")
|
||||
}
|
||||
} else if isSingleDotPathSegment(buffer.String()) && r != '/' && !url.isSpecialSchemeAndBackslash(r) {
|
||||
url.path.addSegment("")
|
||||
} else if !isSingleDotPathSegment(buffer.String()) {
|
||||
if url.scheme == "file" && url.path.isEmpty() && isWindowsDriveLetter(buffer.String()) {
|
||||
// replace second code point in buffer with U+003A (:).
|
||||
// This is a (platform-independent) Windows drive letter quirk.
|
||||
if !p.opts.skipWindowsDriveLetterNormalization {
|
||||
b := buffer.String()
|
||||
buffer.Reset()
|
||||
buffer.WriteString(b[0:1] + ":" + b[2:])
|
||||
}
|
||||
}
|
||||
if !p.opts.collapseConsecutiveSlashes || !url.IsSpecialScheme() || url.path.isEmpty() || len(url.path.p[len(url.path.p)-1]) > 0 {
|
||||
url.path.addSegment(buffer.String())
|
||||
} else {
|
||||
url.path.p[len(url.path.p)-1] = buffer.String()
|
||||
}
|
||||
}
|
||||
buffer.Reset()
|
||||
if r == '?' {
|
||||
url.query = new(string)
|
||||
state = StateQuery
|
||||
} else if r == '#' {
|
||||
url.fragment = new(string)
|
||||
state = StateFragment
|
||||
}
|
||||
} else {
|
||||
if !isURLCodePoint(r) && r != '%' {
|
||||
if err := p.handleError(url, errors.InvalidURLUnit, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
invalidPercentEncoding, d := input.remainingIsInvalidPercentEncoded()
|
||||
if invalidPercentEncoding {
|
||||
if err := p.handleErrorWithDescription(url, errors.InvalidURLUnit, false, d); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if invalidPercentEncoding {
|
||||
buffer.WriteString(p.percentEncodeInvalidRune(r, p.opts.pathPercentEncodeSet))
|
||||
} else {
|
||||
buffer.WriteString(p.percentEncodeRune(r, p.opts.pathPercentEncodeSet))
|
||||
}
|
||||
}
|
||||
case StateOpaquePath:
|
||||
if r == '?' {
|
||||
url.query = new(string)
|
||||
state = StateQuery
|
||||
buffer.Reset()
|
||||
} else if r == '#' {
|
||||
url.fragment = new(string)
|
||||
state = StateFragment
|
||||
buffer.Reset()
|
||||
} else if !input.eof {
|
||||
if !isURLCodePoint(r) && r != '%' {
|
||||
if err := p.handleError(url, errors.InvalidURLUnit, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
invalidPercentEncoding, d := input.remainingIsInvalidPercentEncoded()
|
||||
if invalidPercentEncoding {
|
||||
if err := p.handleErrorWithDescription(url, errors.InvalidURLUnit, false, d); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
buffer.WriteString(p.percentEncodeInvalidRune(r, C0PercentEncodeSet))
|
||||
} else {
|
||||
buffer.WriteString(p.percentEncodeRune(r, C0PercentEncodeSet))
|
||||
}
|
||||
url.path.setOpaque(buffer.String())
|
||||
}
|
||||
case StateQuery:
|
||||
if !stateOverridden && r == '#' {
|
||||
url.fragment = new(string)
|
||||
state = StateFragment
|
||||
*url.query = buffer.String()
|
||||
buffer.Reset()
|
||||
} else if !input.eof {
|
||||
if !isURLCodePoint(r) && r != '%' {
|
||||
if err := p.handleError(url, errors.InvalidURLUnit, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
invalidPercentEncoding, d := input.remainingIsInvalidPercentEncoded()
|
||||
if invalidPercentEncoding {
|
||||
if err := p.handleErrorWithDescription(url, errors.InvalidURLUnit, false, d); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
encodeSet := p.opts.queryPercentEncodeSet
|
||||
if url.isSpecialScheme(url.scheme) {
|
||||
encodeSet = p.opts.specialQueryPercentEncodeSet
|
||||
}
|
||||
buffer.WriteString(p.percentEncodeRune(r, encodeSet))
|
||||
} else {
|
||||
q := buffer.String()
|
||||
url.query = &q
|
||||
}
|
||||
case StateFragment:
|
||||
if !input.eof {
|
||||
if !isURLCodePoint(r) && r != '%' {
|
||||
if err := p.handleError(url, errors.InvalidURLUnit, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
invalidPercentEncoding, d := input.remainingIsInvalidPercentEncoded()
|
||||
if invalidPercentEncoding {
|
||||
if err := p.handleErrorWithDescription(url, errors.InvalidURLUnit, false, d); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
encodeSet := p.opts.fragmentPercentEncodeSet
|
||||
if url.isSpecialScheme(url.scheme) {
|
||||
encodeSet = p.opts.specialFragmentPercentEncodeSet
|
||||
}
|
||||
buffer.WriteString(p.percentEncodeRune(r, encodeSet))
|
||||
} else {
|
||||
f := buffer.String()
|
||||
url.fragment = &f
|
||||
}
|
||||
}
|
||||
|
||||
if input.eof {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return url, nil
|
||||
}
|
||||
|
||||
func (p *parser) percentEncodeInvalidRune(r rune, tr *PercentEncodeSet) string {
|
||||
if p.opts.percentEncodeSinglePercentSign {
|
||||
return p.percentEncodeRune(r, tr.Set(0x25))
|
||||
}
|
||||
return p.percentEncodeRune(r, tr)
|
||||
}
|
||||
|
||||
func (p *parser) percentEncodeRune(r rune, tr *PercentEncodeSet) string {
|
||||
if tr != nil && !tr.RuneShouldBeEncoded(r) {
|
||||
return string(r)
|
||||
}
|
||||
|
||||
var bytes = make([]byte, 4)
|
||||
var n int
|
||||
if p.opts.encodingOverride != nil {
|
||||
b, _ := p.opts.encodingOverride.EncodeRune(r)
|
||||
bytes[0] = b
|
||||
n = 1
|
||||
} else {
|
||||
n = utf8.EncodeRune(bytes, r)
|
||||
}
|
||||
|
||||
percentEncoded := make([]byte, 4*3)
|
||||
j := 0
|
||||
for i := 0; i < n; i++ {
|
||||
c := bytes[i]
|
||||
percentEncoded[j] = '%'
|
||||
percentEncoded[j+1] = "0123456789ABCDEF"[c>>4]
|
||||
percentEncoded[j+2] = "0123456789ABCDEF"[c&15]
|
||||
j += 3
|
||||
}
|
||||
return string(percentEncoded[:j])
|
||||
}
|
||||
|
||||
func (p *parser) PercentEncodeString(s string, tr *PercentEncodeSet) string {
|
||||
buffer := &strings.Builder{}
|
||||
runes := []rune(s)
|
||||
for i, r := range runes {
|
||||
if r == '%' {
|
||||
if len(runes) < (i+3) ||
|
||||
(!ASCIIHexDigit.Test(uint(runes[i+1])) || !ASCIIHexDigit.Test(uint(runes[i+2]))) {
|
||||
if p.opts.percentEncodeSinglePercentSign {
|
||||
buffer.WriteString(p.percentEncodeRune(r, tr.Set(0x25)))
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
buffer.WriteString(p.percentEncodeRune(r, tr))
|
||||
}
|
||||
return buffer.String()
|
||||
}
|
||||
|
||||
func (p *parser) DecodePercentEncoded(s string) string {
|
||||
sb := strings.Builder{}
|
||||
bytes := []byte(s)
|
||||
for i := 0; i < len(bytes); i++ {
|
||||
if bytes[i] != '%' {
|
||||
sb.WriteByte(bytes[i])
|
||||
} else if len(bytes) < (i+3) ||
|
||||
(!ASCIIHexDigit.Test(uint(bytes[i+1])) || !ASCIIHexDigit.Test(uint(bytes[i+2]))) {
|
||||
sb.WriteByte(bytes[i])
|
||||
} else {
|
||||
b, e := u2.PathUnescape(string(bytes[i : i+3]))
|
||||
if e != nil {
|
||||
return sb.String()
|
||||
}
|
||||
if p.opts.encodingOverride != nil {
|
||||
r := p.opts.encodingOverride.DecodeByte(b[0])
|
||||
sb.WriteRune(r)
|
||||
} else {
|
||||
sb.WriteString(b)
|
||||
}
|
||||
i += 2
|
||||
}
|
||||
}
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
func (p *parser) NewUrl() *Url {
|
||||
u := Url{}
|
||||
u.parser = p
|
||||
|
||||
u.path = &path{}
|
||||
u.path.init()
|
||||
|
||||
return &u
|
||||
}
|
||||
|
||||
// isSingleDotPathSegment reports whether s is a single-dot path segment:
// "." or its percent-encoded form "%2e", compared ASCII case-insensitively.
// strings.EqualFold is used so no lower-cased copy of s is allocated.
func isSingleDotPathSegment(s string) bool {
	return s == "." || strings.EqualFold(s, "%2e")
}
|
||||
|
||||
// isDoubleDotPathSegment reports whether s is a double-dot path segment:
// ".." or any form in which one or both dots are written as "%2e", compared
// ASCII case-insensitively. strings.EqualFold is used so no lower-cased copy
// of s is allocated.
func isDoubleDotPathSegment(s string) bool {
	return s == ".." ||
		strings.EqualFold(s, ".%2e") ||
		strings.EqualFold(s, "%2e.") ||
		strings.EqualFold(s, "%2e%2e")
}
|
||||
|
||||
func startsWithAWindowsDriveLetter(s string) bool {
|
||||
if len(s) >= 2 && isWindowsDriveLetter(s[0:2]) &&
|
||||
(len(s) == 2 || s[2] == '/' || s[2] == '\\' || s[2] == '?' || s[2] == '#') {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func isWindowsDriveLetter(s string) bool {
|
||||
if len(s) == 2 && ASCIIAlpha.Test(uint(s[0])) &&
|
||||
(s[1] == ':' || s[1] == '|') {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func isNormalizedWindowsDriveLetter(s string) bool {
|
||||
if len(s) == 2 && ASCIIAlpha.Test(uint(s[0])) &&
|
||||
(s[1] == ':') {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func trimPrefix(s string, tr *PercentEncodeSet) (string, bool) {
|
||||
if s == "" {
|
||||
return s, false
|
||||
}
|
||||
for i, c := range s {
|
||||
if tr.RuneNotInSet(c) {
|
||||
return s[i:], i > 0
|
||||
}
|
||||
}
|
||||
return "", true
|
||||
}
|
||||
|
||||
func trimPostfix(s string, tr *PercentEncodeSet) (string, bool) {
|
||||
if s == "" {
|
||||
return s, false
|
||||
}
|
||||
for i := len(s) - 1; i >= 0; i-- {
|
||||
c := s[i]
|
||||
if tr.RuneNotInSet(int32(c)) {
|
||||
return s[:i+1], i < (len(s) - 1)
|
||||
}
|
||||
}
|
||||
return "", true
|
||||
}
|
||||
|
||||
func trim(s string, tr *PercentEncodeSet) (string, bool) {
|
||||
var c1, c2 bool
|
||||
s, c1 = trimPrefix(s, tr)
|
||||
s, c2 = trimPostfix(s, tr)
|
||||
return s, c1 || c2
|
||||
}
|
||||
|
||||
func remove(s string, tr *bitset.BitSet) (string, bool) {
|
||||
if s == "" {
|
||||
return s, false
|
||||
}
|
||||
changed := false
|
||||
var r []byte
|
||||
for _, c := range []byte(s) {
|
||||
if tr.Test(uint(c)) {
|
||||
changed = true
|
||||
} else {
|
||||
r = append(r, c)
|
||||
}
|
||||
}
|
||||
return string(r), changed
|
||||
}
|
||||
|
||||
func containsOnly(s string, tr *bitset.BitSet) bool {
|
||||
for _, c := range []byte(s) {
|
||||
if !tr.Test(uint(c)) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (u *Url) IsSpecialScheme() bool {
|
||||
return u.isSpecialScheme(u.scheme)
|
||||
}
|
||||
|
||||
func (u *Url) isSpecialScheme(s string) bool {
|
||||
_, ok := u.getSpecialScheme(s)
|
||||
return ok
|
||||
}
|
||||
|
||||
func (u *Url) getSpecialScheme(s string) (string, bool) {
|
||||
dp, ok := u.parser.opts.specialSchemes[s]
|
||||
return dp, ok
|
||||
}
|
||||
|
||||
func (u *Url) isSpecialSchemeAndBackslash(r rune) bool {
|
||||
ok := u.IsSpecialScheme()
|
||||
return ok && r == '\\'
|
||||
}
|
||||
|
||||
func (u *Url) cleanDefaultPort() {
|
||||
if dp, ok := u.getSpecialScheme(u.scheme); ok && (u.port == nil || dp == *u.port) {
|
||||
u.port = nil
|
||||
u.decodedPort = 0
|
||||
}
|
||||
}
|
||||
|
||||
func (u *Url) getDefaultPort() int {
|
||||
if dp, ok := u.getSpecialScheme(u.scheme); ok {
|
||||
if p, err := strconv.Atoi(dp); err == nil {
|
||||
return p
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
Reference in New Issue
Block a user