feat: Initial commit

This commit is contained in:
2025-07-26 05:58:59 +00:00
commit 753d1c60ea
1849 changed files with 830533 additions and 0 deletions
+201
View File
@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
+60
View File
@@ -0,0 +1,60 @@
/*
* Copyright 2020 National Library of Norway.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package errors
// ErrorType identifies a kind of validation error. Its string value is the
// human-readable message describing that kind of error.
type ErrorType string
// IDNA errors: problems converting a domain between its Unicode and ASCII forms.
const (
// DomainToASCII is used when the Unicode-to-ASCII conversion fails or yields an empty result.
DomainToASCII ErrorType = "Unicode ToASCII records an error or returns the empty string"
// DomainToUnicode is used when the ASCII-to-Unicode conversion fails.
DomainToUnicode ErrorType = "Unicode ToUnicode records an error"
)
// Host parsing errors: problems found while parsing a domain, an opaque host,
// an IPv4 address or an IPv6 address.
const (
// Domain and opaque host errors.
DomainInvalidCodePoint ErrorType = "The host contains a forbidden domain code point"
HostInvalidCodePoint ErrorType = "An opaque host (in a URL that is not special) contains a forbidden host code point"
// IPv4 errors.
IPv4EmptyPart ErrorType = "An IPv4 address ends with a U+002E (.)"
IPv4TooManyParts ErrorType = "An IPv4 address has more than four parts"
IPv4NonNumericPart ErrorType = "An IPv4 address contains a non-numeric part"
IPv4NonDecimalPart ErrorType = "The IPv4 address contains numbers expressed using hexadecimal or octal digits"
IPv4OutOfRangePart ErrorType = "An IPv4 address contains a part that is greater than 255"
// IPv6 errors.
IPv6Unclosed ErrorType = "An IPv6 address is missing the closing U+005D (])"
IPv6InvalidCompression ErrorType = "An IPv6 address begins with improper compression"
IPv6TooManyPieces ErrorType = "An IPv6 address has more than eight pieces"
IPv6MultipleCompression ErrorType = "An IPv6 address contains multiple instances of '::'"
IPv6InvalidCodePoint ErrorType = "An IPv6 address contains a code point that is neither an ASCII hex digit nor a U+003A (:). Or it unexpectedly ends"
IPv6TooFewPieces ErrorType = "An uncompressed IPv6 address contains fewer than 8 pieces"
// Errors for an IPv4 address embedded at the end of an IPv6 address.
IPv4InIPv6TooManyPieces ErrorType = "An IPv4 address is found in an IPv6 address, but the IPv6 address has more than 6 pieces"
IPv4InIPv6InvalidCodePoint ErrorType = "An IPv4 address is found in an IPv6 address and one of the following is true: 1. An IPv4 part is empty or contains a non-ASCII digit. 2. An IPv4 part contains a leading 0. 3. There are too many IPv4 parts"
IPv4InIPv6OutOfRangePart ErrorType = "An IPv4 address is found in an IPv6 address and one of the IPv4 parts is greater than 255"
IPv4InIPv6TooFewParts ErrorType = "An IPv4 address is found in an IPv6 address and there are too few IPv4 parts"
)
// URL parsing errors: problems found while parsing the URL as a whole
// (scheme, credentials, host presence, port and file-specific rules).
const (
InvalidURLUnit ErrorType = "A code point is found that is not a URL unit"
SpecialSchemeMissingFollowingSolidus ErrorType = "The inputs scheme is not followed by '//'"
MissingSchemeNonRelativeURL ErrorType = "The input is missing a scheme, because it does not begin with an ASCII alpha, and either no base URL was provided or the base URL cannot be used as a base URL because it has an opaque path"
InvalidReverseSolidus ErrorType = "The URL has a special scheme and it uses U+005C (\\) instead of U+002F (/)"
InvalidCredentials ErrorType = "The input includes credentials"
HostMissing ErrorType = "The input has a special scheme, but does not contain a host"
// Port errors.
PortOutOfRange ErrorType = "The input's port is outside the range [0-65535]"
PortInvalid ErrorType = "The input's port is not a number"
// Errors specific to file URLs and Windows drive letters.
FileInvalidWindowsDriveLetter ErrorType = "The input is a relative-URL string that starts with a Windows drive letter and the base URLs scheme is 'file'"
FileInvalidWindowsDriveLetterHost ErrorType = "A file: URLs host is a Windows drive letter"
)
+163
View File
@@ -0,0 +1,163 @@
/*
* Copyright 2020 National Library of Norway.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package errors
import (
"fmt"
)
// ValidationError indicates that the url is not valid.
// It carries the kind of error, the offending url, an optional description,
// an optional underlying cause, and whether the problem is a failure or only
// a warning.
type ValidationError struct {
errorType ErrorType // the kind of validation error
cause error // the root cause for this error
descr string // description of the error
failure bool // true if the error is a failure, false if it is a warning
url string // the url that caused the error
}
// Error implements the error interface. The message always starts with the
// error type; the description, the url and the cause are appended, in that
// order, when they are set.
func (e *ValidationError) Error() string {
	msg := fmt.Sprintf("Error: %s", e.errorType)
	if e.descr != "" {
		msg = fmt.Sprintf("%s: '%s'", msg, e.descr)
	}
	if e.url != "" {
		msg = fmt.Sprintf("%s. Url: '%s'", msg, e.url)
	}
	if e.cause != nil {
		msg = msg + ", Cause: " + e.cause.Error()
	}
	return msg
}
// Unwrap returns the root cause for this error, or nil when the error was
// created without a cause.
func (e *ValidationError) Unwrap() error {
return e.cause
}
// Type returns the error type this error was created with.
func (e *ValidationError) Type() ErrorType {
return e.errorType
}
// Url returns the url causing the error. It is empty when no url was recorded.
func (e *ValidationError) Url() string {
return e.url
}
// Failure returns true if the error is a failure, false if it is a warning.
func (e *ValidationError) Failure() bool {
return e.failure
}
// Description returns the error description. It is empty when the error was
// created without one.
func (e *ValidationError) Description() string {
return e.descr
}
// Type returns the error type of err.
// If err does not implement a Type() ErrorType method (this includes a nil
// err), the empty ErrorType is returned. Only err itself is inspected.
func Type(err error) ErrorType {
	if typed, ok := err.(interface{ Type() ErrorType }); ok {
		return typed.Type()
	}
	return ""
}
// Description returns the description of err.
// If err does not implement a Description() string method (this includes a
// nil err), the empty string is returned. Only err itself is inspected.
func Description(err error) string {
	if described, ok := err.(interface{ Description() string }); ok {
		return described.Description()
	}
	return ""
}
// Url returns the url that caused err.
// If err does not implement a Url() string method (this includes a nil err),
// the empty string is returned. Only err itself is inspected.
func Url(err error) string {
	if located, ok := err.(interface{ Url() string }); ok {
		return located.Url()
	}
	return ""
}
// Failure reports whether err is a failure (true) or only a warning (false).
// An err that does not implement a Failure() bool method (this includes a nil
// err) is treated as a failure, so true is returned.
func Failure(err error) bool {
	if graded, ok := err.(interface{ Failure() bool }); ok {
		return graded.Failure()
	}
	return true
}
// Error constructs a new *ValidationError of the given type for url, with no
// description and no cause. failure marks it as a failure (true) or a
// warning (false).
func Error(errorType ErrorType, url string, failure bool) error {
	e := new(ValidationError)
	e.errorType = errorType
	e.url = url
	e.failure = failure
	return e
}
// ErrorWithDescr constructs a new *ValidationError of the given type for url
// that also carries a description. failure marks it as a failure (true) or a
// warning (false).
func ErrorWithDescr(errorType ErrorType, descr string, url string, failure bool) error {
	e := new(ValidationError)
	e.errorType = errorType
	e.descr = descr
	e.url = url
	e.failure = failure
	return e
}
// Wrap wraps err in a new *ValidationError of the given type for url.
// err becomes the cause returned by Unwrap. failure marks the new error as a
// failure (true) or a warning (false).
func Wrap(err error, errorType ErrorType, url string, failure bool) error {
	e := new(ValidationError)
	e.errorType = errorType
	e.url = url
	e.cause = err
	e.failure = failure
	return e
}
// WrapWithDescr wraps err in a new *ValidationError of the given type for url
// that also carries a description. err becomes the cause returned by Unwrap.
// failure marks the new error as a failure (true) or a warning (false).
func WrapWithDescr(err error, errorType ErrorType, descr string, url string, failure bool) error {
	e := new(ValidationError)
	e.errorType = errorType
	e.descr = descr
	e.url = url
	e.cause = err
	e.failure = failure
	return e
}
+153
View File
@@ -0,0 +1,153 @@
/*
* Copyright 2020 National Library of Norway.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package url
import (
"unicode"
"github.com/bits-and-blooms/bitset"
)
// PercentEncodeSet describes a set of code points that must be percent-encoded.
// A code point is a member when it is below allBelow or when its bit is set
// in bs. The *ShouldBeEncoded methods additionally treat everything above
// 0x7E as needing encoding.
type PercentEncodeSet struct {
bs *bitset.BitSet // individually listed members of the set
allBelow int32 // every code point strictly below this value is a member
}
// NewPercentEncodeSet creates a percent-encode set that contains every code
// point below allBelow plus each of the listed bytes.
func NewPercentEncodeSet(allBelow int32, bytes ...uint) *PercentEncodeSet {
	members := bitset.New(0x7f)
	for i := range bytes {
		members.Set(bytes[i])
	}
	return &PercentEncodeSet{bs: members, allBelow: allBelow}
}
// Set returns a new percent-encode set equal to p with the given bytes added.
// The receiver is not modified; the bitset is cloned first.
func (p *PercentEncodeSet) Set(bytes ...uint) *PercentEncodeSet {
	members := p.bs.Clone()
	for i := range bytes {
		members.Set(bytes[i])
	}
	return &PercentEncodeSet{allBelow: p.allBelow, bs: members}
}
// Clear returns a new percent-encode set equal to p with the given bytes
// removed from the individually listed members. The receiver is not modified;
// the bitset is cloned first. The allBelow threshold is carried over unchanged.
func (p *PercentEncodeSet) Clear(bytes ...uint) *PercentEncodeSet {
	members := p.bs.Clone()
	for i := range bytes {
		members.Clear(bytes[i])
	}
	return &PercentEncodeSet{allBelow: p.allBelow, bs: members}
}
// RuneShouldBeEncoded reports whether r must be percent-encoded: it is below
// the set's allBelow threshold, above 0x7E, or individually listed in the set.
func (p *PercentEncodeSet) RuneShouldBeEncoded(r rune) bool {
	return r < p.allBelow || r > 0x007E || p.bs.Test(uint(r))
}
// ByteShouldBeEncoded reports whether b must be percent-encoded: it is below
// the set's allBelow threshold, above 0x7E, or individually listed in the set.
func (p *PercentEncodeSet) ByteShouldBeEncoded(b byte) bool {
	return int32(b) < p.allBelow || b > 0x007E || p.bs.Test(uint(b))
}
// RuneNotInSet reports whether r is outside the set, i.e. it is neither below
// the allBelow threshold nor individually listed. Unlike RuneShouldBeEncoded,
// code points above 0x7E get no special treatment here.
func (p *PercentEncodeSet) RuneNotInSet(r rune) bool {
	inSet := r < p.allBelow || p.bs.Test(uint(r))
	return !inSet
}
// isURLCodePoint reports whether r is a URL code point: an ASCII
// alphanumeric, one of the punctuation characters in someURLCodePoints, or a
// code point in the range U+00A0..U+10FFFD that is neither a noncharacter nor
// a surrogate.
func isURLCodePoint(r rune) bool {
	switch {
	case ASCIIAlphanumeric.Test(uint(r)), someURLCodePoints.Test(uint(r)):
		return true
	case r < 0xa0 || r > 0x10fffd:
		return false
	}
	return !unicode.Is(unicode.Noncharacter_Code_Point, r) && !unicode.Is(unicode.Cs, r)
}
// ASCIITabOrNewline contains U+0009 (tab), U+000A (LF) and U+000D (CR).
var ASCIITabOrNewline = bitset.New(0x0d).Set(0x09).Set(0x0a).Set(0x0d)
// The following sets are allocated empty here and populated in init().
var ASCIIAlpha = bitset.New(0x7a)
var ASCIIDigit = bitset.New(0x39)
var ASCIIHexDigit = bitset.New(0x66)
var ASCIIAlphanumeric = bitset.New(0x7a)
var C0control = bitset.New(0x1f)
// C0controlOrSpace starts with U+0020 (space); init() adds 0x00-0x1f.
var C0controlOrSpace = bitset.New(0x20).Set(0x20)
// ForbiddenHostCodePoint lists the code points that are rejected in a host:
// NUL, tab, LF, CR, space, # / : < > ? @ [ \ ] ^ |
var ForbiddenHostCodePoint = bitset.New(0x7c).Set(0x00).Set(0x09).Set(0x0a).Set(0x0d).Set(0x20).
Set(0x23).Set(0x2f).Set(0x3a).Set(0x3c).Set(0x3e).Set(0x3f).Set(0x40).Set(0x5b).
Set(0x5c).Set(0x5d).Set(0x5e).Set(0x7c)
// ForbiddenDomainCodePoint is ForbiddenHostCodePoint plus % (0x25) and DEL (0x7f);
// init() additionally adds the code points 0x00-0x1f.
var ForbiddenDomainCodePoint = ForbiddenHostCodePoint.Clone().Set(0x25).Set(0x7f)
// someURLCodePoints holds the ASCII punctuation that counts as URL code points:
// ! $ & ' ( ) * + , - . / : ; = ? @ _ ~
// ASCII alphanumerics and the non-ASCII range are checked separately by
// isURLCodePoint. U+0021 (!) is part of the WHATWG "URL code points"
// definition; without it, every "!" in an opaque host would be reported as an
// InvalidURLUnit validation error.
var someURLCodePoints = bitset.New(0x7e).Set(0x21).Set(0x24).Set(0x26).Set(0x27).Set(0x28).Set(0x29).
	Set(0x2a).Set(0x2b).Set(0x2c).Set(0x2d).Set(0x2e).Set(0x2f).Set(0x3a).Set(0x3b).Set(0x3d).
	Set(0x3f).Set(0x40).Set(0x5f).Set(0x7e)
// Percent-encode sets. Each set is derived from a smaller one via Set, which
// returns a modified copy, so the base sets are never mutated.

// C0PercentEncodeSet contains every code point below 0x20.
var C0PercentEncodeSet = NewPercentEncodeSet(0x20)
// C0OrSpacePercentEncodeSet contains every code point below 0x21 (adds space).
var C0OrSpacePercentEncodeSet = NewPercentEncodeSet(0x21)
// FragmentPercentEncodeSet adds " < > `
var FragmentPercentEncodeSet = C0OrSpacePercentEncodeSet.Set(0x22, 0x3c, 0x3e, 0x60)
// QueryPercentEncodeSet adds " # < >
var QueryPercentEncodeSet = C0OrSpacePercentEncodeSet.Set(0x22, 0x23, 0x3C, 0x3E)
// SpecialQueryPercentEncodeSet is the query set plus '
var SpecialQueryPercentEncodeSet = QueryPercentEncodeSet.Set(0x27)
// PathPercentEncodeSet is the query set plus ? ` { }
var PathPercentEncodeSet = QueryPercentEncodeSet.Set(0x3f, 0x60, 0x7b, 0x7d)
// UserInfoPercentEncodeSet is the path set plus / : ; = @ [ \ ] ^ |
var UserInfoPercentEncodeSet = PathPercentEncodeSet.Set(0x2f, 0x3a, 0x3b, 0x3d, 0x40, 0x5b, 0x5c, 0x5d, 0x5e, 0x7c)
// HostPercentEncodeSet adds #
var HostPercentEncodeSet = C0OrSpacePercentEncodeSet.Set(0x23)
// init populates the package-level bitsets that are declared empty:
// the ASCII letter, digit, hex digit and alphanumeric sets, and the C0
// control range (0x00-0x1f), which is also added to C0controlOrSpace and
// ForbiddenDomainCodePoint.
func init() {
	// Letters: walk both cases in lockstep.
	for lower, upper := 'a', 'A'; lower <= 'z'; lower, upper = lower+1, upper+1 {
		ASCIIAlpha.Set(uint(lower))
		ASCIIAlpha.Set(uint(upper))
	}
	for digit := '0'; digit <= '9'; digit++ {
		ASCIIDigit.Set(uint(digit))
	}

	// Alphanumeric = letters + digits.
	ASCIIAlphanumeric.InPlaceUnion(ASCIIAlpha)
	ASCIIAlphanumeric.InPlaceUnion(ASCIIDigit)

	// Hex digits = digits + a-f in both cases.
	ASCIIHexDigit.InPlaceUnion(ASCIIDigit)
	for lower, upper := 'a', 'A'; lower <= 'f'; lower, upper = lower+1, upper+1 {
		ASCIIHexDigit.Set(uint(upper))
		ASCIIHexDigit.Set(uint(lower))
	}

	// Add C0 control characters
	for cp := uint(0x00); cp <= 0x1f; cp++ {
		C0control.Set(cp)
		C0controlOrSpace.Set(cp)
		ForbiddenDomainCodePoint.Set(cp)
	}
}
+57
View File
@@ -0,0 +1,57 @@
/*
* Copyright 2020 National Library of Norway.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package url
import (
"github.com/nlnwa/whatwg-url/errors"
)
// handleError creates a validation error of the given type for u's input url
// and processes it according to the parser options: it is appended to
// u.validationErrors when reporting is enabled, and it is returned when it is
// a failure or when the parser is configured to fail on any validation error.
// Otherwise nil is returned and parsing may continue.
func (p *parser) handleError(u *Url, errorType errors.ErrorType, failure bool) error {
	validationErr := errors.Error(errorType, u.inputUrl, failure)
	if p.opts.reportValidationErrors {
		u.validationErrors = append(u.validationErrors, validationErr)
	}
	if !failure && !p.opts.failOnValidationError {
		return nil
	}
	return validationErr
}
// handleErrorWithDescription creates a validation error of the given type,
// carrying descr, for u's input url and processes it according to the parser
// options: it is appended to u.validationErrors when reporting is enabled,
// and it is returned when it is a failure or when the parser is configured to
// fail on any validation error. Otherwise nil is returned.
func (p *parser) handleErrorWithDescription(u *Url, errorType errors.ErrorType, failure bool, descr string) error {
	validationErr := errors.ErrorWithDescr(errorType, descr, u.inputUrl, failure)
	if p.opts.reportValidationErrors {
		u.validationErrors = append(u.validationErrors, validationErr)
	}
	if !failure && !p.opts.failOnValidationError {
		return nil
	}
	return validationErr
}
// handleWrappedError wraps cause in a validation error of the given type for
// u's input url and processes it according to the parser options: it is
// appended to u.validationErrors when reporting is enabled, and it is
// returned when it is a failure or when the parser is configured to fail on
// any validation error. Otherwise nil is returned.
func (p *parser) handleWrappedError(u *Url, errorType errors.ErrorType, failure bool, cause error) error {
	validationErr := errors.Wrap(cause, errorType, u.inputUrl, failure)
	if p.opts.reportValidationErrors {
		u.validationErrors = append(u.validationErrors, validationErr)
	}
	if !failure && !p.opts.failOnValidationError {
		return nil
	}
	return validationErr
}
+529
View File
@@ -0,0 +1,529 @@
/*
* Copyright 2019 National Library of Norway.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package url
import (
goerrors "errors"
"fmt"
"math"
"strconv"
"strings"
"unicode/utf8"
"golang.org/x/net/idna"
"github.com/nlnwa/whatwg-url/errors"
)
// parseHost parses the host part of the input string.
//
// The steps are, in order:
//  1. run the optional preParseHostFunc hook on input;
//  2. return "" for an empty host;
//  3. a host starting with '[' is parsed as an IPv6 address;
//  4. for a URL that is not special, the host is parsed as an opaque host;
//  5. otherwise the host is percent-decoded, converted to ASCII via IDNA,
//     checked for forbidden domain code points and, if it ends in a number,
//     parsed as an IPv4 address;
//  6. run the optional postParseHostFunc hook on the resulting domain.
//
// When laxHostParsing is enabled, invalid UTF-8, IDNA failures and forbidden
// code points do not fail the parse; a best-effort host is returned instead.
// The parser argument is only used to percent-encode the host in that lax
// branch.
func (p *parser) parseHost(u *Url, parser *parser, input string, isNotSpecial bool) (string, error) {
	if p.opts.preParseHostFunc != nil {
		input = p.opts.preParseHostFunc(u, input)
	}

	if input == "" {
		return "", nil
	}

	if input[0] == '[' {
		if !strings.HasSuffix(input, "]") {
			if err := p.handleError(u, errors.IPv6Unclosed, true); err != nil {
				return "", err
			}
		}
		// Remove exactly one leading '[' and one trailing ']'. strings.Trim
		// would strip every bracket on both ends and thereby accept
		// malformed hosts such as "[[::1]]".
		input = strings.TrimSuffix(strings.TrimPrefix(input, "["), "]")
		return p.parseIPv6(u, newInputString(input))
	}

	if isNotSpecial {
		return p.parseOpaqueHost(u, input)
	}

	domain := p.DecodePercentEncoded(input)
	if !utf8.ValidString(domain) {
		if p.opts.laxHostParsing {
			return percentEncodeString(input, HostPercentEncodeSet), nil
		}
		if err := p.handleErrorWithDescription(u, errors.DomainToASCII, true, "not a valid UTF-8 string"); err != nil {
			return "", err
		}
	}

	asciiDomain, err := p.ToASCII(domain, false)
	if err != nil {
		if p.opts.laxHostParsing {
			return domain, nil
		}
		if err := p.handleWrappedError(u, errors.DomainToASCII, true, err); err != nil {
			return "", err
		}
	}

	for _, c := range asciiDomain {
		if ForbiddenDomainCodePoint.Test(uint(c)) {
			if p.opts.laxHostParsing {
				return parser.PercentEncodeString(asciiDomain, HostPercentEncodeSet), nil
			}
			if err := p.handleErrorWithDescription(u, errors.DomainInvalidCodePoint, true, string(c)); err != nil {
				return "", err
			}
		}
	}

	if p.endsInANumber(u, asciiDomain) {
		return p.parseIPv4(u, asciiDomain)
	}

	if p.opts.postParseHostFunc != nil {
		asciiDomain = p.opts.postParseHostFunc(u, asciiDomain)
	}
	return asciiDomain, nil
}
// endsInANumber reports whether the last label of input (ignoring a single
// trailing dot) is a number, in which case the host must be parsed as an
// IPv4 address. A label counts as a number when it consists only of ASCII
// digits or when it parses as an IPv4 number (decimal, "0x" hex or
// "0"-prefixed octal); a label that is numeric but overflows still counts.
func (p *parser) endsInANumber(u *Url, input string) bool {
	parts := strings.Split(input, ".")
	if parts[len(parts)-1] == "" {
		if len(parts) == 1 {
			return false
		}
		parts = parts[:len(parts)-1]
	}

	last := parts[len(parts)-1]
	if last == "" {
		// An empty label is never a number. Return before calling
		// parseIPv4Number, which would record a failure-level IPv4EmptyPart
		// validation error on u for a host that is merely a domain.
		return false
	}
	if containsOnly(last, ASCIIDigit) {
		return true
	}

	_, _, err := p.parseIPv4Number(u, last)
	return err == nil || goerrors.Is(err, strconv.ErrRange)
}
// parseIPv4Number parses one dot-separated part of an IPv4 address.
//
// A "0x"/"0X" prefix selects hexadecimal and a leading "0" (on input of at
// least two characters) selects octal; both set validationError because the
// part is not written in decimal. A part that is empty after removing the
// prefix (for example "0x") yields 0 with validationError set.
//
// Returns the parsed number, whether a non-fatal validation error was seen,
// and an error when input is empty or is not a number in the selected radix.
// Values that do not fit in an int64 yield an error wrapping strconv.ErrRange.
func (p *parser) parseIPv4Number(u *Url, input string) (number int64, validationError bool, err error) {
	if input == "" {
		if err = p.handleError(u, errors.IPv4EmptyPart, true); err != nil {
			return number, validationError, err
		}
	}

	radix := 10
	switch {
	case strings.HasPrefix(input, "0x"), strings.HasPrefix(input, "0X"):
		validationError = true
		input = input[2:]
		radix = 16
	case len(input) >= 2 && strings.HasPrefix(input, "0"):
		validationError = true
		input = input[1:]
		radix = 8
	}

	if input == "" {
		validationError = true
		return number, validationError, err
	}

	// Parse as unsigned: only radix digits are allowed in an IPv4 number.
	// strconv.ParseInt would accept a leading '+' or '-', so that "-1" was
	// taken for a number and later wrapped around to 255.255.255.255.
	// A bit size of 63 keeps the accepted range identical to a non-negative
	// int64, so the conversion below cannot overflow.
	var parsed uint64
	parsed, err = strconv.ParseUint(input, radix, 63)
	number = int64(parsed)
	return number, validationError, err
}
// parseIPv4 parses input as an IPv4 address and returns it in dotted-decimal
// form. On success u.isIPv4 is set. Validation errors that are not failures
// are handed to handleError and only abort the parse when the parser is
// configured to fail on validation errors.
// NOTE(review): input is expected to contain at least one non-empty part;
// the caller in this file only gets here after endsInANumber returned true.
func (p *parser) parseIPv4(u *Url, input string) (string, error) {
parts := strings.Split(input, ".")
// A single trailing dot is tolerated (validation error only) and dropped.
if parts[len(parts)-1] == "" {
if err := p.handleError(u, errors.IPv4EmptyPart, false); err != nil {
return input, err
}
if len(parts) > 1 {
parts = parts[:len(parts)-1]
}
}
if len(parts) > 4 {
if err := p.handleError(u, errors.IPv4TooManyParts, true); err != nil {
return input, err
}
}
// Parse every part into a number; non-decimal notation is a validation error.
var numbers []int64
for _, part := range parts {
n, validationError, err := p.parseIPv4Number(u, part)
if err != nil {
if err := p.handleWrappedError(u, errors.IPv4NonNumericPart, true, err); err != nil {
return input, err
}
}
if validationError {
if err := p.handleError(u, errors.IPv4NonDecimalPart, false); err != nil {
return input, err
}
}
numbers = append(numbers, n)
}
// Any part above 255 is first reported as a (non-failure) validation error ...
for _, n := range numbers {
if n > 255 {
if err := p.handleError(u, errors.IPv4OutOfRangePart, false); err != nil {
return input, err
}
}
}
// ... but it is only a failure when it is not the last part.
for _, n := range numbers[:len(numbers)-1] {
if n > 255 {
if err := p.handleError(u, errors.IPv4OutOfRangePart, true); err != nil {
return "", err
}
}
}
// The last part may span all remaining bytes: it must be below 256^(5-len(numbers)).
if numbers[len(numbers)-1] >= int64(math.Pow(256, float64(5-len(numbers)))) {
if err := p.handleError(u, errors.IPv4OutOfRangePart, true); err != nil {
return "", err
}
}
// Start from the last part and add each leading part shifted into its byte position.
var ipv4 = IPv4Addr(numbers[len(numbers)-1])
numbers = numbers[:len(numbers)-1]
for counter, n := range numbers {
ipv4 += IPv4Addr(n * int64(math.Pow(256, float64(3-counter))))
}
u.isIPv4 = true
return ipv4.String(), nil
}
// parseIPv6 parses input (the text between the brackets of an IPv6 host) into
// eight 16-bit pieces and returns the serialized address wrapped in "[" and
// "]". It supports a single "::" compression and a trailing embedded IPv4
// address. On success u.isIPv6 is set. Every error raised here is a failure.
// NOTE(review): nextCodePoint, rewind, remainingStartsWith and eof belong to
// inputString, which is defined elsewhere; the comments below assume
// nextCodePoint advances the input and returns the next code point — confirm.
func (p *parser) parseIPv6(u *Url, input *inputString) (string, error) {
address := &IPv6Addr{}
pieceIdx := 0
// compress is the piece index at which "::" was seen, or -1 if it was not seen.
compress := -1
c := input.nextCodePoint()
// A leading ':' is only valid as the start of "::".
if c == ':' {
if !input.remainingStartsWith(":") {
if err := p.handleError(u, errors.IPv6InvalidCompression, true); err != nil {
return "", err
}
}
input.nextCodePoint()
c = input.nextCodePoint()
pieceIdx++
compress = pieceIdx
}
for !input.eof {
if pieceIdx == 8 {
if err := p.handleError(u, errors.IPv6TooManyPieces, true); err != nil {
return "", err
}
}
// A ':' at the start of a piece marks the compression; only one is allowed.
if c == ':' {
if compress >= 0 {
if err := p.handleError(u, errors.IPv6MultipleCompression, true); err != nil {
return "", err
}
}
c = input.nextCodePoint()
pieceIdx++
compress = pieceIdx
continue
}
// Read up to four hex digits into value.
value := 0
length := 0
for length < 4 && ASCIIHexDigit.Test(uint(c)) {
v, _ := strconv.ParseInt(string(c), 16, 32)
value = value*0x10 + int(v)
c = input.nextCodePoint()
length++
}
if c == '.' {
// The digits just read belong to an embedded IPv4 address: go back and
// re-read them as decimal numbers.
if length == 0 {
if err := p.handleError(u, errors.IPv4InIPv6InvalidCodePoint, true); err != nil {
return "", err
}
}
input.rewind(length + 1)
c = input.nextCodePoint()
// The IPv4 address occupies the last two pieces.
if pieceIdx > 6 {
if err := p.handleError(u, errors.IPv4InIPv6TooManyPieces, true); err != nil {
return "", err
}
}
numbersSeen := 0
for !input.eof {
// ipv4Piece is -1 until the first digit of the current number is read.
ipv4Piece := -1
if numbersSeen > 0 {
if c == '.' && numbersSeen < 4 {
c = input.nextCodePoint()
} else {
if err := p.handleError(u, errors.IPv4InIPv6InvalidCodePoint, true); err != nil {
return "", err
}
}
}
if !ASCIIDigit.Test(uint(c)) {
if err := p.handleError(u, errors.IPv4InIPv6InvalidCodePoint, true); err != nil {
return "", err
}
}
for ASCIIDigit.Test(uint(c)) {
number, _ := strconv.Atoi(string(c))
if ipv4Piece < 0 {
ipv4Piece = number
} else if ipv4Piece == 0 {
// A further digit after a leading 0 is not allowed.
if err := p.handleError(u, errors.IPv4InIPv6InvalidCodePoint, true); err != nil {
return "", err
}
} else {
ipv4Piece = ipv4Piece*10 + number
}
if ipv4Piece > 255 {
if err := p.handleError(u, errors.IPv4InIPv6OutOfRangePart, true); err != nil {
return "", err
}
}
c = input.nextCodePoint()
}
// Two IPv4 numbers are packed into each 16-bit piece.
address[pieceIdx] = address[pieceIdx]*0x100 + uint16(ipv4Piece)
numbersSeen++
if numbersSeen == 2 || numbersSeen == 4 {
pieceIdx++
}
}
if numbersSeen != 4 {
if err := p.handleError(u, errors.IPv4InIPv6TooFewParts, true); err != nil {
return "", err
}
}
// An embedded IPv4 address always ends the IPv6 address.
break
} else if c == ':' {
// A piece separator must be followed by more input.
c = input.nextCodePoint()
if input.eof {
if err := p.handleError(u, errors.IPv6InvalidCodePoint, true); err != nil {
return "", err
}
}
} else if !input.eof {
if err := p.handleError(u, errors.IPv6InvalidCodePoint, true); err != nil {
return "", err
}
}
address[pieceIdx] = uint16(value)
pieceIdx++
}
if compress >= 0 {
// Move the pieces that were parsed after the "::" to the end of the
// address, so that the zero pieces end up where the compression was.
swaps := pieceIdx - compress
pieceIdx = 7
for pieceIdx != 0 && swaps > 0 {
t := address[pieceIdx]
address[pieceIdx] = address[compress+swaps-1]
address[compress+swaps-1] = t
pieceIdx--
swaps--
}
} else if compress < 0 && pieceIdx != 8 {
// Without compression all eight pieces must be present.
if err := p.handleError(u, errors.IPv6TooFewPieces, true); err != nil {
return "", err
}
}
u.isIPv6 = true
return "[" + address.String() + "]", nil
}
// parseOpaqueHost parses the host of a URL that is not special.
// A forbidden host code point is a failure, unless laxHostParsing is enabled,
// in which case input is returned unchanged. Code points that are not URL
// code points and invalid percent-encoded sequences are reported as
// validation errors that are not failures. The result is input with each
// code point percent-encoded according to C0PercentEncodeSet.
func (p *parser) parseOpaqueHost(u *Url, input string) (string, error) {
	var encoded strings.Builder
	for pos, r := range input {
		if ForbiddenHostCodePoint.Test(uint(r)) {
			if p.opts.laxHostParsing {
				return input, nil
			}
			if err := p.handleErrorWithDescription(u, errors.HostInvalidCodePoint, true, string(r)); err != nil {
				return "", err
			}
		}

		switch {
		case r == '%':
			// A '%' must start a valid percent-encoded sequence.
			if invalid, descr := remainingIsInvalidPercentEncoded([]rune(input[pos:])); invalid {
				if err := p.handleErrorWithDescription(u, errors.InvalidURLUnit, false, descr); err != nil {
					return "", err
				}
			}
		case !isURLCodePoint(r):
			if err := p.handleErrorWithDescription(u, errors.InvalidURLUnit, false, string(r)); err != nil {
				return "", err
			}
		}

		encoded.WriteString(p.percentEncodeRune(r, C0PercentEncodeSet))
	}
	return encoded.String(), nil
}
// IPv6Addr holds the eight 16-bit pieces of an IPv6 address, first piece at index 0.
type IPv6Addr [8]uint16

// String serializes the address as colon-separated lowercase hexadecimal
// pieces without leading zeros. The first longest run of two or more zero
// pieces is compressed to "::". No surrounding brackets are added.
func (address *IPv6Addr) String() string {
	// Find the first longest run of at least two consecutive zero pieces.
	compress, bestLen := -1, 0
	runStart, runLen := -1, 0
	for idx, piece := range address {
		if piece == 0 {
			if runLen == 0 {
				runStart = idx
			}
			runLen++
			continue
		}
		if runLen > 1 && runLen > bestLen {
			compress, bestLen = runStart, runLen
		}
		runStart, runLen = -1, 0
	}
	// A run that extends to the last piece is only closed here.
	if runLen > 1 && runLen > bestLen {
		compress = runStart
	}

	buf := make([]byte, 0, 39)
	skippingZeros := false
	for idx := 0; idx < len(address); idx++ {
		piece := address[idx]
		if skippingZeros {
			if piece == 0 {
				continue
			}
			skippingZeros = false
		}
		if idx == compress {
			// At the start both colons are needed; elsewhere the previous
			// piece has already written one of them.
			if idx == 0 {
				buf = append(buf, "::"...)
			} else {
				buf = append(buf, ':')
			}
			skippingZeros = true
			continue
		}
		buf = strconv.AppendUint(buf, uint64(piece), 16)
		if idx != 7 {
			buf = append(buf, ':')
		}
	}
	return string(buf)
}
// IPv4Addr is an IPv4 address stored as a 32-bit number, with the first
// dotted-decimal part in the most significant byte.
type IPv4Addr uint32

// String renders the address in dotted-decimal notation.
func (address IPv4Addr) String() string {
	buf := make([]byte, 0, len("255.255.255.255"))
	for shift := 24; shift >= 0; shift -= 8 {
		buf = strconv.AppendInt(buf, int64((address>>uint(shift))&0xFF), 10)
		if shift > 0 {
			buf = append(buf, '.')
		}
	}
	return string(buf)
}
// idnaProfile is the shared IDNA profile that ToASCII uses to convert a
// domain to its ASCII form. It is configured with lookup mapping, the Bidi
// rule, label validation and joiner checks enabled, and with DNS length
// verification, hyphen checks and transitional processing disabled.
var idnaProfile = idna.New(
	idna.MapForLookup(),
	idna.BidiRule(),
	idna.VerifyDNSLength(false),
	idna.StrictDomainName(true),
	idna.ValidateLabels(true),
	idna.CheckHyphens(false),
	idna.CheckJoiners(true),
	idna.Transitional(false),
)
// ToASCII converts a domain to its ASCII form using the package IDNA profile.
// https://url.spec.whatwg.org/#concept-domain-to-ascii
//
// An empty src yields an empty result. When an encoding override is
// configured, src is first passed through stringToUnicode; a failure of that
// step is ignored and the original src is used.
//
// If the IDNA conversion fails, the result is still accepted when beStrict is
// false and src contains only ASCII (or the UTS #46 section 4.1.1 characters)
// and no punycode label, or when lax host parsing is enabled. Otherwise the
// conversion error is returned. An empty conversion result is always an error.
func (p *parser) ToASCII(src string, beStrict bool) (string, error) {
	if src == "" {
		return "", nil
	}

	if p.opts.encodingOverride != nil {
		if converted, convErr := p.stringToUnicode(src); convErr == nil {
			src = converted
		}
	}

	ascii, err := idnaProfile.ToASCII(src)
	switch {
	case err == nil:
		// Conversion succeeded; fall through to the empty-result check.
	case !beStrict && containsOnlyASCIIOrMiscAndNoPunycode(src):
		return ascii, nil
	case !p.opts.laxHostParsing:
		return ascii, err
	}

	if ascii == "" {
		return "", fmt.Errorf("idna toAscii returned empty string")
	}
	return ascii, nil
}
// containsOnlyASCIIOrMiscAndNoPunycode reports whether s, after lowercasing,
// consists solely of ASCII characters and the characters from Section 4.1.1
// of UTS #46 (U+2260, U+226E, U+226F), and has no dot-separated label that
// starts with the ACE prefix "xn--".
func containsOnlyASCIIOrMiscAndNoPunycode(s string) bool {
	lowered := strings.ToLower(s)

	for _, r := range lowered {
		if r < utf8.RuneSelf {
			continue
		}
		switch r {
		case '\u2260', '\u226e', '\u226f':
			// Allowed non-ASCII characters.
		default:
			return false
		}
	}

	for _, label := range strings.Split(lowered, ".") {
		if strings.HasPrefix(label, "xn--") {
			return false
		}
	}
	return true
}
// stringToUnicode encodes every rune of src with the configured encoding
// override and returns the resulting bytes reinterpreted as a string.
//
// It returns an error if a rune cannot be encoded by the override, or if it
// encodes to a byte value of 31 or lower. The caller must ensure that
// p.opts.encodingOverride is non-nil.
func (p *parser) stringToUnicode(src string) (string, error) {
	// Every rune encodes to exactly one byte, so len(src) is an upper bound.
	bb := make([]byte, 0, len(src))
	for _, r := range src {
		b, ok := p.opts.encodingOverride.EncodeRune(r)
		if !ok || b <= 31 {
			return "", fmt.Errorf("could not convert %v to Unicode using %v", src, p.opts.encodingOverride.String())
		}
		bb = append(bb, b)
	}
	return string(bb), nil
}
// percentEncodeString percent-encodes s byte by byte: each byte is passed to
// percentEncodeByte with the given encode set and the results are concatenated.
func percentEncodeString(s string, tr *PercentEncodeSet) string {
	var encoded strings.Builder
	for i := 0; i < len(s); i++ {
		encoded.WriteString(percentEncodeByte(s[i], tr))
	}
	return encoded.String()
}
// percentEncodeByte returns b unchanged (as a one-byte string) when tr is
// non-nil and says the byte need not be encoded. Otherwise it returns '%'
// followed by the two uppercase hexadecimal digits of b. A nil tr therefore
// encodes every byte.
func percentEncodeByte(b byte, tr *PercentEncodeSet) string {
	const upperHex = "0123456789ABCDEF"

	mustEncode := tr == nil || tr.ByteShouldBeEncoded(b)
	if !mustEncode {
		return string(b)
	}
	return string([]byte{'%', upperHex[b>>4], upperHex[b&15]})
}
+118
View File
@@ -0,0 +1,118 @@
/*
* Copyright 2020 National Library of Norway.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package url
import (
"strings"
"unicode/utf8"
)
// inputString is a cursor over the code points of a string, used by the
// parser to step forwards and backwards through its input.
type inputString struct {
	// s is the original string.
	s string
	// runes is s converted to a rune slice.
	runes []rune
	// pointer is the index in runes of the current code point; it is -1
	// before the first call to nextCodePoint.
	pointer int
	// eof is set once the pointer has moved past the last code point and is
	// cleared again by the rewind and reset methods.
	eof bool
	// length is len(runes).
	length int
}
// newInputString returns a cursor over s positioned before the first code
// point, so that the first call to nextCodePoint yields the first rune.
func newInputString(s string) *inputString {
	runes := []rune(s)
	return &inputString{
		s:       s,
		runes:   runes,
		pointer: -1,
		length:  len(runes),
	}
}
// nextCodePoint advances the cursor by one and returns the code point at the
// new position. When the cursor moves past the end, eof is set and
// utf8.RuneError is returned.
func (i *inputString) nextCodePoint() rune {
	i.pointer++
	if i.pointer < i.length {
		return i.runes[i.pointer]
	}
	i.eof = true
	return utf8.RuneError
}
// currentIsInvalid reports whether the code point under the cursor is
// utf8.RuneError. It returns false when the cursor is not on a code point
// (before the first call to nextCodePoint, or past the end) instead of
// panicking on an out-of-range index.
func (i *inputString) currentIsInvalid() bool {
	if i.pointer < 0 || i.pointer >= len(i.runes) {
		return false
	}
	return i.runes[i.pointer] == utf8.RuneError
}
// getCurrentAsByte returns the byte of the original string at which the
// current code point starts. When the cursor is past the end it sets eof and
// returns 0.
//
// The byte offset is found by decoding the original string rather than by
// summing the encoded length of the runes: an invalid byte decodes to
// utf8.RuneError, whose encoded length is 3, while it occupies a single byte
// in the original string.
func (i *inputString) getCurrentAsByte() byte {
	if i.pointer >= i.length {
		i.eof = true
		return 0
	}
	pos := 0
	for j := 0; j < i.pointer && pos < len(i.s); j++ {
		_, size := utf8.DecodeRuneInString(i.s[pos:])
		pos += size
	}
	if pos >= len(i.s) {
		// Nothing to read, e.g. an empty string with the cursor at -1.
		return 0
	}
	return i.s[pos]
}
// rewindLast moves the cursor back by one code point and clears eof, so the
// next call to nextCodePoint returns the current code point again.
func (i *inputString) rewindLast() {
	i.pointer--
	i.eof = false
}
// reset puts the cursor back before the first code point and clears eof.
func (i *inputString) reset() {
	i.eof = false
	i.pointer = -1
}
// rewind moves the cursor back by length code points and clears eof.
func (i *inputString) rewind(length int) {
	i.eof = false
	i.pointer -= length
}
// remainingFromPointer returns the input from the current code point
// (inclusive) to the end, or the empty string once eof has been reached.
func (i *inputString) remainingFromPointer() string {
	if !i.eof {
		return string(i.runes[i.pointer:])
	}
	return ""
}
// remainingStartsWith reports whether the input following the current code
// point (the current code point itself excluded) begins with s. It returns
// false once eof has been reached.
func (i *inputString) remainingStartsWith(s string) bool {
	if i.eof {
		return false
	}
	afterCurrent := string(i.runes[i.pointer+1:])
	return strings.HasPrefix(afterCurrent, s)
}
// remainingIsInvalidPercentEncoded applies the package-level
// remainingIsInvalidPercentEncoded to the input starting at the current code
// point: it returns true if the current code point is '%' and is not followed
// by two hex digits. If true, the second return value is the invalid percent
// encoded string.
func (i *inputString) remainingIsInvalidPercentEncoded() (bool, string) {
	return remainingIsInvalidPercentEncoded(i.runes[i.pointer:])
}
// remainingIsInvalidPercentEncoded reports whether runes starts with a '%'
// that is not followed by two ASCII hex digits. When it does, the second
// return value is the offending sequence: the first three runes, or all of
// them if fewer than three are available. Input that does not start with '%'
// is never considered invalid.
func remainingIsInvalidPercentEncoded(runes []rune) (bool, string) {
	if len(runes) == 0 || runes[0] != '%' {
		return false, ""
	}
	if len(runes) < 3 {
		return true, string(runes)
	}
	if ASCIIHexDigit.Test(uint(runes[1])) && ASCIIHexDigit.Test(uint(runes[2])) {
		return false, ""
	}
	return true, string(runes[:3])
}
// String returns the whole input, rebuilt from the rune slice and
// independent of the cursor position.
func (i *inputString) String() string {
	return string(i.runes)
}
+932
View File
@@ -0,0 +1,932 @@
/*
* Copyright 2020 National Library of Norway.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package url
import (
goerrors "errors"
u2 "net/url"
"strconv"
"strings"
"unicode"
"unicode/utf8"
"github.com/bits-and-blooms/bitset"
"github.com/nlnwa/whatwg-url/errors"
)
// NewParser returns a Parser that starts from the default parser options and
// then applies opts in the order given.
func NewParser(opts ...ParserOption) Parser {
	p := new(parser)
	p.opts = defaultParserOptions()
	for i := range opts {
		opts[i].apply(&p.opts)
	}
	return p
}
// Parser parses URL strings into Url values.
type Parser interface {
	// Parse parses rawUrl without a base URL.
	Parse(rawUrl string) (*Url, error)
	// ParseRef parses ref relative to the base URL rawUrl.
	ParseRef(rawUrl, ref string) (*Url, error)
	// BasicParser runs the basic URL parser on urlOrRef with an optional
	// base URL, an optional existing url to update, and an optional state
	// override (NoState for none).
	BasicParser(urlOrRef string, base *Url, url *Url, stateOverride State) (*Url, error)
	// PercentEncodeString percent-encodes s using the encode set tr.
	PercentEncodeString(s string, tr *PercentEncodeSet) string
	// NewUrl returns a new, empty Url bound to this parser.
	NewUrl() *Url
}
// parser is the Parser implementation returned by NewParser.
type parser struct {
	// opts holds the configuration assembled from the ParserOption values.
	opts parserOptions
}
// Parse parses rawUrl with no base URL and no state override.
func (p *parser) Parse(rawUrl string) (*Url, error) {
	return p.BasicParser(rawUrl, nil, nil, NoState)
}
// ParseRef parses ref relative to the base URL rawUrl. The base is parsed
// first and any error from that step is returned. An empty rawUrl means
// there is no base, and ref is then parsed on its own.
func (p *parser) ParseRef(rawUrl, ref string) (*Url, error) {
	if rawUrl != "" {
		base, err := p.Parse(rawUrl)
		if err != nil {
			return nil, err
		}
		return p.BasicParser(ref, base, nil, NoState)
	}
	return p.Parse(ref)
}
// Parse parses ref with u as the base URL, using the parser u is bound to.
func (u *Url) Parse(ref string) (*Url, error) {
	return u.parser.BasicParser(ref, u, nil, NoState)
}
// defaultParser is a parser with default options, used by the package-level
// Parse and ParseRef functions.
var defaultParser = NewParser()
// Parse parses rawUrl with the default parser.
func Parse(rawUrl string) (*Url, error) {
	return defaultParser.Parse(rawUrl)
}
// ParseRef parses ref relative to the base URL rawUrl with the default parser.
func ParseRef(rawUrl, ref string) (*Url, error) {
	return defaultParser.ParseRef(rawUrl, ref)
}
// State identifies a state of the basic URL parser's state machine. It is
// also the type of the state override accepted by BasicParser.
type State int

// The parser states. NoState is the zero value and means that no state
// override is given; BasicParser then starts in StateSchemeStart.
const (
	NoState State = iota
	StateSchemeStart
	StateScheme
	StateNoScheme
	StateOpaquePath
	StateSpecialRelativeOrAuthority
	StateSpecialAuthoritySlashes
	StateSpecialAuthorityIgnoreSlashes
	StatePathOrAuthority
	StateAuthority
	StateHost
	StateHostname
	StateFile
	StateFileHost
	StateFileSlash
	StatePort
	StatePath
	StatePathStart
	StateQuery
	StateFragment
	StateRelative
	StateRelativeSlash
)
// BasicParser implements WHATWG basic URL parser (https://url.spec.whatwg.org/#concept-basic-url-parser)
// In most cases, when possible, prefer using the higher level Parse method.
//
// urlOrRef is the input to parse. baseUrl, if non-nil, is the base URL that a
// relative input is resolved against; the parser works on a clone of it. url,
// if non-nil, is an existing Url that is updated in place; otherwise a new Url
// is created and leading/trailing C0 control and space characters are trimmed
// from the input. stateOverride selects the state the state machine starts
// in; pass NoState for an ordinary parse.
//
// ASCII tab and newline characters are removed from the input before parsing.
// Validation problems are passed to the parser's error handler; whenever the
// handler returns an error, parsing stops and that error is returned. Errors
// from host parsing are returned together with the partially populated url.
// When a state override is given, the function returns url as soon as the
// overridden component has been processed.
func (p *parser) BasicParser(urlOrRef string, baseUrl *Url, url *Url, stateOverride State) (*Url, error) {
	stateOverridden := stateOverride > NoState
	if url == nil {
		url = &Url{inputUrl: urlOrRef, path: &path{}}
		if i, changed := trim(url.inputUrl, C0OrSpacePercentEncodeSet); changed {
			if err := p.handleError(url, errors.InvalidURLUnit, false); err != nil {
				return nil, err
			}
			url.inputUrl = i
		}
	} else {
		url.inputUrl = urlOrRef
	}
	url.parser = p
	if i, changed := remove(url.inputUrl, ASCIITabOrNewline); changed {
		if err := p.handleError(url, errors.InvalidURLUnit, false); err != nil {
			return nil, err
		}
		url.inputUrl = i
	}
	input := newInputString(url.inputUrl)
	var state State
	if stateOverridden {
		state = stateOverride
	} else {
		state = StateSchemeStart
	}
	var buffer strings.Builder
	atFlag := false
	bracketFlag := false
	passwordTokenSeenFlag := false
	var base *Url
	if baseUrl != nil {
		base = baseUrl.Clone()
	}
	// Each iteration consumes one code point. At the end of the input r is
	// utf8.RuneError and input.eof is set; every state gets one final pass
	// with eof set before the loop ends.
	for {
		r := input.nextCodePoint()
		switch state {
		case StateSchemeStart:
			if ASCIIAlpha.Test(uint(r)) {
				buffer.WriteRune(unicode.ToLower(r))
				state = StateScheme
			} else if !stateOverridden {
				state = StateNoScheme
				input.rewindLast()
			} else {
				if err := p.handleError(url, errors.InvalidURLUnit, true); err != nil {
					return nil, err
				}
			}
		case StateScheme:
			// Scheme code points are ASCII alphanumerics, '+', '-' and '.'.
			// Testing the three extra characters directly avoids cloning a
			// bitset for every code point.
			if ASCIIAlphanumeric.Test(uint(r)) || r == '+' || r == '-' || r == '.' {
				buffer.WriteRune(unicode.ToLower(r))
			} else if r == ':' {
				if stateOverridden {
					// If urls scheme is a special scheme and buffer is not a special scheme, then return.
					if url.isSpecialScheme(url.scheme) && !url.isSpecialScheme(buffer.String()) {
						return url, nil
					}
					// If urls scheme is not a special scheme and buffer is a special scheme, then return.
					if !url.isSpecialScheme(url.scheme) && url.isSpecialScheme(buffer.String()) {
						return url, nil
					}
					// If url includes credentials or has a non-null port, and buffer is "file", then return.
					if (url.username != "" || url.password != "" || url.port != nil) && buffer.String() == "file" {
						return url, nil
					}
					// If urls scheme is "file" and its host is an empty host or null, then return.
					// The nil check keeps a null host from being dereferenced.
					if url.scheme == "file" && (url.host == nil || *url.host == "") {
						return url, nil
					}
				}
				url.scheme = buffer.String()
				if stateOverridden {
					url.cleanDefaultPort()
					return url, nil
				}
				buffer.Reset()
				if url.scheme == "file" {
					if !input.remainingStartsWith("//") {
						if err := p.handleError(url, errors.SpecialSchemeMissingFollowingSolidus, false); err != nil {
							return nil, err
						}
					}
					state = StateFile
				} else if url.IsSpecialScheme() && base != nil && base.scheme == url.scheme {
					state = StateSpecialRelativeOrAuthority
				} else if url.IsSpecialScheme() {
					state = StateSpecialAuthoritySlashes
				} else if input.remainingStartsWith("/") {
					state = StatePathOrAuthority
					input.nextCodePoint()
				} else {
					url.path.setOpaque("")
					state = StateOpaquePath
				}
			} else if !stateOverridden {
				// Not a scheme after all: start over from the beginning of
				// the input in the no-scheme state.
				buffer.Reset()
				state = StateNoScheme
				input.reset()
			} else {
				if err := p.handleError(url, errors.InvalidURLUnit, true); err != nil {
					return nil, err
				}
			}
		case StateNoScheme:
			if base == nil || (base.path.isOpaque() && r != '#') {
				if err := p.handleError(url, errors.MissingSchemeNonRelativeURL, true); err != nil {
					return nil, err
				}
			} else if base.path.isOpaque() && r == '#' {
				url.scheme = base.scheme
				url.path = base.path
				url.query = base.query
				url.fragment = new(string)
				state = StateFragment
			} else if base.scheme != "file" {
				state = StateRelative
				input.rewindLast()
			} else {
				state = StateFile
				input.rewindLast()
			}
		case StateSpecialRelativeOrAuthority:
			if r == '/' && input.remainingStartsWith("/") {
				state = StateSpecialAuthorityIgnoreSlashes
				input.nextCodePoint()
			} else {
				if err := p.handleError(url, errors.SpecialSchemeMissingFollowingSolidus, false); err != nil {
					return nil, err
				}
				state = StateRelative
				input.rewindLast()
			}
		case StatePathOrAuthority:
			if r == '/' {
				state = StateAuthority
			} else {
				state = StatePath
				input.rewindLast()
			}
		case StateRelative:
			url.scheme = base.scheme
			if r == '/' {
				state = StateRelativeSlash
			} else if url.isSpecialSchemeAndBackslash(r) {
				if err := p.handleError(url, errors.InvalidReverseSolidus, false); err != nil {
					return nil, err
				}
				state = StateRelativeSlash
			} else {
				url.username = base.username
				url.password = base.password
				url.host = base.host
				url.port = base.port
				url.decodedPort = base.decodedPort
				url.path = base.path
				url.query = base.query
				if r == '?' {
					url.query = new(string)
					state = StateQuery
				} else if r == '#' {
					url.fragment = new(string)
					state = StateFragment
				} else if !input.eof {
					url.query = nil
					url.path.shortenPath(url.scheme)
					state = StatePath
					input.rewindLast()
				}
			}
		case StateRelativeSlash:
			if url.IsSpecialScheme() && (r == '/' || r == '\\') {
				if r == '\\' {
					if err := p.handleError(url, errors.InvalidReverseSolidus, false); err != nil {
						return nil, err
					}
				}
				state = StateSpecialAuthorityIgnoreSlashes
			} else if r == '/' {
				state = StateAuthority
			} else {
				url.username = base.username
				url.password = base.password
				url.host = base.host
				url.port = base.port
				url.decodedPort = base.decodedPort
				state = StatePath
				input.rewindLast()
			}
		case StateSpecialAuthoritySlashes:
			if r == '/' && input.remainingStartsWith("/") {
				state = StateSpecialAuthorityIgnoreSlashes
				input.nextCodePoint()
			} else {
				if err := p.handleError(url, errors.SpecialSchemeMissingFollowingSolidus, false); err != nil {
					return nil, err
				}
				state = StateSpecialAuthorityIgnoreSlashes
				input.rewindLast()
			}
		case StateSpecialAuthorityIgnoreSlashes:
			if r != '/' && r != '\\' {
				state = StateAuthority
				input.rewindLast()
			} else {
				if err := p.handleError(url, errors.SpecialSchemeMissingFollowingSolidus, false); err != nil {
					return nil, err
				}
			}
		case StateAuthority:
			if r == '@' {
				if err := p.handleError(url, errors.InvalidCredentials, false); err != nil {
					return nil, err
				}
				if atFlag {
					// Prepend %40 to buffer
					tmp := buffer.String()
					buffer.Reset()
					buffer.WriteString("%40")
					buffer.WriteString(tmp)
				}
				atFlag = true
				// Split the buffered credentials at the first ':' into
				// username and password, percent-encoding each code point.
				bb := newInputString(buffer.String())
				c := bb.nextCodePoint()
				for !bb.eof {
					if c == ':' && !passwordTokenSeenFlag {
						passwordTokenSeenFlag = true
						c = bb.nextCodePoint()
						continue
					}
					encodedCodePoints := p.percentEncodeRune(c, UserInfoPercentEncodeSet)
					if passwordTokenSeenFlag {
						url.password += encodedCodePoints
					} else {
						url.username += encodedCodePoints
					}
					c = bb.nextCodePoint()
				}
				buffer.Reset()
			} else if (input.eof || r == '/' || r == '?' || r == '#') || url.isSpecialSchemeAndBackslash(r) {
				if atFlag && buffer.Len() == 0 {
					if err := p.handleError(url, errors.InvalidCredentials, true); err != nil {
						return nil, err
					}
				}
				// Go back to the start of the buffered host so that the
				// host state re-reads it.
				input.rewind(len([]rune(buffer.String())) + 1)
				buffer.Reset()
				state = StateHost
			} else {
				buffer.WriteRune(r)
			}
		case StateHost:
			fallthrough
		case StateHostname:
			if stateOverridden && url.scheme == "file" {
				input.rewindLast()
				state = StateFileHost
			} else if r == ':' && !bracketFlag {
				if buffer.Len() == 0 {
					if err := p.handleError(url, errors.HostMissing, true); err != nil {
						return nil, err
					}
				}
				if stateOverride == StateHostname {
					return url, nil
				}
				host, err := p.parseHost(url, p, buffer.String(), !url.IsSpecialScheme())
				if err != nil {
					return url, err
				}
				url.host = &host
				buffer.Reset()
				state = StatePort
			} else if input.eof || (r == '/' || r == '?' || r == '#' || url.isSpecialSchemeAndBackslash(r)) {
				input.rewindLast()
				if url.IsSpecialScheme() && buffer.Len() == 0 {
					if err := p.handleError(url, errors.HostMissing, true); err != nil {
						return nil, err
					}
				} else if stateOverridden && buffer.Len() == 0 && (url.username != "" || url.password != "" || url.port != nil) {
					return url, nil
				} else {
					host, err := p.parseHost(url, p, buffer.String(), !url.IsSpecialScheme())
					if err != nil {
						return url, err
					}
					url.host = &host
					buffer.Reset()
					state = StatePathStart
					if stateOverridden {
						return url, nil
					}
				}
			} else {
				// A ':' inside square brackets belongs to an IPv6 literal
				// and must not be taken for the port separator.
				if r == '[' {
					bracketFlag = true
				} else if r == ']' {
					bracketFlag = false
				}
				if input.currentIsInvalid() && p.opts.acceptInvalidCodepoints {
					buffer.WriteString(string([]byte{input.getCurrentAsByte()}))
				} else {
					buffer.WriteRune(r)
				}
			}
		case StatePort:
			if ASCIIDigit.Test(uint(r)) {
				buffer.WriteRune(r)
			} else if (input.eof || r == '/' || r == '?' || r == '#') || url.isSpecialSchemeAndBackslash(r) || stateOverridden {
				if buffer.Len() > 0 {
					port, err := strconv.Atoi(buffer.String())
					if port > 65535 || goerrors.Is(err, strconv.ErrRange) {
						if err := p.handleWrappedError(url, errors.PortOutOfRange, true, err); err != nil {
							return nil, err
						}
					}
					portString := strconv.Itoa(port)
					url.decodedPort = port
					url.port = &portString
					url.cleanDefaultPort()
					buffer.Reset()
				}
				if stateOverridden {
					return url, nil
				}
				state = StatePathStart
				input.rewindLast()
			} else {
				if err := p.handleError(url, errors.PortInvalid, true); err != nil {
					return nil, err
				}
			}
		case StateFile:
			url.scheme = "file"
			url.host = new(string)
			if r == '/' || r == '\\' {
				if r == '\\' {
					if err := p.handleError(url, errors.InvalidReverseSolidus, false); err != nil {
						return nil, err
					}
				}
				state = StateFileSlash
			} else if base != nil && base.scheme == "file" {
				url.host = base.host
				url.path = base.path
				url.query = base.query
				if r == '?' {
					url.query = new(string)
					state = StateQuery
				} else if r == '#' {
					url.fragment = new(string)
					state = StateFragment
				} else if !input.eof {
					url.query = nil
					if !startsWithAWindowsDriveLetter(input.remainingFromPointer()) {
						url.path.shortenPath(url.scheme)
					} else {
						if err := p.handleError(url, errors.FileInvalidWindowsDriveLetter, false); err != nil {
							return nil, err
						}
						url.path.init()
					}
					state = StatePath
					input.rewindLast()
				}
			} else {
				state = StatePath
				input.rewindLast()
			}
		case StateFileSlash:
			if r == '/' || r == '\\' {
				if r == '\\' {
					if err := p.handleError(url, errors.InvalidReverseSolidus, false); err != nil {
						return nil, err
					}
				}
				state = StateFileHost
			} else {
				if base != nil && base.scheme == "file" {
					url.host = base.host
					// The length check guards against a base whose path has no segments.
					if !startsWithAWindowsDriveLetter(input.remainingFromPointer()) && base.path != nil && len(base.path.p) > 0 && isNormalizedWindowsDriveLetter(base.path.p[0]) {
						// This is a (platform-independent) Windows drive letter quirk. Both urls and bases host are null under these conditions and therefore not copied
						url.path.addSegment(base.path.p[0])
					}
				}
				state = StatePath
				input.rewindLast()
			}
		case StateFileHost:
			if input.eof || r == '/' || r == '\\' || r == '?' || r == '#' {
				input.rewindLast()
				if !stateOverridden && isWindowsDriveLetter(buffer.String()) {
					if err := p.handleError(url, errors.FileInvalidWindowsDriveLetterHost, false); err != nil {
						return nil, err
					}
					state = StatePath
				} else if buffer.Len() == 0 {
					url.host = new(string)
					if stateOverridden {
						// Return the updated url, like every other
						// state-override return in this function.
						return url, nil
					}
					state = StatePathStart
				} else {
					host, err := p.parseHost(url, p, buffer.String(), !url.IsSpecialScheme())
					if err != nil {
						return url, err
					}
					if host == "localhost" {
						host = ""
					}
					url.host = &host
					if stateOverridden {
						return url, nil
					}
					buffer.Reset()
					state = StatePathStart
				}
			} else {
				buffer.WriteRune(r)
			}
		case StatePathStart:
			if url.IsSpecialScheme() && !p.opts.skipTrailingSlashNormalization {
				if r == '\\' {
					if err := p.handleError(url, errors.InvalidReverseSolidus, false); err != nil {
						return nil, err
					}
				}
				state = StatePath
				if r != '/' && r != '\\' {
					input.rewindLast()
				}
			} else if !stateOverridden && r == '?' {
				url.query = new(string)
				state = StateQuery
			} else if !stateOverridden && r == '#' {
				url.fragment = new(string)
				state = StateFragment
			} else if !input.eof {
				state = StatePath
				if r != '/' {
					input.rewindLast()
				}
			} else if stateOverridden && url.host == nil {
				url.path.addSegment("")
			}
		case StatePath:
			if (input.eof || r == '/') ||
				url.isSpecialSchemeAndBackslash(r) ||
				(!stateOverridden && (r == '?' || r == '#')) {
				// End of a path segment: resolve "." and ".." and append
				// the buffered segment to the path.
				if url.isSpecialSchemeAndBackslash(r) {
					if err := p.handleError(url, errors.InvalidReverseSolidus, false); err != nil {
						return nil, err
					}
				}
				if isDoubleDotPathSegment(buffer.String()) {
					url.path.shortenPath(url.scheme)
					if r != '/' && !url.isSpecialSchemeAndBackslash(r) {
						url.path.addSegment("")
					}
				} else if isSingleDotPathSegment(buffer.String()) && r != '/' && !url.isSpecialSchemeAndBackslash(r) {
					url.path.addSegment("")
				} else if !isSingleDotPathSegment(buffer.String()) {
					if url.scheme == "file" && url.path.isEmpty() && isWindowsDriveLetter(buffer.String()) {
						// replace second code point in buffer with U+003A (:).
						// This is a (platform-independent) Windows drive letter quirk.
						if !p.opts.skipWindowsDriveLetterNormalization {
							b := buffer.String()
							buffer.Reset()
							buffer.WriteString(b[0:1] + ":" + b[2:])
						}
					}
					if !p.opts.collapseConsecutiveSlashes || !url.IsSpecialScheme() || url.path.isEmpty() || len(url.path.p[len(url.path.p)-1]) > 0 {
						url.path.addSegment(buffer.String())
					} else {
						// Collapsing slashes: overwrite the empty trailing
						// segment instead of adding another one.
						url.path.p[len(url.path.p)-1] = buffer.String()
					}
				}
				buffer.Reset()
				if r == '?' {
					url.query = new(string)
					state = StateQuery
				} else if r == '#' {
					url.fragment = new(string)
					state = StateFragment
				}
			} else {
				if !isURLCodePoint(r) && r != '%' {
					if err := p.handleError(url, errors.InvalidURLUnit, false); err != nil {
						return nil, err
					}
				}
				invalidPercentEncoding, d := input.remainingIsInvalidPercentEncoded()
				if invalidPercentEncoding {
					if err := p.handleErrorWithDescription(url, errors.InvalidURLUnit, false, d); err != nil {
						return nil, err
					}
				}
				if invalidPercentEncoding {
					buffer.WriteString(p.percentEncodeInvalidRune(r, p.opts.pathPercentEncodeSet))
				} else {
					buffer.WriteString(p.percentEncodeRune(r, p.opts.pathPercentEncodeSet))
				}
			}
		case StateOpaquePath:
			if r == '?' {
				url.query = new(string)
				state = StateQuery
				buffer.Reset()
			} else if r == '#' {
				url.fragment = new(string)
				state = StateFragment
				buffer.Reset()
			} else if !input.eof {
				if !isURLCodePoint(r) && r != '%' {
					if err := p.handleError(url, errors.InvalidURLUnit, false); err != nil {
						return nil, err
					}
				}
				invalidPercentEncoding, d := input.remainingIsInvalidPercentEncoded()
				if invalidPercentEncoding {
					if err := p.handleErrorWithDescription(url, errors.InvalidURLUnit, false, d); err != nil {
						return nil, err
					}
					buffer.WriteString(p.percentEncodeInvalidRune(r, C0PercentEncodeSet))
				} else {
					buffer.WriteString(p.percentEncodeRune(r, C0PercentEncodeSet))
				}
				url.path.setOpaque(buffer.String())
			}
		case StateQuery:
			if !stateOverridden && r == '#' {
				url.fragment = new(string)
				state = StateFragment
				*url.query = buffer.String()
				buffer.Reset()
			} else if !input.eof {
				if !isURLCodePoint(r) && r != '%' {
					if err := p.handleError(url, errors.InvalidURLUnit, false); err != nil {
						return nil, err
					}
				}
				invalidPercentEncoding, d := input.remainingIsInvalidPercentEncoded()
				if invalidPercentEncoding {
					if err := p.handleErrorWithDescription(url, errors.InvalidURLUnit, false, d); err != nil {
						return nil, err
					}
				}
				encodeSet := p.opts.queryPercentEncodeSet
				if url.isSpecialScheme(url.scheme) {
					encodeSet = p.opts.specialQueryPercentEncodeSet
				}
				buffer.WriteString(p.percentEncodeRune(r, encodeSet))
			} else {
				q := buffer.String()
				url.query = &q
			}
		case StateFragment:
			if !input.eof {
				if !isURLCodePoint(r) && r != '%' {
					if err := p.handleError(url, errors.InvalidURLUnit, false); err != nil {
						return nil, err
					}
				}
				invalidPercentEncoding, d := input.remainingIsInvalidPercentEncoded()
				if invalidPercentEncoding {
					if err := p.handleErrorWithDescription(url, errors.InvalidURLUnit, false, d); err != nil {
						return nil, err
					}
				}
				encodeSet := p.opts.fragmentPercentEncodeSet
				if url.isSpecialScheme(url.scheme) {
					encodeSet = p.opts.specialFragmentPercentEncodeSet
				}
				buffer.WriteString(p.percentEncodeRune(r, encodeSet))
			} else {
				f := buffer.String()
				url.fragment = &f
			}
		}
		if input.eof {
			break
		}
	}
	return url, nil
}
// percentEncodeInvalidRune percent-encodes r with the encode set tr. When the
// percentEncodeSinglePercentSign option is enabled, the set obtained from
// tr.Set(0x25) is used instead, so that '%' itself is encoded.
func (p *parser) percentEncodeInvalidRune(r rune, tr *PercentEncodeSet) string {
	encodeSet := tr
	if p.opts.percentEncodeSinglePercentSign {
		encodeSet = tr.Set(0x25)
	}
	return p.percentEncodeRune(r, encodeSet)
}
// percentEncodeRune returns r unchanged when tr is non-nil and says the rune
// need not be encoded. Otherwise r is converted to bytes (a single byte from
// the encoding override when one is configured, its UTF-8 encoding otherwise)
// and every byte is written as '%' followed by two uppercase hex digits.
func (p *parser) percentEncodeRune(r rune, tr *PercentEncodeSet) string {
	if tr != nil && !tr.RuneShouldBeEncoded(r) {
		return string(r)
	}

	const upperHex = "0123456789ABCDEF"

	var raw [utf8.UTFMax]byte
	var n int
	if override := p.opts.encodingOverride; override != nil {
		// The "ok" result is deliberately ignored: whatever byte the
		// encoder returns is the one that gets percent-encoded.
		raw[0], _ = override.EncodeRune(r)
		n = 1
	} else {
		n = utf8.EncodeRune(raw[:], r)
	}

	encoded := make([]byte, 0, 3*utf8.UTFMax)
	for _, c := range raw[:n] {
		encoded = append(encoded, '%', upperHex[c>>4], upperHex[c&15])
	}
	return string(encoded)
}
// PercentEncodeString percent-encodes s rune by rune using the encode set tr.
// A '%' that is not followed by two ASCII hex digits is encoded with the set
// obtained from tr.Set(0x25) when the percentEncodeSinglePercentSign option
// is enabled; in every other case tr is used as is.
func (p *parser) PercentEncodeString(s string, tr *PercentEncodeSet) string {
	var encoded strings.Builder
	runes := []rune(s)
	for i, r := range runes {
		encodeSet := tr
		if r == '%' {
			startsTriplet := i+3 <= len(runes) &&
				ASCIIHexDigit.Test(uint(runes[i+1])) &&
				ASCIIHexDigit.Test(uint(runes[i+2]))
			if !startsTriplet && p.opts.percentEncodeSinglePercentSign {
				encodeSet = tr.Set(0x25)
			}
		}
		encoded.WriteString(p.percentEncodeRune(r, encodeSet))
	}
	return encoded.String()
}
// DecodePercentEncoded replaces every valid percent-encoded triplet in s with
// the byte it denotes and copies all other bytes through unchanged, including
// '%' signs that are not followed by two ASCII hex digits. When an encoding
// override is configured, each decoded byte is converted to a rune through
// that encoding. If unescaping a triplet fails, the output produced so far is
// returned.
func (p *parser) DecodePercentEncoded(s string) string {
	var decoded strings.Builder
	raw := []byte(s)
	for i := 0; i < len(raw); i++ {
		c := raw[i]
		isTriplet := c == '%' &&
			i+3 <= len(raw) &&
			ASCIIHexDigit.Test(uint(raw[i+1])) &&
			ASCIIHexDigit.Test(uint(raw[i+2]))
		if !isTriplet {
			decoded.WriteByte(c)
			continue
		}

		unescaped, err := u2.PathUnescape(string(raw[i : i+3]))
		if err != nil {
			return decoded.String()
		}
		if p.opts.encodingOverride != nil {
			decoded.WriteRune(p.opts.encodingOverride.DecodeByte(unescaped[0]))
		} else {
			decoded.WriteString(unescaped)
		}
		// Skip the two hex digits that were just consumed.
		i += 2
	}
	return decoded.String()
}
// NewUrl returns a new Url that is bound to this parser and has a freshly
// initialized path.
func (p *parser) NewUrl() *Url {
	u := &Url{parser: p, path: &path{}}
	u.path.init()
	return u
}
// isSingleDotPathSegment reports whether s is "." or its percent-encoded
// form "%2e", compared case-insensitively.
func isSingleDotPathSegment(s string) bool {
	switch strings.ToLower(s) {
	case ".", "%2e":
		return true
	}
	return false
}
// isDoubleDotPathSegment reports whether s is ".." or one of its partly or
// fully percent-encoded forms (".%2e", "%2e.", "%2e%2e"), compared
// case-insensitively.
func isDoubleDotPathSegment(s string) bool {
	switch strings.ToLower(s) {
	case "..", ".%2e", "%2e.", "%2e%2e":
		return true
	}
	return false
}
// startsWithAWindowsDriveLetter reports whether the first two bytes of s are
// a Windows drive letter and s either ends there or continues with '/', '\\',
// '?' or '#'.
func startsWithAWindowsDriveLetter(s string) bool {
	if len(s) < 2 || !isWindowsDriveLetter(s[:2]) {
		return false
	}
	if len(s) == 2 {
		return true
	}
	switch s[2] {
	case '/', '\\', '?', '#':
		return true
	}
	return false
}
// isWindowsDriveLetter reports whether s is exactly two bytes long, with the
// first in the ASCIIAlpha set and the second either ':' or '|'.
func isWindowsDriveLetter(s string) bool {
	if len(s) != 2 || !ASCIIAlpha.Test(uint(s[0])) {
		return false
	}
	return s[1] == ':' || s[1] == '|'
}
// isNormalizedWindowsDriveLetter reports whether s is exactly two bytes long,
// with the first in the ASCIIAlpha set and the second a ':'.
func isNormalizedWindowsDriveLetter(s string) bool {
	if len(s) != 2 || !ASCIIAlpha.Test(uint(s[0])) {
		return false
	}
	return s[1] == ':'
}
// trimPrefix strips the leading run of runes for which tr.RuneNotInSet is
// false. It returns the remaining string and whether anything was removed.
// If no rune of a non-empty s satisfies RuneNotInSet, the result is "" and
// true.
func trimPrefix(s string, tr *PercentEncodeSet) (string, bool) {
	if s == "" {
		return s, false
	}
	start := strings.IndexFunc(s, tr.RuneNotInSet)
	if start < 0 {
		return "", true
	}
	return s[start:], start > 0
}
// trimPostfix strips the trailing run of bytes for which tr.RuneNotInSet is
// false; the string is examined byte by byte from the end. It returns the
// remaining string and whether anything was removed. If no byte of a
// non-empty s satisfies RuneNotInSet, the result is "" and true.
func trimPostfix(s string, tr *PercentEncodeSet) (string, bool) {
	if s == "" {
		return s, false
	}
	end := len(s)
	for end > 0 && !tr.RuneNotInSet(int32(s[end-1])) {
		end--
	}
	if end == 0 {
		return "", true
	}
	return s[:end], end < len(s)
}
// trim applies trimPrefix and then trimPostfix to s. It returns the trimmed
// string and whether either step removed anything.
func trim(s string, tr *PercentEncodeSet) (string, bool) {
	head, headChanged := trimPrefix(s, tr)
	trimmed, tailChanged := trimPostfix(head, tr)
	return trimmed, headChanged || tailChanged
}
// remove deletes from s every byte whose value is set in tr. It returns the
// filtered string and whether at least one byte was removed.
func remove(s string, tr *bitset.BitSet) (string, bool) {
	if s == "" {
		return s, false
	}
	kept := make([]byte, 0, len(s))
	for i := 0; i < len(s); i++ {
		if c := s[i]; !tr.Test(uint(c)) {
			kept = append(kept, c)
		}
	}
	return string(kept), len(kept) != len(s)
}
// containsOnly reports whether every byte of s is set in tr. It is true for
// the empty string.
func containsOnly(s string, tr *bitset.BitSet) bool {
	for i := 0; i < len(s); i++ {
		if !tr.Test(uint(s[i])) {
			return false
		}
	}
	return true
}
// IsSpecialScheme reports whether the URL's own scheme is one of the special
// schemes configured on its parser.
func (u *Url) IsSpecialScheme() bool {
	return u.isSpecialScheme(u.scheme)
}
// isSpecialScheme reports whether s is one of the special schemes configured
// on the URL's parser.
func (u *Url) isSpecialScheme(s string) bool {
	_, special := u.getSpecialScheme(s)
	return special
}
// getSpecialScheme looks s up in the special schemes map of the URL's parser.
// It returns the default port registered for the scheme (as a string) and
// whether the scheme is present in the map.
func (u *Url) getSpecialScheme(s string) (string, bool) {
	dp, ok := u.parser.opts.specialSchemes[s]
	return dp, ok
}
// isSpecialSchemeAndBackslash reports whether the URL has a special scheme
// and r is a backslash.
func (u *Url) isSpecialSchemeAndBackslash(r rune) bool {
	return u.IsSpecialScheme() && r == '\\'
}
// cleanDefaultPort clears the port (and the decoded port) of a URL with a
// special scheme when the port is unset or equals the scheme's default port.
// URLs with a non-special scheme are left untouched.
func (u *Url) cleanDefaultPort() {
	defaultPort, special := u.getSpecialScheme(u.scheme)
	if !special {
		return
	}
	if u.port != nil && *u.port != defaultPort {
		return
	}
	u.port = nil
	u.decodedPort = 0
}
// getDefaultPort returns the default port registered for the URL's scheme.
// It returns 0 when the scheme is not special or when its registered default
// port is not a number.
func (u *Url) getDefaultPort() int {
	registered, special := u.getSpecialScheme(u.scheme)
	if !special {
		return 0
	}
	port, err := strconv.Atoi(registered)
	if err != nil {
		return 0
	}
	return port
}
+279
View File
@@ -0,0 +1,279 @@
/*
* Copyright 2020 National Library of Norway.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package url
import "golang.org/x/text/encoding/charmap"
// defaultSpecialSchemes maps each special scheme to its default port. The
// "file" scheme has no default port and maps to the empty string.
// The map is used as is by defaultParserOptions, so parsers created with
// default options share this single map value.
var defaultSpecialSchemes = map[string]string{
	"ftp": "21",
	"file": "",
	"http": "80",
	"https": "443",
	"ws": "80",
	"wss": "443",
}
// parserOptions configure a url parser. parserOptions are set by the ParserOption
// values passed to NewParser.
type parserOptions struct {
	// reportValidationErrors is set by WithReportValidationErrors.
	reportValidationErrors bool
	// failOnValidationError is set by WithFailOnValidationError.
	failOnValidationError bool
	// laxHostParsing is set by WithLaxHostParsing.
	laxHostParsing bool
	// collapseConsecutiveSlashes is set by WithCollapseConsecutiveSlashes.
	collapseConsecutiveSlashes bool
	// acceptInvalidCodepoints is set by WithAcceptInvalidCodepoints.
	acceptInvalidCodepoints bool
	// preParseHostFunc is set by WithPreParseHostFunc.
	preParseHostFunc func(url *Url, host string) string
	// postParseHostFunc is set by WithPostParseHostFunc.
	postParseHostFunc func(url *Url, host string) string
	// percentEncodeSinglePercentSign is set by WithPercentEncodeSinglePercentSign.
	percentEncodeSinglePercentSign bool
	// allowSettingPathForNonBaseUrl is set by WithAllowSettingPathForNonBaseUrl.
	allowSettingPathForNonBaseUrl bool
	// skipWindowsDriveLetterNormalization is set by WithSkipWindowsDriveLetterNormalization.
	skipWindowsDriveLetterNormalization bool
	// specialSchemes maps each special scheme to its default port.
	specialSchemes map[string]string
	// skipTrailingSlashNormalization, when set, makes the path start state
	// treat special schemes like non-special ones.
	skipTrailingSlashNormalization bool
	// encodingOverride, when non-nil, is the single-byte encoding used when
	// percent-encoding and decoding instead of UTF-8.
	encodingOverride *charmap.Charmap
	// pathPercentEncodeSet is the encode set applied to path segments.
	pathPercentEncodeSet *PercentEncodeSet
	// specialQueryPercentEncodeSet is the encode set applied to the query of
	// URLs with a special scheme.
	specialQueryPercentEncodeSet *PercentEncodeSet
	// queryPercentEncodeSet is the encode set applied to the query of URLs
	// with a non-special scheme.
	queryPercentEncodeSet *PercentEncodeSet
	// specialFragmentPercentEncodeSet is the encode set applied to the
	// fragment of URLs with a special scheme.
	specialFragmentPercentEncodeSet *PercentEncodeSet
	// fragmentPercentEncodeSet is the encode set applied to the fragment of
	// URLs with a non-special scheme.
	fragmentPercentEncodeSet *PercentEncodeSet
	// NOTE(review): presumably controls whether "=" is written for search
	// parameters with an empty value; not used in this part of the code.
	skipEqualsForEmptySearchParamsValue bool
}
// ParserOption configures how we parse a URL. Values are passed to NewParser,
// which applies them in order to the default options.
type ParserOption interface {
	// apply modifies the given options in place.
	apply(*parserOptions)
}
// EmptyParserOption does not alter the parser configuration. It can be embedded in
// another structure to build custom parser options.
type EmptyParserOption struct{}

// apply implements ParserOption and does nothing.
func (EmptyParserOption) apply(*parserOptions) {}
// funcParserOption wraps a function that modifies parserOptions into an
// implementation of the ParserOption interface.
type funcParserOption struct {
	// f is the function invoked by apply.
	f func(*parserOptions)
}

// apply implements ParserOption by calling the wrapped function on po.
func (fpo *funcParserOption) apply(po *parserOptions) {
	fpo.f(po)
}
// newFuncParserOption returns a funcParserOption that wraps f.
func newFuncParserOption(f func(*parserOptions)) *funcParserOption {
	return &funcParserOption{
		f: f,
	}
}
// defaultParserOptions returns the options a parser starts with before any
// ParserOption is applied: the package's default percent-encode sets and the
// default special schemes. All other options keep their zero value.
func defaultParserOptions() parserOptions {
	var opts parserOptions
	opts.specialSchemes = defaultSpecialSchemes
	opts.pathPercentEncodeSet = PathPercentEncodeSet
	opts.queryPercentEncodeSet = QueryPercentEncodeSet
	opts.specialQueryPercentEncodeSet = SpecialQueryPercentEncodeSet
	opts.fragmentPercentEncodeSet = FragmentPercentEncodeSet
	opts.specialFragmentPercentEncodeSet = FragmentPercentEncodeSet
	return opts
}
// WithReportValidationErrors makes the parser record all non-fatal validation
// errors so that they can be fetched after parsing.
func WithReportValidationErrors() ParserOption {
	return newFuncParserOption(func(o *parserOptions) {
		o.reportValidationErrors = true
	})
}
// WithFailOnValidationError makes the parser return an error on non-fatal
// validation errors as well.
func WithFailOnValidationError() ParserOption {
	return newFuncParserOption(func(o *parserOptions) {
		o.failOnValidationError = true
	})
}
// WithLaxHostParsing makes the parser ignore some host decoding errors and
// return the host as is.
//
// This API is EXPERIMENTAL.
func WithLaxHostParsing() ParserOption {
	return newFuncParserOption(func(o *parserOptions) {
		o.laxHostParsing = true
	})
}
// WithCollapseConsecutiveSlashes collapses consecutive slashes in the path into one
// (e.g. http://example.com//foo///bar => http://example.com/foo/bar).
func WithCollapseConsecutiveSlashes() ParserOption {
	return newFuncParserOption(func(o *parserOptions) {
		o.collapseConsecutiveSlashes = true
	})
}
// WithAcceptInvalidCodepoints percent encodes values which are not valid UTF-8.
//
// This API is EXPERIMENTAL.
func WithAcceptInvalidCodepoints() ParserOption {
	enable := func(opts *parserOptions) {
		opts.acceptInvalidCodepoints = true
	}
	return newFuncParserOption(enable)
}
// WithPreParseHostFunc installs f as a hook that allows manipulation of the
// host string before it is parsed. f receives the Url and the host string and
// returns the host string to use.
//
// This API is EXPERIMENTAL.
func WithPreParseHostFunc(f func(url *Url, host string) string) ParserOption {
	install := func(opts *parserOptions) {
		opts.preParseHostFunc = f
	}
	return newFuncParserOption(install)
}
// WithPostParseHostFunc installs f as a hook that allows manipulation of the
// host string after it is parsed. The hook is called only if the host isn't an
// IP address.
//
// This API is EXPERIMENTAL.
func WithPostParseHostFunc(f func(url *Url, host string) string) ParserOption {
	install := func(opts *parserOptions) {
		opts.postParseHostFunc = f
	}
	return newFuncParserOption(install)
}
// WithPercentEncodeSinglePercentSign percent encodes a '%' which is not
// followed by two hexadecimal digits instead of complaining about invalid
// percent encoding.
//
// This API is EXPERIMENTAL.
func WithPercentEncodeSinglePercentSign() ParserOption {
	enable := func(opts *parserOptions) {
		opts.percentEncodeSinglePercentSign = true
	}
	return newFuncParserOption(enable)
}
// WithAllowSettingPathForNonBaseUrl allows setting the path of a URL which
// cannot be a base URL. The WHATWG standard says this should be illegal.
//
// This API is EXPERIMENTAL.
func WithAllowSettingPathForNonBaseUrl() ParserOption {
	enable := func(opts *parserOptions) {
		opts.allowSettingPathForNonBaseUrl = true
	}
	return newFuncParserOption(enable)
}
// WithSkipWindowsDriveLetterNormalization skips the conversion of 'C|' to 'C:'.
// The WHATWG standard says only a normalized Windows drive letter is conforming.
//
// This API is EXPERIMENTAL.
func WithSkipWindowsDriveLetterNormalization() ParserOption {
	enable := func(opts *parserOptions) {
		opts.skipWindowsDriveLetterNormalization = true
	}
	return newFuncParserOption(enable)
}
// WithSpecialSchemes overrides the parser's notion of special schemes.
// special maps a scheme to its default port. The map is stored as given, not
// copied.
//
// The WHATWG standard removed gopher from the special schemes. This is how you
// add it back:
//
//	special := map[string]string{
//		"ftp":    "21",
//		"file":   "",
//		"http":   "80",
//		"https":  "443",
//		"ws":     "80",
//		"wss":    "443",
//		"gopher": "70",
//	}
//
// This API is EXPERIMENTAL.
func WithSpecialSchemes(special map[string]string) ParserOption {
	install := func(opts *parserOptions) {
		opts.specialSchemes = special
	}
	return newFuncParserOption(install)
}
// WithEncodingOverride sets an encoding other than UTF-8 to use when parsing.
//
// This API is EXPERIMENTAL.
func WithEncodingOverride(cm *charmap.Charmap) ParserOption {
	install := func(opts *parserOptions) {
		opts.encodingOverride = cm
	}
	return newFuncParserOption(install)
}
// WithPathPercentEncodeSet sets an alternative set of characters to percent
// encode in the path component.
//
// This API is EXPERIMENTAL.
func WithPathPercentEncodeSet(encodeSet *PercentEncodeSet) ParserOption {
	install := func(opts *parserOptions) {
		opts.pathPercentEncodeSet = encodeSet
	}
	return newFuncParserOption(install)
}
// WithQueryPercentEncodeSet sets an alternative set of characters to percent
// encode in the query component when the scheme is not special.
//
// This API is EXPERIMENTAL.
func WithQueryPercentEncodeSet(encodeSet *PercentEncodeSet) ParserOption {
	install := func(opts *parserOptions) {
		opts.queryPercentEncodeSet = encodeSet
	}
	return newFuncParserOption(install)
}
// WithSpecialQueryPercentEncodeSet sets an alternative set of characters to
// percent encode in the query component when the scheme is special.
//
// This API is EXPERIMENTAL.
func WithSpecialQueryPercentEncodeSet(encodeSet *PercentEncodeSet) ParserOption {
	install := func(opts *parserOptions) {
		opts.specialQueryPercentEncodeSet = encodeSet
	}
	return newFuncParserOption(install)
}
// WithFragmentPathPercentEncodeSet sets an alternative set of characters to
// percent encode in the fragment component when the scheme is not special.
//
// This API is EXPERIMENTAL.
func WithFragmentPathPercentEncodeSet(encodeSet *PercentEncodeSet) ParserOption {
	install := func(opts *parserOptions) {
		opts.fragmentPercentEncodeSet = encodeSet
	}
	return newFuncParserOption(install)
}
// WithSpecialFragmentPathPercentEncodeSet sets an alternative set of
// characters to percent encode in the fragment component when the scheme is
// special.
//
// This API is EXPERIMENTAL.
func WithSpecialFragmentPathPercentEncodeSet(encodeSet *PercentEncodeSet) ParserOption {
	return newFuncParserOption(func(o *parserOptions) {
		// Target the special-scheme field. Writing to fragmentPercentEncodeSet
		// here would override the non-special set and leave special schemes
		// on their default.
		o.specialFragmentPercentEncodeSet = encodeSet
	})
}
// WithSkipTrailingSlashNormalization skips the normalization of empty paths.
//
// This API is EXPERIMENTAL.
func WithSkipTrailingSlashNormalization() ParserOption {
	enable := func(opts *parserOptions) {
		opts.skipTrailingSlashNormalization = true
	}
	return newFuncParserOption(enable)
}
// WithSkipEqualsForEmptySearchParamsValue skips writing '=' when a search
// parameter has an empty value.
//
// e.g. url.SearchParams().Set("name", "") gives 'http://...?name' instead of 'http://...?name='
//
// This API is EXPERIMENTAL.
func WithSkipEqualsForEmptySearchParamsValue() ParserOption {
	enable := func(opts *parserOptions) {
		opts.skipEqualsForEmptySearchParamsValue = true
	}
	return newFuncParserOption(enable)
}
+75
View File
@@ -0,0 +1,75 @@
package url
import (
"strings"
)
// path is the path component of a URL: either a list of segments or a single
// opaque string.
type path struct {
	// p holds the path segments; for an opaque path it holds exactly one
	// element, the opaque string (see setOpaque).
	p []string

	// opaque reports whether p[0] is an opaque path rather than a segment.
	opaque bool
}
// isOpaque reports whether the path is an opaque path.
func (p *path) isOpaque() bool {
	opaque := p.opaque
	return opaque
}
// isEmpty reports whether the path holds no segments at all.
func (p *path) isEmpty() bool {
	segments := p.p
	return len(segments) == 0
}
// setOpaque replaces the path with the single opaque string opaquePath and
// marks the path as opaque.
func (p *path) setOpaque(opaquePath string) {
	p.opaque = true
	p.p = []string{opaquePath}
}
// addSegment appends segment to the path and marks the path as not opaque.
func (p *path) addSegment(segment string) {
	p.opaque = false
	p.p = append(p.p, segment)
}
// init resets the path to an empty, non-opaque list of segments. The segment
// slice is left non-nil.
func (p *path) init() {
	*p = path{p: []string{}, opaque: false}
}
// shortenPath removes the last segment of the path, if there is one. For the
// "file" scheme a path consisting of a single normalized Windows drive letter
// is left untouched.
func (p *path) shortenPath(scheme string) {
	n := len(p.p)
	if n == 0 {
		return
	}
	// Never drop the drive letter of a file URL.
	if n == 1 && scheme == "file" && isNormalizedWindowsDriveLetter(p.p[0]) {
		return
	}
	p.p = p.p[:n-1]
}
// stripTrailingSpacesIfOpaque removes trailing U+0020 spaces from an opaque
// path. Non-opaque paths are left unchanged.
func (p *path) stripTrailingSpacesIfOpaque() {
	if !p.opaque {
		return
	}
	trimmed := strings.TrimRight(p.p[0], "\u0020")
	p.p[0] = trimmed
}
// clone returns an independent copy of the path, or nil for a nil receiver.
// A nil segment slice stays nil; otherwise the segments are copied into a new
// slice.
func (p *path) clone() *path {
	if p == nil {
		return nil
	}
	dup := &path{opaque: p.opaque}
	if p.p == nil {
		return dup
	}
	dup.p = append(make([]string, 0, len(p.p)), p.p...)
	return dup
}
// String serializes the path. An opaque path is returned as is; otherwise
// every segment is written prefixed with "/", so an empty segment list yields
// the empty string.
func (p *path) String() string {
	if p.opaque {
		return p.p[0]
	}
	// Build the result in one buffer instead of re-allocating the string for
	// every segment.
	var sb strings.Builder
	for _, segment := range p.p {
		sb.WriteByte('/')
		sb.WriteString(segment)
	}
	return sb.String()
}
+201
View File
@@ -0,0 +1,201 @@
/*
* Copyright 2020 National Library of Norway.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package url
import (
"sort"
"strings"
)
// NameValuePair is a single search parameter.
type NameValuePair struct {
	// Name is the parameter name.
	Name string
	// Value is the parameter value.
	Value string
}
// SearchParams represents a set of query parameters.
type SearchParams struct {
	// url is the Url whose query is rewritten by update; update is a no-op
	// when it is nil.
	url *Url

	// params holds the name/value pairs in their current order.
	params []*NameValuePair
}
// init replaces the current parameters with the name/value pairs parsed from
// query, which is expected without a leading '?'.
//
// The string is split on '&' (empty sequences are skipped) and every sequence
// is split on its first '='; a sequence without '=' yields an empty value.
// As in the application/x-www-form-urlencoded parser of the WHATWG URL
// standard, '+' is turned into a space BEFORE percent-decoding, so an encoded
// plus sign ("%2B") survives as a literal '+'.
func (s *SearchParams) init(query string) {
	decode := func(raw string) string {
		return s.url.parser.DecodePercentEncoded(strings.ReplaceAll(raw, "+", " "))
	}

	// Reuse the existing backing array.
	s.params = s.params[:0]
	for _, sequence := range strings.Split(query, "&") {
		if sequence == "" {
			continue
		}
		kv := strings.SplitN(sequence, "=", 2)
		nvp := &NameValuePair{Name: decode(kv[0])}
		if len(kv) == 2 {
			nvp.Value = decode(kv[1])
		}
		s.params = append(s.params, nvp)
	}
}
// update writes the serialized parameters back into the owning Url's query.
// It does nothing when there is no owning Url. A nil query is left nil when
// the serialization is empty; in every other case the query is replaced.
func (s *SearchParams) update() {
	owner := s.url
	if owner == nil {
		return
	}
	serialized := s.String()
	if serialized == "" && owner.query == nil {
		return
	}
	owner.query = &serialized
}
// Append adds a new name/value pair at the end of the search parameters and
// updates the owning URL's query.
func (s *SearchParams) Append(name, value string) {
	pair := &NameValuePair{Name: name, Value: value}
	s.params = append(s.params, pair)
	s.update()
}
// Delete removes every search parameter called name, together with its
// value(s), and updates the owning URL's query.
func (s *SearchParams) Delete(name string) {
	var kept []*NameValuePair
	for _, pair := range s.params {
		if pair.Name == name {
			continue
		}
		kept = append(kept, pair)
	}
	s.params = kept
	s.update()
}
// Get returns the first value associated with the given search parameter
// name, or the empty string when no parameter has that name.
func (s *SearchParams) Get(name string) string {
	for i := range s.params {
		if pair := s.params[i]; pair.Name == name {
			return pair.Value
		}
	}
	return ""
}
// GetAll returns all the values associated with the given search parameter
// name, in order. The result is nil when no parameter has that name.
func (s *SearchParams) GetAll(name string) []string {
	var values []string
	for _, pair := range s.params {
		if pair.Name != name {
			continue
		}
		values = append(values, pair.Value)
	}
	return values
}
// Has reports whether the search parameters contain a parameter with the
// given name.
func (s *SearchParams) Has(name string) bool {
	found := false
	for i := 0; i < len(s.params) && !found; i++ {
		found = s.params[i].Name == name
	}
	return found
}
// Set sets the value associated with name to value. The first parameter
// called name keeps its position and receives the new value; any further
// parameters with that name are removed. If there is none, a new pair is
// appended. The owning URL's query is updated afterwards.
func (s *SearchParams) Set(name, value string) {
	// Filter in place: kept shares the backing array of s.params.
	kept := s.params[:0]
	found := false
	for idx, pair := range s.params {
		switch {
		case pair.Name != name:
			kept = append(kept, pair)
		case !found:
			pair.Value = value
			found = true
			kept = append(kept, pair)
		default:
			// Duplicate of an already updated name: drop it.
			s.params[idx] = nil
		}
	}
	if !found {
		kept = append(kept, &NameValuePair{Name: name, Value: value})
	}
	s.params = kept
	s.update()
}
// Sort sorts the search parameters by name. The sort is stable, so parameters
// sharing a name keep their relative order. The owning URL's query is updated
// afterwards.
func (s *SearchParams) Sort() {
	byName := func(a, b int) bool {
		return s.params[a].Name < s.params[b].Name
	}
	sort.SliceStable(s.params, byName)
	s.update()
}
// SortAbsolute sorts the search parameters by name and, for equal names, by
// value. The owning URL's query is updated afterwards.
//
// Name and value are compared separately. Comparing the concatenation
// Name+Value does not give a name-then-value ordering: ("a", "c") would sort
// after ("ab", "a"), and ("a", "b") would tie with ("ab", "").
func (s *SearchParams) SortAbsolute() {
	sort.SliceStable(s.params, func(i, j int) bool {
		a, b := s.params[i], s.params[j]
		if a.Name != b.Name {
			return a.Name < b.Name
		}
		return a.Value < b.Value
	})
	s.update()
}
// Iterate calls f for every search parameter in order. f receives the stored
// pair itself, so it may modify it; the owning URL's query is updated once
// after the last call.
func (s *SearchParams) Iterate(f func(pair *NameValuePair)) {
	// Snapshot the slice header so the walk covers the pairs present on entry.
	pairs := s.params
	for i := 0; i < len(pairs); i++ {
		f(pairs[i])
	}
	s.update()
}
// String serializes the search parameters as escaped "name=value" pairs
// joined by '&'. For an empty value the '=' is omitted when the parser option
// skipEqualsForEmptySearchParamsValue is set.
func (s *SearchParams) String() string {
	var sb strings.Builder
	for i, pair := range s.params {
		if i != 0 {
			sb.WriteByte('&')
		}
		s.QueryEscape(pair.Name, &sb)
		if pair.Value != "" {
			sb.WriteByte('=')
			s.QueryEscape(pair.Value, &sb)
		} else if !s.url.parser.opts.skipEqualsForEmptySearchParamsValue {
			sb.WriteByte('=')
		}
	}
	return sb.String()
}
// QueryEscape appends the escaped form of st to output: a space (U+0020) is
// written as '+', every other rune is passed through the parser's
// percentEncodeRune with the configured query percent encode set.
func (s *SearchParams) QueryEscape(st string, output *strings.Builder) {
	for _, r := range st {
		if r == ' ' {
			output.WriteByte('+')
			continue
		}
		encoded := s.url.parser.percentEncodeRune(r, s.url.parser.opts.queryPercentEncodeSet)
		output.WriteString(encoded)
	}
}
// Clone returns a copy of the search parameters in which every name/value
// pair is duplicated. The copy keeps referring to the same Url as the
// original.
func (s *SearchParams) Clone() *SearchParams {
	dup := &SearchParams{url: s.url}
	dup.params = make([]*NameValuePair, len(s.params))
	for i := range s.params {
		src := s.params[i]
		dup.params[i] = &NameValuePair{Name: src.Name, Value: src.Value}
	}
	return dup
}
+334
View File
@@ -0,0 +1,334 @@
/*
* Copyright 2019 National Library of Norway.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package url
import (
"strings"
)
// Url represents a URL.
type Url struct {
	// NOTE(review): presumably the raw input the URL was parsed from — confirm.
	inputUrl string

	// scheme is stored without the trailing ':' (Protocol appends it).
	scheme string

	// username and password are stored percent-encoded by their setters.
	username string
	password string

	// host is nil when the URL has no host; Href then omits the "//" part.
	host *string

	// port is nil when the URL has no explicit port.
	port *string

	// decodedPort of 0 makes DecodedPort fall back to getDefaultPort.
	decodedPort int

	path *path

	// query is nil when there is no query; it carries no leading '?'.
	query *string

	// fragment is nil when there is no fragment; it carries no leading '#'.
	fragment *string

	// searchParams is created lazily by SearchParams.
	searchParams *SearchParams

	validationErrors []error

	// parser is the parser used by the setters to re-parse components.
	parser *parser

	isIPv4 bool
	isIPv6 bool
}
// Href implements WHATWG url api (https://url.spec.whatwg.org/#api)
// It serializes the URL as scheme, optional "//" + credentials + host + port,
// path, query and fragment. If excludeFragment is true, the fragment component
// is left out of the output.
func (u *Url) Href(excludeFragment bool) string {
	var sb strings.Builder
	sb.WriteString(u.scheme)
	sb.WriteByte(':')

	if u.host != nil {
		sb.WriteString("//")
		if u.username != "" || u.password != "" {
			sb.WriteString(u.username)
			if u.password != "" {
				sb.WriteByte(':')
				sb.WriteString(u.password)
			}
			sb.WriteByte('@')
		}
		sb.WriteString(*u.host)
		if u.port != nil {
			sb.WriteByte(':')
			sb.WriteString(*u.port)
		}
	} else if !u.path.isOpaque() && len(u.path.p) > 1 && u.path.p[0] == "" {
		// Without a host, a path starting with an empty segment would
		// serialize with a leading "//" and be read back as a host.
		sb.WriteString("/.")
	}

	sb.WriteString(u.path.String())

	if u.query != nil {
		sb.WriteByte('?')
		sb.WriteString(*u.query)
	}
	if u.fragment != nil && !excludeFragment {
		sb.WriteByte('#')
		sb.WriteString(*u.fragment)
	}
	return sb.String()
}
// Protocol implements WHATWG url api (https://url.spec.whatwg.org/#api)
// It returns the scheme followed by ':'.
func (u *Url) Protocol() string {
	const separator = ":"
	return u.scheme + separator
}
// SetProtocol implements WHATWG url api (https://url.spec.whatwg.org/#api)
// A ':' is appended to scheme when it is missing, then the value is handed to
// the parser starting in the scheme start state. The parser's result and error
// are discarded.
func (u *Url) SetProtocol(scheme string) {
	// Guarantee exactly the original text plus one trailing ':' if absent.
	withColon := strings.TrimSuffix(scheme, ":") + ":"
	_, _ = u.parser.BasicParser(withColon, nil, u, StateSchemeStart)
}
// Scheme returns the URL's scheme without the trailing ':'.
func (u *Url) Scheme() string {
	scheme := u.scheme
	return scheme
}
// Username implements WHATWG url api (https://url.spec.whatwg.org/#api)
// It returns the username as stored, i.e. in its percent-encoded form.
func (u *Url) Username() string {
	name := u.username
	return name
}
// SetUsername implements WHATWG url api (https://url.spec.whatwg.org/#api)
// The call is ignored when the URL has no host, an empty host, or the "file"
// scheme. Otherwise username is stored percent-encoded with the userinfo
// percent encode set.
func (u *Url) SetUsername(username string) {
	canHaveCredentials := u.host != nil && *u.host != "" && u.scheme != "file"
	if !canHaveCredentials {
		return
	}
	u.username = u.parser.PercentEncodeString(username, UserInfoPercentEncodeSet)
}
// Password implements WHATWG url api (https://url.spec.whatwg.org/#api)
// It returns the password as stored, i.e. in its percent-encoded form.
func (u *Url) Password() string {
	secret := u.password
	return secret
}
// SetPassword implements WHATWG url api (https://url.spec.whatwg.org/#api)
// The call is ignored when the URL has no host, an empty host, or the "file"
// scheme. Otherwise password is stored percent-encoded with the userinfo
// percent encode set.
func (u *Url) SetPassword(password string) {
	switch {
	case u.host == nil, *u.host == "", u.scheme == "file":
		return
	}
	u.password = u.parser.PercentEncodeString(password, UserInfoPercentEncodeSet)
}
// Host implements WHATWG url api (https://url.spec.whatwg.org/#api)
// It returns "" when there is no host, the host alone when there is no port,
// and "host:port" otherwise.
func (u *Url) Host() string {
	switch {
	case u.host == nil:
		return ""
	case u.port == nil:
		return *u.host
	default:
		return *u.host + ":" + *u.port
	}
}
// SetHost implements WHATWG url api (https://url.spec.whatwg.org/#api)
// The call is ignored for URLs with an opaque path. Otherwise host is handed
// to the parser starting in the host state; the parser's result and error are
// discarded.
func (u *Url) SetHost(host string) {
	if !u.path.isOpaque() {
		_, _ = u.parser.BasicParser(host, nil, u, StateHost)
	}
}
// Hostname implements WHATWG url api (https://url.spec.whatwg.org/#api)
// It returns the host without the port, or "" when there is no host.
func (u *Url) Hostname() string {
	if h := u.host; h != nil {
		return *h
	}
	return ""
}
// SetHostname implements WHATWG url api (https://url.spec.whatwg.org/#api)
// The call is ignored for URLs with an opaque path. Otherwise host is handed
// to the parser starting in the hostname state; the parser's result and error
// are discarded.
func (u *Url) SetHostname(host string) {
	if !u.path.isOpaque() {
		_, _ = u.parser.BasicParser(host, nil, u, StateHostname)
	}
}
// Port implements WHATWG url api (https://url.spec.whatwg.org/#api)
// It returns the explicit port as a string, or "" when there is none.
func (u *Url) Port() string {
	if p := u.port; p != nil {
		return *p
	}
	return ""
}
// SetPort implements WHATWG url api (https://url.spec.whatwg.org/#api)
// The call is ignored when the URL has no host, an empty host, or the "file"
// scheme. An empty port removes the explicit port; any other value is handed
// to the parser starting in the port state, with its result and error
// discarded.
func (u *Url) SetPort(port string) {
	switch {
	case u.host == nil || *u.host == "" || u.scheme == "file":
		return
	case port == "":
		u.port = nil
		u.decodedPort = 0
	default:
		_, _ = u.parser.BasicParser(port, nil, u, StatePort)
	}
}
// DecodedPort returns the port as an integer. When no numeric port is stored
// (decodedPort is 0) it returns the result of getDefaultPort instead.
func (u *Url) DecodedPort() int {
	if u.decodedPort != 0 {
		return u.decodedPort
	}
	return u.getDefaultPort()
}
// Pathname implements WHATWG url api (https://url.spec.whatwg.org/#api)
// It returns the serialized path.
func (u *Url) Pathname() string {
	serialized := u.path.String()
	return serialized
}
// SetPathname implements WHATWG url api (https://url.spec.whatwg.org/#api)
// The call is ignored for URLs with an opaque path. Otherwise the current path
// is emptied and path is handed to the parser starting in the path start
// state; the parser's result and error are discarded.
func (u *Url) SetPathname(path string) {
	if !u.path.isOpaque() {
		u.path.init()
		_, _ = u.parser.BasicParser(path, nil, u, StatePathStart)
	}
}
// OpaquePath reports whether the URL has an opaque path
// (https://url.spec.whatwg.org/#url-opaque-path).
func (u *Url) OpaquePath() bool {
	return u.path.isOpaque()
}
// Search implements WHATWG url api (https://url.spec.whatwg.org/#api)
// It returns the query prefixed with '?', or "" when the query is missing or
// empty.
func (u *Url) Search() string {
	if u.query != nil && *u.query != "" {
		return "?" + *u.query
	}
	return ""
}
// SetSearch implements WHATWG url api (https://url.spec.whatwg.org/#api)
// An empty query removes the query, empties existing search parameters and,
// when there is no fragment either, strips trailing spaces from an opaque
// path. Otherwise a single leading '?' is dropped, the value is handed to the
// parser starting in the query state (result and error discarded), and the
// search parameters are rebuilt from the resulting query.
func (u *Url) SetSearch(query string) {
	if len(query) == 0 {
		u.query = nil
		if sp := u.searchParams; sp != nil {
			sp.params = sp.params[:0]
		}
		// The query was just cleared, so only the fragment decides.
		if u.fragment == nil {
			u.path.stripTrailingSpacesIfOpaque()
		}
		return
	}

	if u.query == nil {
		u.query = new(string)
	}
	_, _ = u.parser.BasicParser(strings.TrimPrefix(query, "?"), nil, u, StateQuery)

	if u.searchParams != nil {
		u.searchParams.init(*u.query)
		return
	}
	u.newUrlSearchParams()
}
// SearchParams implements WHATWG url api (https://url.spec.whatwg.org/#api)
// The search parameters are created from the current query on first use and
// the same object is returned on later calls.
func (u *Url) SearchParams() *SearchParams {
	if u.searchParams != nil {
		return u.searchParams
	}
	u.newUrlSearchParams()
	return u.searchParams
}
// SetSearchParams makes searchParams the search parameters of this URL and
// rewrites the URL's query from them.
//
// searchParams is bound to u first. Without that, its update would write the
// query into whichever Url it was created or cloned from, or do nothing if it
// had none, and u's query would never change. searchParams is adopted, not
// copied, so later changes to it keep updating u.
//
// Passing nil only drops the cached search parameters; they are rebuilt from
// the current query on the next call to SearchParams.
func (u *Url) SetSearchParams(searchParams *SearchParams) {
	if searchParams == nil {
		u.searchParams = nil
		return
	}
	searchParams.url = u
	u.searchParams = searchParams
	u.searchParams.update()
}
// Query returns the query without a leading '?', or "" when the query is
// missing or empty.
func (u *Url) Query() string {
	if q := u.query; q != nil {
		return *q
	}
	return ""
}
// Hash implements WHATWG url api (https://url.spec.whatwg.org/#api)
// It returns the fragment prefixed with '#', or "" when the fragment is
// missing or empty.
func (u *Url) Hash() string {
	if u.fragment != nil && *u.fragment != "" {
		return "#" + *u.fragment
	}
	return ""
}
// SetHash implements WHATWG url api (https://url.spec.whatwg.org/#api)
// An empty fragment removes the fragment and, when there is no query either,
// strips trailing spaces from an opaque path. Otherwise a single leading '#'
// is dropped, the fragment is reset to empty and the value is handed to the
// parser starting in the fragment state (result and error discarded).
func (u *Url) SetHash(fragment string) {
	if len(fragment) == 0 {
		u.fragment = nil
		// The fragment was just cleared, so only the query decides.
		if u.query == nil {
			u.path.stripTrailingSpacesIfOpaque()
		}
		return
	}

	u.fragment = new(string)
	_, _ = u.parser.BasicParser(strings.TrimPrefix(fragment, "#"), nil, u, StateFragment)
}
// Fragment returns the fragment without a leading '#', or "" when the
// fragment is missing or empty.
func (u *Url) Fragment() string {
	if f := u.fragment; f != nil {
		return *f
	}
	return ""
}
// String returns the serialized URL including the fragment; it is equivalent
// to Href(false).
func (u *Url) String() string {
	const excludeFragment = false
	return u.Href(excludeFragment)
}
// ValidationErrors returns the validation errors stored on the URL. The
// returned slice is the URL's own slice, not a copy.
func (u *Url) ValidationErrors() []error {
	errs := u.validationErrors
	return errs
}
// newUrlSearchParams replaces the URL's search parameters with a fresh
// SearchParams bound to u, filled from the current query when there is one.
func (u *Url) newUrlSearchParams() {
	u.searchParams = &SearchParams{url: u}
	if q := u.query; q != nil {
		u.searchParams.init(*q)
	}
}
// IsIPv4 returns the URL's isIPv4 flag.
// NOTE(review): presumably set by the parser when the host is an IPv4 address — confirm.
func (u *Url) IsIPv4() bool {
	flag := u.isIPv4
	return flag
}
// IsIPv6 returns the URL's isIPv6 flag.
// NOTE(review): presumably set by the parser when the host is an IPv6 address — confirm.
func (u *Url) IsIPv6() bool {
	flag := u.isIPv6
	return flag
}
// Clone returns a deep copy of the URL. Host, port, query, fragment, path,
// search parameters and validation errors are copied; the parser is shared
// with the original.
//
// Calling Clone creates the original's search parameters if they did not
// exist yet (via SearchParams).
func (u *Url) Clone() *Url {
	c := &Url{
		inputUrl:    u.inputUrl,
		scheme:      u.scheme,
		username:    u.username,
		password:    u.password,
		host:        cloneStringPointer(u.host),
		port:        cloneStringPointer(u.port),
		decodedPort: u.decodedPort,
		path:        u.path.clone(),
		query:       cloneStringPointer(u.query),
		fragment:    cloneStringPointer(u.fragment),
		// Copy so that later appends on either URL do not affect the other.
		validationErrors: append([]error(nil), u.validationErrors...),
		parser:           u.parser,
		isIPv4:           u.isIPv4,
		isIPv6:           u.isIPv6,
	}

	// SearchParams.Clone keeps the back-pointer to the original Url. Rebind
	// it, otherwise changing the clone's search parameters would rewrite the
	// original's query instead of the clone's.
	c.searchParams = u.SearchParams().Clone()
	c.searchParams.url = c

	return c
}
// cloneStringPointer returns a pointer to a fresh copy of the string s points
// to, or nil when s is nil.
func cloneStringPointer(s *string) *string {
	if s != nil {
		dup := *s
		return &dup
	}
	return nil
}