feat: Inital commit

This commit is contained in:
2025-07-26 05:58:59 +00:00
commit 753d1c60ea
1849 changed files with 830533 additions and 0 deletions
+529
View File
@@ -0,0 +1,529 @@
/*
* Copyright 2019 National Library of Norway.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package url
import (
goerrors "errors"
"fmt"
"math"
"strconv"
"strings"
"unicode/utf8"
"golang.org/x/net/idna"
"github.com/nlnwa/whatwg-url/errors"
)
// parseHost parses the host part of the input string.
func (p *parser) parseHost(u *Url, parser *parser, input string, isNotSpecial bool) (string, error) {
if p.opts.preParseHostFunc != nil {
input = p.opts.preParseHostFunc(u, input)
}
if input == "" {
return "", nil
}
if input[0] == '[' {
if !strings.HasSuffix(input, "]") {
if err := p.handleError(u, errors.IPv6Unclosed, true); err != nil {
return "", err
}
}
input = strings.Trim(input, "[]")
return p.parseIPv6(u, newInputString(input))
}
if isNotSpecial {
return p.parseOpaqueHost(u, input)
}
domain := p.DecodePercentEncoded(input)
if !utf8.ValidString(domain) {
if p.opts.laxHostParsing {
return percentEncodeString(input, HostPercentEncodeSet), nil
}
if err := p.handleErrorWithDescription(u, errors.DomainToASCII, true, "not a valid UTF-8 string"); err != nil {
return "", err
}
}
asciiDomain, err := p.ToASCII(domain, false)
if err != nil {
if p.opts.laxHostParsing {
return domain, nil
}
if err := p.handleWrappedError(u, errors.DomainToASCII, true, err); err != nil {
return "", err
}
}
for _, c := range asciiDomain {
if ForbiddenDomainCodePoint.Test(uint(c)) {
if p.opts.laxHostParsing {
return parser.PercentEncodeString(asciiDomain, HostPercentEncodeSet), nil
} else {
if err := p.handleErrorWithDescription(u, errors.DomainInvalidCodePoint, true, string(c)); err != nil {
return "", err
}
}
}
}
if p.endsInANumber(u, asciiDomain) {
ipv4Host, err := p.parseIPv4(u, asciiDomain)
return ipv4Host, err
}
if p.opts.postParseHostFunc != nil {
asciiDomain = p.opts.postParseHostFunc(u, asciiDomain)
}
return asciiDomain, nil
}
func (p *parser) endsInANumber(u *Url, input string) bool {
parts := strings.Split(input, ".")
if parts[len(parts)-1] == "" {
if len(parts) == 1 {
return false
}
parts = parts[0 : len(parts)-1]
}
last := parts[len(parts)-1]
if last != "" && containsOnly(last, ASCIIDigit) {
return true
}
if _, _, err := p.parseIPv4Number(u, last); err == nil || goerrors.Is(err, strconv.ErrRange) {
return true
}
return false
}
func (p *parser) parseIPv4Number(u *Url, input string) (number int64, validationError bool, err error) {
if input == "" {
if err = p.handleError(u, errors.IPv4EmptyPart, true); err != nil {
return
}
}
R := 10
if len(input) >= 2 && (strings.HasPrefix(input, "0x") || strings.HasPrefix(input, "0X")) {
validationError = true
input = input[2:]
R = 16
} else if len(input) >= 2 && strings.HasPrefix(input, "0") {
validationError = true
input = input[1:]
R = 8
}
if input == "" {
validationError = true
return
}
number, err = strconv.ParseInt(input, R, 64)
return
}
func (p *parser) parseIPv4(u *Url, input string) (string, error) {
parts := strings.Split(input, ".")
if parts[len(parts)-1] == "" {
if err := p.handleError(u, errors.IPv4EmptyPart, false); err != nil {
return input, err
}
if len(parts) > 1 {
parts = parts[:len(parts)-1]
}
}
if len(parts) > 4 {
if err := p.handleError(u, errors.IPv4TooManyParts, true); err != nil {
return input, err
}
}
var numbers []int64
for _, part := range parts {
n, validationError, err := p.parseIPv4Number(u, part)
if err != nil {
if err := p.handleWrappedError(u, errors.IPv4NonNumericPart, true, err); err != nil {
return input, err
}
}
if validationError {
if err := p.handleError(u, errors.IPv4NonDecimalPart, false); err != nil {
return input, err
}
}
numbers = append(numbers, n)
}
for _, n := range numbers {
if n > 255 {
if err := p.handleError(u, errors.IPv4OutOfRangePart, false); err != nil {
return input, err
}
}
}
for _, n := range numbers[:len(numbers)-1] {
if n > 255 {
if err := p.handleError(u, errors.IPv4OutOfRangePart, true); err != nil {
return "", err
}
}
}
if numbers[len(numbers)-1] >= int64(math.Pow(256, float64(5-len(numbers)))) {
if err := p.handleError(u, errors.IPv4OutOfRangePart, true); err != nil {
return "", err
}
}
var ipv4 = IPv4Addr(numbers[len(numbers)-1])
numbers = numbers[:len(numbers)-1]
for counter, n := range numbers {
ipv4 += IPv4Addr(n * int64(math.Pow(256, float64(3-counter))))
}
u.isIPv4 = true
return ipv4.String(), nil
}
func (p *parser) parseIPv6(u *Url, input *inputString) (string, error) {
address := &IPv6Addr{}
pieceIdx := 0
compress := -1
c := input.nextCodePoint()
if c == ':' {
if !input.remainingStartsWith(":") {
if err := p.handleError(u, errors.IPv6InvalidCompression, true); err != nil {
return "", err
}
}
input.nextCodePoint()
c = input.nextCodePoint()
pieceIdx++
compress = pieceIdx
}
for !input.eof {
if pieceIdx == 8 {
if err := p.handleError(u, errors.IPv6TooManyPieces, true); err != nil {
return "", err
}
}
if c == ':' {
if compress >= 0 {
if err := p.handleError(u, errors.IPv6MultipleCompression, true); err != nil {
return "", err
}
}
c = input.nextCodePoint()
pieceIdx++
compress = pieceIdx
continue
}
value := 0
length := 0
for length < 4 && ASCIIHexDigit.Test(uint(c)) {
v, _ := strconv.ParseInt(string(c), 16, 32)
value = value*0x10 + int(v)
c = input.nextCodePoint()
length++
}
if c == '.' {
if length == 0 {
if err := p.handleError(u, errors.IPv4InIPv6InvalidCodePoint, true); err != nil {
return "", err
}
}
input.rewind(length + 1)
c = input.nextCodePoint()
if pieceIdx > 6 {
if err := p.handleError(u, errors.IPv4InIPv6TooManyPieces, true); err != nil {
return "", err
}
}
numbersSeen := 0
for !input.eof {
ipv4Piece := -1
if numbersSeen > 0 {
if c == '.' && numbersSeen < 4 {
c = input.nextCodePoint()
} else {
if err := p.handleError(u, errors.IPv4InIPv6InvalidCodePoint, true); err != nil {
return "", err
}
}
}
if !ASCIIDigit.Test(uint(c)) {
if err := p.handleError(u, errors.IPv4InIPv6InvalidCodePoint, true); err != nil {
return "", err
}
}
for ASCIIDigit.Test(uint(c)) {
number, _ := strconv.Atoi(string(c))
if ipv4Piece < 0 {
ipv4Piece = number
} else if ipv4Piece == 0 {
if err := p.handleError(u, errors.IPv4InIPv6InvalidCodePoint, true); err != nil {
return "", err
}
} else {
ipv4Piece = ipv4Piece*10 + number
}
if ipv4Piece > 255 {
if err := p.handleError(u, errors.IPv4InIPv6OutOfRangePart, true); err != nil {
return "", err
}
}
c = input.nextCodePoint()
}
address[pieceIdx] = address[pieceIdx]*0x100 + uint16(ipv4Piece)
numbersSeen++
if numbersSeen == 2 || numbersSeen == 4 {
pieceIdx++
}
}
if numbersSeen != 4 {
if err := p.handleError(u, errors.IPv4InIPv6TooFewParts, true); err != nil {
return "", err
}
}
break
} else if c == ':' {
c = input.nextCodePoint()
if input.eof {
if err := p.handleError(u, errors.IPv6InvalidCodePoint, true); err != nil {
return "", err
}
}
} else if !input.eof {
if err := p.handleError(u, errors.IPv6InvalidCodePoint, true); err != nil {
return "", err
}
}
address[pieceIdx] = uint16(value)
pieceIdx++
}
if compress >= 0 {
swaps := pieceIdx - compress
pieceIdx = 7
for pieceIdx != 0 && swaps > 0 {
t := address[pieceIdx]
address[pieceIdx] = address[compress+swaps-1]
address[compress+swaps-1] = t
pieceIdx--
swaps--
}
} else if compress < 0 && pieceIdx != 8 {
if err := p.handleError(u, errors.IPv6TooFewPieces, true); err != nil {
return "", err
}
}
u.isIPv6 = true
return "[" + address.String() + "]", nil
}
func (p *parser) parseOpaqueHost(u *Url, input string) (string, error) {
output := ""
for i, c := range input {
if ForbiddenHostCodePoint.Test(uint(c)) {
if p.opts.laxHostParsing {
return input, nil
} else {
if err := p.handleErrorWithDescription(u, errors.HostInvalidCodePoint, true, string(c)); err != nil {
return "", err
}
}
}
if !isURLCodePoint(c) && c != '%' {
if err := p.handleErrorWithDescription(u, errors.InvalidURLUnit, false, string(c)); err != nil {
return "", err
}
}
if c == '%' {
invalidPercentEncoding, d := remainingIsInvalidPercentEncoded([]rune(input[i:]))
if invalidPercentEncoding {
if err := p.handleErrorWithDescription(u, errors.InvalidURLUnit, false, d); err != nil {
return "", err
}
}
}
output += p.percentEncodeRune(c, C0PercentEncodeSet)
}
return output, nil
}
type IPv6Addr [8]uint16
func (address *IPv6Addr) String() string {
output := ""
compress := -1
currentIdx := -1
currentLength := 0
compressLength := 0
for pieceIdx := 0; pieceIdx < 8; pieceIdx++ {
if address[pieceIdx] == 0 {
if currentIdx < 0 {
currentIdx = pieceIdx
}
currentLength++
} else {
if currentLength > 1 && currentLength > compressLength {
compress = currentIdx
compressLength = currentLength
}
currentIdx = -1
currentLength = 0
}
}
if currentLength > 1 && currentLength > compressLength {
compress = currentIdx
}
ignore0 := false
for pieceIdx := 0; pieceIdx < 8; pieceIdx++ {
if ignore0 && address[pieceIdx] == 0 {
continue
} else if ignore0 {
ignore0 = false
}
if compress == pieceIdx {
separator := ":"
if pieceIdx == 0 {
separator = "::"
}
output += separator
ignore0 = true
continue
}
output += strconv.FormatUint(uint64(address[pieceIdx]), 16)
// 32512
if pieceIdx != 7 {
output += ":"
}
}
return output
}
type IPv4Addr uint32
func (address IPv4Addr) String() string {
return strconv.Itoa(int(address>>24)) + "." +
strconv.Itoa(int((address>>16)&0xFF)) + "." +
strconv.Itoa(int((address>>8)&0xFF)) + "." +
strconv.Itoa(int(address&0xFF))
}
var idnaProfile = idna.New(
idna.MapForLookup(),
idna.BidiRule(),
idna.VerifyDNSLength(false),
idna.StrictDomainName(true),
idna.ValidateLabels(true),
idna.CheckHyphens(false),
idna.CheckJoiners(true),
idna.Transitional(false),
)
// ToASCII converts a string to ASCII using IDNA
// https://url.spec.whatwg.org/#concept-domain-to-ascii
func (p *parser) ToASCII(src string, beStrict bool) (string, error) {
if src == "" {
return "", nil
}
// If encoding is set, convert to Unicode
if p.opts.encodingOverride != nil {
if u, err := p.stringToUnicode(src); err == nil {
src = u
}
}
// Convert to punycode
a, err := idnaProfile.ToASCII(src)
if err != nil {
if !beStrict {
if containsOnlyASCIIOrMiscAndNoPunycode(src) {
return a, nil
}
}
if !p.opts.laxHostParsing {
return a, err
}
}
if a == "" {
return "", fmt.Errorf("idna toAscii returned empty string")
}
return a, nil
}
// containsOnlyASCIIOrMiscAndNoPunycode returns true if the string contains only ASCII characters or characters from Section 4.1.1 in UTS #46
// and does not contain any labels starting with acePrefix (xn--)
func containsOnlyASCIIOrMiscAndNoPunycode(s string) bool {
s = strings.ToLower(s)
p := 0
for _, r := range s {
if r >= utf8.RuneSelf && r != '\u2260' && r != '\u226e' && r != '\u226f' {
return false
}
switch {
case r == '.':
p = 0
case p == 0 && r == 'x':
p = 1
case p == 1 && r == 'n':
p = 2
case p == 2 && r == '-':
p = 3
case p == 3 && r == '-':
return false
default:
p = -1
}
}
return true
}
func (p *parser) stringToUnicode(src string) (string, error) {
var bb []byte
for _, r := range src {
if b, ok := p.opts.encodingOverride.EncodeRune(r); ok && b > 31 {
bb = append(bb, b)
} else {
return "", fmt.Errorf("could not conver %v to Unicode using %v", src, p.opts.encodingOverride.String())
}
}
return string(bb), nil
}
func percentEncodeString(s string, tr *PercentEncodeSet) string {
sb := strings.Builder{}
for _, b := range []byte(s) {
sb.WriteString(percentEncodeByte(b, tr))
}
return sb.String()
}
func percentEncodeByte(b byte, tr *PercentEncodeSet) string {
if tr != nil && !tr.ByteShouldBeEncoded(b) {
return string(b)
}
percentEncoded := make([]byte, 3)
percentEncoded[0] = '%'
percentEncoded[1] = "0123456789ABCDEF"[b>>4]
percentEncoded[2] = "0123456789ABCDEF"[b&15]
return string(percentEncoded)
}