feat: Initial commit
This commit is contained in:
+32
@@ -0,0 +1,32 @@
|
||||
# vscode
|
||||
.vscode
|
||||
debug
|
||||
*.test
|
||||
|
||||
./build
|
||||
|
||||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
||||
+17
@@ -0,0 +1,17 @@
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
+156
@@ -0,0 +1,156 @@
|
||||
# htmlquery
|
||||
|
||||
[Build Status](https://github.com/antchfx/htmlquery/actions/workflows/testing.yml)
|
||||
[GoDoc](https://godoc.org/github.com/antchfx/htmlquery)
|
||||
[Go Report Card](https://goreportcard.com/report/github.com/antchfx/htmlquery)
|
||||
|
||||
# Overview
|
||||
|
||||
`htmlquery` is an XPath query package for HTML that lets you extract data from, or evaluate expressions against, HTML documents using an XPath expression.
|
||||
|
||||
`htmlquery` has a built-in query object caching feature based on [LRU](https://godoc.org/github.com/golang/groupcache/lru) that caches recently used XPath query strings. Enabling query caching avoids recompiling the XPath expression on every query.
|
||||
|
||||
You can visit this page to learn about the supported XPath(1.0/2.0) syntax. https://github.com/antchfx/xpath
|
||||
|
||||
# XPath query packages for Go
|
||||
|
||||
| Name | Description |
|
||||
| ------------------------------------------------- | ----------------------------------------- |
|
||||
| [htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for the HTML document |
|
||||
| [xmlquery](https://github.com/antchfx/xmlquery) | XPath query package for the XML document |
|
||||
| [jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for the JSON document |
|
||||
|
||||
# Installation
|
||||
|
||||
```
|
||||
go get github.com/antchfx/htmlquery
|
||||
```
|
||||
|
||||
# Getting Started
|
||||
|
||||
#### Query, returns matched elements or error.
|
||||
|
||||
```go
|
||||
nodes, err := htmlquery.QueryAll(doc, "//a")
|
||||
if err != nil {
|
||||
panic(`not a valid XPath expression.`)
|
||||
}
|
||||
```
|
||||
|
||||
#### Load HTML document from URL.
|
||||
|
||||
```go
|
||||
doc, err := htmlquery.LoadURL("http://example.com/")
|
||||
```
|
||||
|
||||
#### Load HTML document from file.
|
||||
|
||||
```go
|
||||
filePath := "/home/user/sample.html"
|
||||
doc, err := htmlquery.LoadDoc(filePath)
|
||||
```
|
||||
|
||||
#### Load HTML document from string.
|
||||
|
||||
```go
|
||||
s := `<html>....</html>`
|
||||
doc, err := htmlquery.Parse(strings.NewReader(s))
|
||||
```
|
||||
|
||||
#### Find all A elements.
|
||||
|
||||
```go
|
||||
list := htmlquery.Find(doc, "//a")
|
||||
```
|
||||
|
||||
#### Find all A elements that have `href` attribute.
|
||||
|
||||
```go
|
||||
list := htmlquery.Find(doc, "//a[@href]")
|
||||
```
|
||||
|
||||
#### Find all A elements with `href` attribute and only return `href` value.
|
||||
|
||||
```go
|
||||
list := htmlquery.Find(doc, "//a/@href")
|
||||
for _ , n := range list{
|
||||
fmt.Println(htmlquery.InnerText(n)) // output @href value
|
||||
}
|
||||
```
|
||||
|
||||
### Find the third A element.
|
||||
|
||||
```go
|
||||
a := htmlquery.FindOne(doc, "//a[3]")
|
||||
```
|
||||
|
||||
### Find children element (img) under A `href` and print the source
|
||||
|
||||
```go
|
||||
a := htmlquery.FindOne(doc, "//a")
|
||||
img := htmlquery.FindOne(a, "//img")
|
||||
fmt.Println(htmlquery.SelectAttr(img, "src")) // output @src value
|
||||
```
|
||||
|
||||
#### Count all IMG elements.
|
||||
|
||||
```go
|
||||
expr, _ := xpath.Compile("count(//img)")
|
||||
v := expr.Evaluate(htmlquery.CreateXPathNavigator(doc)).(float64)
|
||||
fmt.Printf("total count is %f", v)
|
||||
```
|
||||
|
||||
# Quick Starts
|
||||
|
||||
```go
|
||||
func main() {
|
||||
doc, err := htmlquery.LoadURL("https://www.bing.com/search?q=golang")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
// Find all news item.
|
||||
list, err := htmlquery.QueryAll(doc, "//ol/li")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
for i, n := range list {
|
||||
a := htmlquery.FindOne(n, "//a")
|
||||
if a != nil {
|
||||
fmt.Printf("%d %s(%s)\n", i, htmlquery.InnerText(a), htmlquery.SelectAttr(a, "href"))
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
# FAQ
|
||||
|
||||
#### `Find()` vs `QueryAll()`, which is better?
|
||||
|
||||
`Find` and `QueryAll` both do the same thing: they search for all matching HTML nodes.
|
||||
`Find` panics if you give it an invalid XPath query, whereas `QueryAll` returns an error.
|
||||
|
||||
#### Can I save my query expression object for the next query?
|
||||
|
||||
Yes, you can. We offer the `QuerySelector` and `QuerySelectorAll` functions, which accept your query expression object.
|
||||
|
||||
Caching (or reusing) a query expression object avoids recompiling the XPath query expression and improves query performance.
|
||||
|
||||
#### XPath query object cache performance
|
||||
|
||||
```
|
||||
goos: windows
|
||||
goarch: amd64
|
||||
pkg: github.com/antchfx/htmlquery
|
||||
BenchmarkSelectorCache-4 20000000 55.2 ns/op
|
||||
BenchmarkDisableSelectorCache-4 500000 3162 ns/op
|
||||
```
|
||||
|
||||
#### How to disable caching?
|
||||
|
||||
```
|
||||
htmlquery.DisableSelectorCache = true
|
||||
```
|
||||
|
||||
# Questions
|
||||
|
||||
Please let me know if you have any questions.
|
||||
+42
@@ -0,0 +1,42 @@
|
||||
package htmlquery
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/antchfx/xpath"
|
||||
"github.com/golang/groupcache/lru"
|
||||
)
|
||||
|
||||
// DisableSelectorCache will disable caching for the query selector if value is true.
|
||||
var DisableSelectorCache = false
|
||||
|
||||
// SelectorCacheMaxEntries allows how many selector object can be caching. Default is 50.
|
||||
// Will disable caching if SelectorCacheMaxEntries <= 0.
|
||||
var SelectorCacheMaxEntries = 50
|
||||
|
||||
var (
|
||||
cacheOnce sync.Once
|
||||
cache *lru.Cache
|
||||
cacheMutex sync.Mutex
|
||||
)
|
||||
|
||||
func getQuery(expr string) (*xpath.Expr, error) {
|
||||
if DisableSelectorCache || SelectorCacheMaxEntries <= 0 {
|
||||
return xpath.Compile(expr)
|
||||
}
|
||||
cacheOnce.Do(func() {
|
||||
cache = lru.New(SelectorCacheMaxEntries)
|
||||
})
|
||||
cacheMutex.Lock()
|
||||
defer cacheMutex.Unlock()
|
||||
if v, ok := cache.Get(expr); ok {
|
||||
return v.(*xpath.Expr), nil
|
||||
}
|
||||
v, err := xpath.Compile(expr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cache.Add(expr, v)
|
||||
return v, nil
|
||||
|
||||
}
|
||||
+377
@@ -0,0 +1,377 @@
|
||||
/*
|
||||
Package htmlquery provides functions for extracting data from HTML documents using XPath expressions.
|
||||
*/
|
||||
package htmlquery
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"compress/gzip"
|
||||
"compress/zlib"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/antchfx/xpath"
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/charset"
|
||||
)
|
||||
|
||||
// Compile-time check that *NodeNavigator satisfies xpath.NodeNavigator.
var _ xpath.NodeNavigator = (*NodeNavigator)(nil)
|
||||
|
||||
// CreateXPathNavigator creates a new xpath.NodeNavigator for the specified html.Node.
|
||||
func CreateXPathNavigator(top *html.Node) *NodeNavigator {
|
||||
return &NodeNavigator{curr: top, root: top, attr: -1}
|
||||
}
|
||||
|
||||
// Find is like QueryAll but Will panics if the expression `expr` cannot be parsed.
|
||||
//
|
||||
// See `QueryAll()` function.
|
||||
func Find(top *html.Node, expr string) []*html.Node {
|
||||
nodes, err := QueryAll(top, expr)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return nodes
|
||||
}
|
||||
|
||||
// FindOne is like Query but will panics if the expression `expr` cannot be parsed.
|
||||
// See `Query()` function.
|
||||
func FindOne(top *html.Node, expr string) *html.Node {
|
||||
node, err := Query(top, expr)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return node
|
||||
}
|
||||
|
||||
// QueryAll searches the html.Node that matches by the specified XPath expr.
|
||||
// Return an error if the expression `expr` cannot be parsed.
|
||||
func QueryAll(top *html.Node, expr string) ([]*html.Node, error) {
|
||||
exp, err := getQuery(expr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
nodes := QuerySelectorAll(top, exp)
|
||||
return nodes, nil
|
||||
}
|
||||
|
||||
// Query runs the given XPath expression against the given html.Node and
|
||||
// returns the first matching html.Node, or nil if no matches are found.
|
||||
//
|
||||
// Returns an error if the expression `expr` cannot be parsed.
|
||||
func Query(top *html.Node, expr string) (*html.Node, error) {
|
||||
exp, err := getQuery(expr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return QuerySelector(top, exp), nil
|
||||
}
|
||||
|
||||
// QuerySelector returns the first matched html.Node by the specified XPath selector.
|
||||
func QuerySelector(top *html.Node, selector *xpath.Expr) *html.Node {
|
||||
t := selector.Select(CreateXPathNavigator(top))
|
||||
if t.MoveNext() {
|
||||
return getCurrentNode(t.Current().(*NodeNavigator))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// QuerySelectorAll searches all of the html.Node that matches the specified XPath selectors.
|
||||
func QuerySelectorAll(top *html.Node, selector *xpath.Expr) []*html.Node {
|
||||
var elems []*html.Node
|
||||
t := selector.Select(CreateXPathNavigator(top))
|
||||
for t.MoveNext() {
|
||||
nav := t.Current().(*NodeNavigator)
|
||||
n := getCurrentNode(nav)
|
||||
elems = append(elems, n)
|
||||
}
|
||||
return elems
|
||||
}
|
||||
|
||||
// LoadURL loads the HTML document from the specified URL. Default enabling gzip on a HTTP request.
|
||||
func LoadURL(url string) (*html.Node, error) {
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Enable gzip compression.
|
||||
req.Header.Add("Accept-Encoding", "gzip")
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var reader io.ReadCloser
|
||||
|
||||
defer func() {
|
||||
if reader != nil {
|
||||
reader.Close()
|
||||
}
|
||||
}()
|
||||
encoding := resp.Header.Get("Content-Encoding")
|
||||
switch encoding {
|
||||
case "gzip":
|
||||
reader, err = gzip.NewReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
case "deflate":
|
||||
reader, err = zlib.NewReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
case "":
|
||||
reader = resp.Body
|
||||
default:
|
||||
return nil, fmt.Errorf("%s compression is not support", encoding)
|
||||
}
|
||||
|
||||
r, err := charset.NewReader(reader, resp.Header.Get("Content-Type"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return html.Parse(r)
|
||||
}
|
||||
|
||||
// LoadDoc loads the HTML document from the specified file path.
|
||||
func LoadDoc(path string) (*html.Node, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
return html.Parse(bufio.NewReader(f))
|
||||
}
|
||||
|
||||
func getCurrentNode(n *NodeNavigator) *html.Node {
|
||||
if n.NodeType() == xpath.AttributeNode {
|
||||
childNode := &html.Node{
|
||||
Type: html.TextNode,
|
||||
Data: n.Value(),
|
||||
}
|
||||
return &html.Node{
|
||||
Type: html.ElementNode,
|
||||
Data: n.LocalName(),
|
||||
FirstChild: childNode,
|
||||
LastChild: childNode,
|
||||
}
|
||||
|
||||
}
|
||||
return n.curr
|
||||
}
|
||||
|
||||
// Parse returns the parse tree for the HTML from the given Reader.
|
||||
func Parse(r io.Reader) (*html.Node, error) {
|
||||
return html.Parse(r)
|
||||
}
|
||||
|
||||
// InnerText returns the text between the start and end tags of the object.
|
||||
func InnerText(n *html.Node) string {
|
||||
var output func(*strings.Builder, *html.Node)
|
||||
output = func(b *strings.Builder, n *html.Node) {
|
||||
switch n.Type {
|
||||
case html.TextNode:
|
||||
b.WriteString(n.Data)
|
||||
return
|
||||
case html.CommentNode:
|
||||
return
|
||||
}
|
||||
for child := n.FirstChild; child != nil; child = child.NextSibling {
|
||||
output(b, child)
|
||||
}
|
||||
}
|
||||
|
||||
var b strings.Builder
|
||||
output(&b, n)
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// SelectAttr returns the attribute value with the specified name.
|
||||
func SelectAttr(n *html.Node, name string) (val string) {
|
||||
if n == nil {
|
||||
return
|
||||
}
|
||||
if n.Type == html.ElementNode && n.Parent == nil && name == n.Data {
|
||||
return InnerText(n)
|
||||
}
|
||||
for _, attr := range n.Attr {
|
||||
if attr.Key == name {
|
||||
val = attr.Val
|
||||
break
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// ExistsAttr returns whether attribute with specified name exists.
|
||||
func ExistsAttr(n *html.Node, name string) bool {
|
||||
if n == nil {
|
||||
return false
|
||||
}
|
||||
for _, attr := range n.Attr {
|
||||
if attr.Key == name {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// OutputHTML returns the text including tags name.
|
||||
func OutputHTML(n *html.Node, self bool) string {
|
||||
var b strings.Builder
|
||||
if self {
|
||||
html.Render(&b, n)
|
||||
} else {
|
||||
for n := n.FirstChild; n != nil; n = n.NextSibling {
|
||||
html.Render(&b, n)
|
||||
}
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
type NodeNavigator struct {
|
||||
root, curr *html.Node
|
||||
attr int
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) Current() *html.Node {
|
||||
return h.curr
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) NodeType() xpath.NodeType {
|
||||
switch h.curr.Type {
|
||||
case html.CommentNode:
|
||||
return xpath.CommentNode
|
||||
case html.TextNode:
|
||||
return xpath.TextNode
|
||||
case html.DocumentNode:
|
||||
return xpath.RootNode
|
||||
case html.ElementNode:
|
||||
if h.attr != -1 {
|
||||
return xpath.AttributeNode
|
||||
}
|
||||
return xpath.ElementNode
|
||||
case html.DoctypeNode:
|
||||
// ignored <!DOCTYPE HTML> declare and as Root-Node type.
|
||||
return xpath.RootNode
|
||||
}
|
||||
panic(fmt.Sprintf("unknown HTML node type: %v", h.curr.Type))
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) LocalName() string {
|
||||
if h.attr != -1 {
|
||||
return h.curr.Attr[h.attr].Key
|
||||
}
|
||||
return h.curr.Data
|
||||
}
|
||||
|
||||
func (*NodeNavigator) Prefix() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) Value() string {
|
||||
switch h.curr.Type {
|
||||
case html.CommentNode:
|
||||
return h.curr.Data
|
||||
case html.ElementNode:
|
||||
if h.attr != -1 {
|
||||
return h.curr.Attr[h.attr].Val
|
||||
}
|
||||
return InnerText(h.curr)
|
||||
case html.TextNode:
|
||||
return h.curr.Data
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) Copy() xpath.NodeNavigator {
|
||||
n := *h
|
||||
return &n
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToRoot() {
|
||||
h.curr = h.root
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToParent() bool {
|
||||
if h.attr != -1 {
|
||||
h.attr = -1
|
||||
return true
|
||||
} else if node := h.curr.Parent; node != nil {
|
||||
h.curr = node
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToNextAttribute() bool {
|
||||
if h.attr >= len(h.curr.Attr)-1 {
|
||||
return false
|
||||
}
|
||||
h.attr++
|
||||
return true
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToChild() bool {
|
||||
if h.attr != -1 {
|
||||
return false
|
||||
}
|
||||
if node := h.curr.FirstChild; node != nil {
|
||||
h.curr = node
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToFirst() bool {
|
||||
if h.attr != -1 || h.curr.PrevSibling == nil {
|
||||
return false
|
||||
}
|
||||
for {
|
||||
node := h.curr.PrevSibling
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
h.curr = node
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) String() string {
|
||||
return h.Value()
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToNext() bool {
|
||||
if h.attr != -1 {
|
||||
return false
|
||||
}
|
||||
if node := h.curr.NextSibling; node != nil {
|
||||
h.curr = node
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToPrevious() bool {
|
||||
if h.attr != -1 {
|
||||
return false
|
||||
}
|
||||
if node := h.curr.PrevSibling; node != nil {
|
||||
h.curr = node
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveTo(other xpath.NodeNavigator) bool {
|
||||
node, ok := other.(*NodeNavigator)
|
||||
if !ok || node.root != h.root {
|
||||
return false
|
||||
}
|
||||
|
||||
h.curr = node.curr
|
||||
h.attr = node.attr
|
||||
return true
|
||||
}
|
||||
+32
@@ -0,0 +1,32 @@
|
||||
# vscode
|
||||
.vscode
|
||||
debug
|
||||
*.test
|
||||
|
||||
./build
|
||||
|
||||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
||||
+17
@@ -0,0 +1,17 @@
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
+302
@@ -0,0 +1,302 @@
|
||||
# xmlquery
|
||||
|
||||
[Build Status](https://github.com/antchfx/xmlquery/actions/workflows/testing.yml)
|
||||
[GoDoc](https://godoc.org/github.com/antchfx/xmlquery)
|
||||
[Go Report Card](https://goreportcard.com/report/github.com/antchfx/xmlquery)
|
||||
|
||||
# Overview
|
||||
|
||||
`xmlquery` is an XPath query package for XML documents, allowing you to extract
|
||||
data or evaluate from XML documents with an XPath expression.
|
||||
|
||||
`xmlquery` has a built-in query object caching feature that caches recently used
|
||||
XPath query strings. Enabling caching avoids recompiling the XPath expression for
|
||||
each query.
|
||||
|
||||
You can visit this page to learn about the supported XPath(1.0/2.0) syntax. https://github.com/antchfx/xpath
|
||||
|
||||
[htmlquery](https://github.com/antchfx/htmlquery) - Package for the HTML document query.
|
||||
|
||||
[xmlquery](https://github.com/antchfx/xmlquery) - Package for the XML document query.
|
||||
|
||||
[jsonquery](https://github.com/antchfx/jsonquery) - Package for the JSON document query.
|
||||
|
||||
# Installation
|
||||
|
||||
```
|
||||
$ go get github.com/antchfx/xmlquery
|
||||
```
|
||||
|
||||
# Quick Starts
|
||||
|
||||
```go
|
||||
import (
|
||||
"github.com/antchfx/xmlquery"
|
||||
)
|
||||
|
||||
func main(){
|
||||
s := `<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>W3Schools Home Page</title>
|
||||
<link>https://www.w3schools.com</link>
|
||||
<description>Free web building tutorials</description>
|
||||
<item>
|
||||
<title>RSS Tutorial</title>
|
||||
<link>https://www.w3schools.com/xml/xml_rss.asp</link>
|
||||
<description>New RSS tutorial on W3Schools</description>
|
||||
</item>
|
||||
<item>
|
||||
<title>XML Tutorial</title>
|
||||
<link>https://www.w3schools.com/xml</link>
|
||||
<description>New XML tutorial on W3Schools</description>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
doc, err := xmlquery.Parse(strings.NewReader(s))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
channel := xmlquery.FindOne(doc, "//channel")
|
||||
if n := channel.SelectElement("title"); n != nil {
|
||||
fmt.Printf("title: %s\n", n.InnerText())
|
||||
}
|
||||
if n := channel.SelectElement("link"); n != nil {
|
||||
fmt.Printf("link: %s\n", n.InnerText())
|
||||
}
|
||||
for i, n := range xmlquery.Find(doc, "//item/title") {
|
||||
fmt.Printf("#%d %s\n", i, n.InnerText())
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
# Getting Started
|
||||
|
||||
### Find specified XPath query.
|
||||
|
||||
```go
|
||||
list, err := xmlquery.QueryAll(doc, "a")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
```
|
||||
|
||||
#### Parse an XML from URL.
|
||||
|
||||
```go
|
||||
doc, err := xmlquery.LoadURL("http://www.example.com/sitemap.xml")
|
||||
```
|
||||
|
||||
#### Parse an XML from string.
|
||||
|
||||
```go
|
||||
s := `<?xml version="1.0" encoding="utf-8"?><rss version="2.0"></rss>`
|
||||
doc, err := xmlquery.Parse(strings.NewReader(s))
|
||||
```
|
||||
|
||||
#### Parse an XML from io.Reader.
|
||||
|
||||
```go
|
||||
f, err := os.Open("../books.xml")
|
||||
doc, err := xmlquery.Parse(f)
|
||||
```
|
||||
|
||||
#### Parse an XML in a stream fashion (simple case without elements filtering).
|
||||
|
||||
```go
|
||||
f, _ := os.Open("../books.xml")
|
||||
p, err := xmlquery.CreateStreamParser(f, "/bookstore/book")
|
||||
for {
|
||||
n, err := p.Read()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
fmt.Println(n)
|
||||
}
|
||||
```
|
||||
|
||||
Note: `CreateStreamParser()` is used to save memory when you have a large XML file to parse.
|
||||
|
||||
#### Parse an XML in a stream fashion (advanced case with element filtering).
|
||||
|
||||
```go
|
||||
f, _ := os.Open("../books.xml")
|
||||
p, err := xmlquery.CreateStreamParser(f, "/bookstore/book", "/bookstore/book[price>=10]")
|
||||
for {
|
||||
n, err := p.Read()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
fmt.Println(n)
|
||||
}
|
||||
```
|
||||
|
||||
#### Find authors of all books in the bookstore.
|
||||
|
||||
```go
|
||||
list := xmlquery.Find(doc, "//book//author")
|
||||
// or
|
||||
list := xmlquery.Find(doc, "//author")
|
||||
```
|
||||
|
||||
#### Find the second book.
|
||||
|
||||
```go
|
||||
book := xmlquery.FindOne(doc, "//book[2]")
|
||||
```
|
||||
|
||||
#### Find the last book.
|
||||
|
||||
```go
|
||||
book := xmlquery.FindOne(doc, "//book[last()]")
|
||||
```
|
||||
|
||||
#### Find all book elements and only get `id` attribute.
|
||||
|
||||
```go
|
||||
list := xmlquery.Find(doc,"//book/@id")
|
||||
fmt.Println(list[0].InnerText()) // output @id value
|
||||
```
|
||||
|
||||
#### Find all books with id `bk104`.
|
||||
|
||||
```go
|
||||
list := xmlquery.Find(doc, "//book[@id='bk104']")
|
||||
```
|
||||
|
||||
#### Find all books with price less than 5.
|
||||
|
||||
```go
|
||||
list := xmlquery.Find(doc, "//book[price<5]")
|
||||
```
|
||||
|
||||
#### Evaluate total price of all books.
|
||||
|
||||
```go
|
||||
expr, err := xpath.Compile("sum(//book/price)")
|
||||
price := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64)
|
||||
fmt.Printf("total price: %f\n", price)
|
||||
```
|
||||
|
||||
#### Count the number of books.
|
||||
|
||||
```go
|
||||
expr, err := xpath.Compile("count(//book)")
|
||||
count := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64)
|
||||
```
|
||||
|
||||
#### Calculate the total price of all book prices.
|
||||
|
||||
```go
|
||||
expr, err := xpath.Compile("sum(//book/price)")
|
||||
price := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64)
|
||||
```
|
||||
|
||||
# Advanced Features
|
||||
|
||||
### Parse `UTF-16` XML file with `ParseWithOptions()`.
|
||||
|
||||
```go
|
||||
f, _ := os.Open(`UTF-16.XML`)
|
||||
// Convert UTF-16 XML to UTF-8
|
||||
utf16ToUtf8Transformer := unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder()
|
||||
utf8Reader := transform.NewReader(f, utf16ToUtf8Transformer)
|
||||
// Sets `CharsetReader`
|
||||
options := xmlquery.ParserOptions{
|
||||
Decoder: &xmlquery.DecoderOptions{
|
||||
CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
|
||||
return input, nil
|
||||
},
|
||||
},
|
||||
}
|
||||
doc, err := xmlquery.ParseWithOptions(utf8Reader, options)
|
||||
```
|
||||
|
||||
### Query with custom namespace prefix.
|
||||
|
||||
```go
|
||||
s := `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<pd:ProcessDefinition xmlns:pd="http://xmlns.xyz.com/process/2003" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
|
||||
<pd:activity name="Invoke Request-Response Service">
|
||||
<pd:type>RequestReplyActivity</pd:type>
|
||||
<pd:resourceType>OpClientReqActivity</pd:resourceType>
|
||||
<pd:x>300</pd:x>
|
||||
<pd:y>80</pd:y>
|
||||
</pd:activity>
|
||||
</pd:ProcessDefinition>`
|
||||
nsMap := map[string]string{
|
||||
"q": "http://xmlns.xyz.com/process/2003",
|
||||
"r": "http://www.w3.org/1999/XSL/Transform",
|
||||
"s": "http://www.w3.org/2001/XMLSchema",
|
||||
}
|
||||
expr, _ := xpath.CompileWithNS("//q:activity", nsMap)
|
||||
node := xmlquery.QuerySelector(doc, expr)
|
||||
```
|
||||
|
||||
#### Create XML document without call `xml.Marshal`.
|
||||
|
||||
```go
|
||||
doc := &xmlquery.Node{
|
||||
Type: xmlquery.DeclarationNode,
|
||||
Data: "xml",
|
||||
Attr: []xmlquery.Attr{
|
||||
{Name: xml.Name{Local: "version"}, Value: "1.0"},
|
||||
},
|
||||
}
|
||||
root := &xmlquery.Node{
|
||||
Data: "rss",
|
||||
Type: xmlquery.ElementNode,
|
||||
}
|
||||
doc.FirstChild = root
|
||||
channel := &xmlquery.Node{
|
||||
Data: "channel",
|
||||
Type: xmlquery.ElementNode,
|
||||
}
|
||||
root.FirstChild = channel
|
||||
title := &xmlquery.Node{
|
||||
Data: "title",
|
||||
Type: xmlquery.ElementNode,
|
||||
}
|
||||
title_text := &xmlquery.Node{
|
||||
Data: "W3Schools Home Page",
|
||||
Type: xmlquery.TextNode,
|
||||
}
|
||||
title.FirstChild = title_text
|
||||
channel.FirstChild = title
|
||||
|
||||
fmt.Println(doc.OutputXML(true))
|
||||
fmt.Println(doc.OutputXMLWithOptions(xmlquery.WithOutputSelf()))
|
||||
```
|
||||
|
||||
Output:
|
||||
|
||||
```xml
|
||||
<?xml version="1.0"?><rss><channel><title>W3Schools Home Page</title></channel></rss>
|
||||
```
|
||||
|
||||
# FAQ
|
||||
|
||||
#### `Find()` vs `QueryAll()`, which is better?
|
||||
|
||||
`Find` and `QueryAll` both do the same thing: they search for all matching XML nodes.
|
||||
`Find` panics if provided with an invalid XPath query, while `QueryAll` returns
|
||||
an error.
|
||||
|
||||
#### Can I save my query expression object for the next query?
|
||||
|
||||
Yes, you can. We provide `QuerySelector` and `QuerySelectorAll` methods; they
|
||||
accept your query expression object.
|
||||
|
||||
Caching a query expression object avoids recompiling the XPath query
|
||||
expression, improving query performance.
|
||||
|
||||
# Questions
|
||||
|
||||
Please let me know if you have any questions
|
||||
+43
@@ -0,0 +1,43 @@
|
||||
package xmlquery
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/golang/groupcache/lru"
|
||||
|
||||
"github.com/antchfx/xpath"
|
||||
)
|
||||
|
||||
// DisableSelectorCache will disable caching for the query selector if value is true.
|
||||
var DisableSelectorCache = false
|
||||
|
||||
// SelectorCacheMaxEntries allows how many selector object can be caching. Default is 50.
|
||||
// Will disable caching if SelectorCacheMaxEntries <= 0.
|
||||
var SelectorCacheMaxEntries = 50
|
||||
|
||||
var (
|
||||
cacheOnce sync.Once
|
||||
cache *lru.Cache
|
||||
cacheMutex sync.Mutex
|
||||
)
|
||||
|
||||
func getQuery(expr string) (*xpath.Expr, error) {
|
||||
if DisableSelectorCache || SelectorCacheMaxEntries <= 0 {
|
||||
return xpath.Compile(expr)
|
||||
}
|
||||
cacheOnce.Do(func() {
|
||||
cache = lru.New(SelectorCacheMaxEntries)
|
||||
})
|
||||
cacheMutex.Lock()
|
||||
defer cacheMutex.Unlock()
|
||||
if v, ok := cache.Get(expr); ok {
|
||||
return v.(*xpath.Expr), nil
|
||||
}
|
||||
v, err := xpath.Compile(expr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cache.Add(expr, v)
|
||||
return v, nil
|
||||
|
||||
}
|
||||
+79
@@ -0,0 +1,79 @@
|
||||
package xmlquery
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
)
|
||||
|
||||
// cachedReader wraps a bufio.Reader and, while caching is switched on, keeps
// a copy of the bytes handed out by Read and ReadByte.
//
// The copy never grows beyond the capacity the cache slice was created with
// (4096 bytes in newCachedReader). Bytes read after the cache is full are
// still returned to the caller but are not recorded.
type cachedReader struct {
	buffer  *bufio.Reader // underlying source of bytes
	cache   []byte        // bytes recorded since the last StartCaching
	caching bool          // whether reads are currently being recorded
}

// newCachedReader returns a cachedReader over r with caching switched off and
// an empty cache of capacity 4096.
func newCachedReader(r *bufio.Reader) *cachedReader {
	return &cachedReader{
		buffer:  r,
		cache:   make([]byte, 0, 4096),
		caching: false,
	}
}

// StartCaching discards anything recorded so far and switches caching on.
func (c *cachedReader) StartCaching() {
	c.cache = c.cache[:0]
	c.caching = true
}

// ReadByte reads one byte from the underlying reader and records it when
// caching is on. On error nothing is recorded.
func (c *cachedReader) ReadByte() (b byte, err error) {
	b, err = c.buffer.ReadByte()
	if err != nil {
		return
	}
	if c.caching {
		c.cacheByte(b)
	}
	return
}

// Cache returns the bytes recorded so far. The returned slice aliases the
// internal buffer and is overwritten after the next StartCaching.
func (c *cachedReader) Cache() []byte {
	return c.cache
}

// CacheWithLimit returns at most the first n recorded bytes, or nil when
// n < 1. The returned slice aliases the internal buffer.
func (c *cachedReader) CacheWithLimit(n int) []byte {
	if n < 1 {
		return nil
	}
	l := len(c.cache)
	if n > l {
		n = l
	}
	return c.cache[:n]
}

// StopCaching switches caching off; the recorded bytes are kept.
func (c *cachedReader) StopCaching() {
	c.caching = false
}

// Read reads into p from the underlying reader and records the bytes that
// were read when caching is on.
//
// A reader may return n > 0 together with a non-nil error. Those n bytes have
// been delivered to the caller, so they are recorded before the error is
// returned.
func (c *cachedReader) Read(p []byte) (int, error) {
	n, err := c.buffer.Read(p)
	if c.caching {
		for _, b := range p[:n] {
			if !c.cacheByte(b) {
				// Cache is full; the remaining bytes are not recorded.
				break
			}
		}
	}
	return n, err
}

// cacheByte appends b to the cache and reports whether there was room. The
// cache never grows beyond its initial capacity.
func (c *cachedReader) cacheByte(b byte) bool {
	n := len(c.cache)
	if n == cap(c.cache) {
		return false
	}
	c.cache = c.cache[:n+1]
	c.cache[n] = b
	return true
}
|
||||
+477
@@ -0,0 +1,477 @@
|
||||
package xmlquery
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"html"
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// A NodeType is the type of a Node.
type NodeType uint

const (
	// DocumentNode is a document object that, as the root of the document tree,
	// provides access to the entire XML document.
	DocumentNode NodeType = iota
	// DeclarationNode is an XML declaration or processing instruction
	// (for example, <?xml version="1.0"?>).
	DeclarationNode
	// ElementNode is an element (for example, <item>).
	ElementNode
	// TextNode is the text content of a node.
	TextNode
	// CharDataNode is a CDATA section (for example, <![CDATA[content]]>).
	CharDataNode
	// CommentNode is a comment (for example, <!-- my comment -->).
	CommentNode
	// AttributeNode is an attribute of an element.
	AttributeNode
	// NotationNode is a directive in the document (for example, <!DOCTYPE ...>).
	NotationNode
)
|
||||
|
||||
// Attr is a single attribute attached to a Node.
type Attr struct {
	Name         xml.Name // attribute name; Space is written as the prefix when serializing
	Value        string   // attribute value
	NamespaceURI string   // namespace of the attribute, if any
}
|
||||
|
||||
// A Node consists of a NodeType and some Data (tag name for
// element nodes, content for text) and are part of a tree of Nodes.
type Node struct {
	// Links to the neighbouring nodes in the tree; nil where there is none.
	Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node

	Type         NodeType // kind of node
	Data         string   // tag name for elements, content for text-like nodes
	Prefix       string   // namespace prefix written before Data when serializing
	NamespaceURI string   // namespace of the node, if any
	Attr         []Attr   // attributes of the node

	level int // node level in the tree
}
|
||||
|
||||
// outputConfiguration collects the settings that control XML serialization.
type outputConfiguration struct {
	printSelf              bool   // include the node itself, not only its children
	preserveSpaces         bool   // keep text data untrimmed
	emptyElementTagSupport bool   // write childless elements as <empty/>
	skipComments           bool   // omit comment nodes
	useIndentation         string // indentation unit; empty disables indentation
}

// OutputOption mutates an outputConfiguration.
type OutputOption func(*outputConfiguration)

// WithOutputSelf makes the output include the node it is called on.
func WithOutputSelf() OutputOption {
	return func(cfg *outputConfiguration) { cfg.printSelf = true }
}

// WithEmptyTagSupport writes elements without children as <empty/> instead
// of <empty></empty>.
func WithEmptyTagSupport() OutputOption {
	return func(cfg *outputConfiguration) { cfg.emptyElementTagSupport = true }
}

// WithoutComments leaves comments out of the output.
func WithoutComments() OutputOption {
	return func(cfg *outputConfiguration) { cfg.skipComments = true }
}

// WithPreserveSpace keeps whitespace in the output.
func WithPreserveSpace() OutputOption {
	return func(cfg *outputConfiguration) { cfg.preserveSpaces = true }
}

// WithoutPreserveSpace turns whitespace preservation off.
func WithoutPreserveSpace() OutputOption {
	return func(cfg *outputConfiguration) { cfg.preserveSpaces = false }
}

// WithIndentation sets the string used for one level of indentation.
func WithIndentation(indentation string) OutputOption {
	return func(cfg *outputConfiguration) { cfg.useIndentation = indentation }
}
|
||||
|
||||
// newXMLName splits name at its first ':' into Space and Local. A name
// without a colon, or one that starts with a colon, is kept whole as Local.
func newXMLName(name string) xml.Name {
	colon := strings.IndexByte(name, ':')
	if colon <= 0 {
		return xml.Name{Local: name}
	}
	return xml.Name{Space: name[:colon], Local: name[colon+1:]}
}
|
||||
|
||||
// Level returns the node's level in the tree.
func (n *Node) Level() int {
	return n.level
}
|
||||
|
||||
// InnerText returns the text between the start and end tags of the object.
|
||||
func (n *Node) InnerText() string {
|
||||
var output func(*strings.Builder, *Node)
|
||||
output = func(b *strings.Builder, n *Node) {
|
||||
switch n.Type {
|
||||
case TextNode, CharDataNode:
|
||||
b.WriteString(n.Data)
|
||||
case CommentNode:
|
||||
default:
|
||||
for child := n.FirstChild; child != nil; child = child.NextSibling {
|
||||
output(b, child)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var b strings.Builder
|
||||
output(&b, n)
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func (n *Node) sanitizedData(preserveSpaces bool) string {
|
||||
if preserveSpaces {
|
||||
return n.Data
|
||||
}
|
||||
return strings.TrimSpace(n.Data)
|
||||
}
|
||||
|
||||
func calculatePreserveSpaces(n *Node, pastValue bool) bool {
|
||||
if attr := n.SelectAttr("xml:space"); attr == "preserve" {
|
||||
return true
|
||||
} else if attr == "default" {
|
||||
return false
|
||||
}
|
||||
return pastValue
|
||||
}
|
||||
|
||||
// indentation tracks nesting depth while serializing and writes line breaks
// plus indentation to w. A nil *indentation is valid and makes every method
// a no-op.
type indentation struct {
	level    int       // current nesting depth
	hasChild bool      // set by Close, cleared by Open
	indent   string    // string repeated once per level
	w        io.Writer // destination for line breaks and indentation
}

// newIndentation returns an indentation writing to w, or nil when indent is
// empty.
func newIndentation(indent string, w io.Writer) *indentation {
	if indent == "" {
		return nil
	}
	return &indentation{indent: indent, w: w}
}

// NewLine writes a single line break.
func (i *indentation) NewLine() (err error) {
	if i != nil {
		_, err = io.WriteString(i.w, "\n")
	}
	return err
}

// Open writes the indentation for the current level and then descends one
// level.
func (i *indentation) Open() (err error) {
	if i == nil {
		return nil
	}
	if err = i.writeIndent(); err != nil {
		return err
	}
	i.hasChild = false
	i.level++
	return nil
}

// Close ascends one level. Indentation is written only when a nested Close
// happened since the matching Open.
func (i *indentation) Close() (err error) {
	if i == nil {
		return nil
	}
	i.level--
	if i.hasChild {
		if err = i.writeIndent(); err != nil {
			return err
		}
	}
	i.hasChild = true
	return nil
}

// writeIndent writes a line break followed by the indent string repeated
// once per level.
func (i *indentation) writeIndent() (err error) {
	if _, err = io.WriteString(i.w, "\n"); err != nil {
		return err
	}
	_, err = io.WriteString(i.w, strings.Repeat(i.indent, i.level))
	return err
}
|
||||
|
||||
// outputXML serializes n and its subtree to w.
//
// preserveSpaces is the whitespace policy inherited from the caller; it is
// re-evaluated against n's own xml:space attribute before use. config
// selects comment and empty-tag handling, and indent (which may be nil)
// writes line breaks and indentation. The first write error is returned.
func outputXML(w io.Writer, n *Node, preserveSpaces bool, config *outputConfiguration, indent *indentation) (err error) {
	preserveSpaces = calculatePreserveSpaces(n, preserveSpaces)
	switch n.Type {
	case TextNode:
		_, err = io.WriteString(w, html.EscapeString(n.sanitizedData(preserveSpaces)))
		return
	case CharDataNode:
		_, err = fmt.Fprintf(w, "<![CDATA[%v]]>", n.Data)
		return
	case CommentNode:
		if !config.skipComments {
			_, err = fmt.Fprintf(w, "<!--%v-->", n.Data)
		}
		return
	case NotationNode:
		if err = indent.NewLine(); err != nil {
			return
		}
		_, err = fmt.Fprintf(w, "<!%s>", n.Data)
		return
	case DeclarationNode:
		// Opening of "<?target ...?>"; attributes and the closing "?>" follow below.
		_, err = io.WriteString(w, "<?"+n.Data)
		if err != nil {
			return
		}
	default:
		// Every other node type is written as an element start tag.
		if err = indent.Open(); err != nil {
			return
		}
		if n.Prefix == "" {
			_, err = io.WriteString(w, "<"+n.Data)
		} else {
			_, err = fmt.Fprintf(w, "<%s:%s", n.Prefix, n.Data)
		}
		if err != nil {
			return
		}
	}

	// Attributes, shared by declarations and elements.
	for _, attr := range n.Attr {
		if attr.Name.Space != "" {
			_, err = fmt.Fprintf(w, ` %s:%s=`, attr.Name.Space, attr.Name.Local)
		} else {
			_, err = fmt.Fprintf(w, ` %s=`, attr.Name.Local)
		}
		if err != nil {
			return
		}

		_, err = fmt.Fprintf(w, `"%v"`, html.EscapeString(attr.Value))
		if err != nil {
			return
		}
	}
	if n.Type == DeclarationNode {
		_, err = io.WriteString(w, "?>")
	} else {
		if n.FirstChild != nil || !config.emptyElementTagSupport {
			_, err = io.WriteString(w, ">")
		} else {
			// Childless element with empty-tag support: self-close and stop.
			_, err = io.WriteString(w, "/>")
			if err != nil {
				return
			}
			err = indent.Close()
			return
		}
	}
	if err != nil {
		return
	}
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		err = outputXML(w, child, preserveSpaces, config, indent)
		if err != nil {
			return
		}
	}
	// Declarations have no end tag; everything else gets one.
	if n.Type != DeclarationNode {
		if err = indent.Close(); err != nil {
			return
		}
		if n.Prefix == "" {
			_, err = fmt.Fprintf(w, "</%s>", n.Data)
		} else {
			_, err = fmt.Fprintf(w, "</%s:%s>", n.Prefix, n.Data)
		}
	}
	return
}
|
||||
|
||||
// OutputXML returns the text that including tags name.
|
||||
func (n *Node) OutputXML(self bool) string {
|
||||
if self {
|
||||
return n.OutputXMLWithOptions(WithOutputSelf())
|
||||
}
|
||||
return n.OutputXMLWithOptions()
|
||||
}
|
||||
|
||||
// OutputXMLWithOptions returns the text that including tags name.
|
||||
func (n *Node) OutputXMLWithOptions(opts ...OutputOption) string {
|
||||
var b strings.Builder
|
||||
n.WriteWithOptions(&b, opts...)
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// Write writes xml to given writer.
|
||||
func (n *Node) Write(writer io.Writer, self bool) error {
|
||||
if self {
|
||||
return n.WriteWithOptions(writer, WithOutputSelf())
|
||||
}
|
||||
return n.WriteWithOptions(writer)
|
||||
}
|
||||
|
||||
// WriteWithOptions writes xml with given options to given writer.
|
||||
func (n *Node) WriteWithOptions(writer io.Writer, opts ...OutputOption) (err error) {
|
||||
config := &outputConfiguration{
|
||||
preserveSpaces: true,
|
||||
}
|
||||
// Set the options
|
||||
for _, opt := range opts {
|
||||
opt(config)
|
||||
}
|
||||
pastPreserveSpaces := config.preserveSpaces
|
||||
preserveSpaces := calculatePreserveSpaces(n, pastPreserveSpaces)
|
||||
b := bufio.NewWriter(writer)
|
||||
defer b.Flush()
|
||||
|
||||
ident := newIndentation(config.useIndentation, b)
|
||||
if config.printSelf && n.Type != DocumentNode {
|
||||
err = outputXML(b, n, preserveSpaces, config, ident)
|
||||
} else {
|
||||
for n := n.FirstChild; n != nil; n = n.NextSibling {
|
||||
err = outputXML(b, n, preserveSpaces, config, ident)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// AddAttr adds a new attribute specified by 'key' and 'val' to a node 'n'.
|
||||
func AddAttr(n *Node, key, val string) {
|
||||
attr := Attr{
|
||||
Name: newXMLName(key),
|
||||
Value: val,
|
||||
}
|
||||
n.Attr = append(n.Attr, attr)
|
||||
}
|
||||
|
||||
// SetAttr allows an attribute value with the specified name to be changed.
|
||||
// If the attribute did not previously exist, it will be created.
|
||||
func (n *Node) SetAttr(key, value string) {
|
||||
name := newXMLName(key)
|
||||
for i, attr := range n.Attr {
|
||||
if attr.Name == name {
|
||||
n.Attr[i].Value = value
|
||||
return
|
||||
}
|
||||
}
|
||||
AddAttr(n, key, value)
|
||||
}
|
||||
|
||||
// RemoveAttr removes the attribute with the specified name.
|
||||
func (n *Node) RemoveAttr(key string) {
|
||||
name := newXMLName(key)
|
||||
for i, attr := range n.Attr {
|
||||
if attr.Name == name {
|
||||
n.Attr = append(n.Attr[:i], n.Attr[i+1:]...)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// AddChild adds a new node 'n' to a node 'parent' as its last child.
|
||||
func AddChild(parent, n *Node) {
|
||||
n.Parent = parent
|
||||
n.NextSibling = nil
|
||||
if parent.FirstChild == nil {
|
||||
parent.FirstChild = n
|
||||
n.PrevSibling = nil
|
||||
} else {
|
||||
parent.LastChild.NextSibling = n
|
||||
n.PrevSibling = parent.LastChild
|
||||
}
|
||||
|
||||
parent.LastChild = n
|
||||
}
|
||||
|
||||
// AddSibling adds a new node 'n' as a last node of sibling chain for a given node 'sibling'.
|
||||
func AddSibling(sibling, n *Node) {
|
||||
for t := sibling.NextSibling; t != nil; t = t.NextSibling {
|
||||
sibling = t
|
||||
}
|
||||
n.Parent = sibling.Parent
|
||||
sibling.NextSibling = n
|
||||
n.PrevSibling = sibling
|
||||
n.NextSibling = nil
|
||||
if sibling.Parent != nil {
|
||||
sibling.Parent.LastChild = n
|
||||
}
|
||||
}
|
||||
|
||||
// AddImmediateSibling adds a new node 'n' as immediate sibling a given node 'sibling'.
|
||||
func AddImmediateSibling(sibling, n *Node) {
|
||||
n.Parent = sibling.Parent
|
||||
n.NextSibling = sibling.NextSibling
|
||||
sibling.NextSibling = n
|
||||
n.PrevSibling = sibling
|
||||
if n.NextSibling != nil {
|
||||
n.NextSibling.PrevSibling = n
|
||||
} else if n.Parent != nil {
|
||||
sibling.Parent.LastChild = n
|
||||
}
|
||||
}
|
||||
|
||||
// RemoveFromTree removes a node and its subtree from the document
|
||||
// tree it is in. If the node is the root of the tree, then it's no-op.
|
||||
func RemoveFromTree(n *Node) {
|
||||
if n.Parent == nil {
|
||||
return
|
||||
}
|
||||
if n.Parent.FirstChild == n {
|
||||
if n.Parent.LastChild == n {
|
||||
n.Parent.FirstChild = nil
|
||||
n.Parent.LastChild = nil
|
||||
} else {
|
||||
n.Parent.FirstChild = n.NextSibling
|
||||
n.NextSibling.PrevSibling = nil
|
||||
}
|
||||
} else {
|
||||
if n.Parent.LastChild == n {
|
||||
n.Parent.LastChild = n.PrevSibling
|
||||
n.PrevSibling.NextSibling = nil
|
||||
} else {
|
||||
n.PrevSibling.NextSibling = n.NextSibling
|
||||
n.NextSibling.PrevSibling = n.PrevSibling
|
||||
}
|
||||
}
|
||||
n.Parent = nil
|
||||
n.PrevSibling = nil
|
||||
n.NextSibling = nil
|
||||
}
|
||||
|
||||
// GetRoot returns a root of the tree where 'n' is a node.
|
||||
func GetRoot(n *Node) *Node {
|
||||
if n == nil {
|
||||
return nil
|
||||
}
|
||||
root := n
|
||||
for root.Parent != nil {
|
||||
root = root.Parent
|
||||
}
|
||||
return root
|
||||
}
|
||||
+33
@@ -0,0 +1,33 @@
|
||||
package xmlquery
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"io"
|
||||
)
|
||||
|
||||
// ParserOptions customizes how a document is parsed.
type ParserOptions struct {
	// Decoder, when non-nil, is applied to the parser's xml.Decoder.
	Decoder *DecoderOptions
}
|
||||
|
||||
func (options ParserOptions) apply(parser *parser) {
|
||||
if options.Decoder != nil {
|
||||
(*options.Decoder).apply(parser.decoder)
|
||||
}
|
||||
}
|
||||
|
||||
// DecoderOptions mirrors the options of the same names on the standard
// encoding/xml Decoder. Please refer to its documentation:
// https://golang.org/pkg/encoding/xml/#Decoder
type DecoderOptions struct {
	Strict        bool
	AutoClose     []string
	Entity        map[string]string
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
}

// apply copies the options onto decoder.
//
// Strict, AutoClose and Entity are always copied. CharsetReader is copied
// only when it is set, so supplying DecoderOptions without one keeps whatever
// charset reader the decoder already has instead of clearing it.
func (options DecoderOptions) apply(decoder *xml.Decoder) {
	decoder.Strict = options.Strict
	decoder.AutoClose = options.AutoClose
	decoder.Entity = options.Entity
	if options.CharsetReader != nil {
		decoder.CharsetReader = options.CharsetReader
	}
}
|
||||
+430
@@ -0,0 +1,430 @@
|
||||
package xmlquery
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/antchfx/xpath"
|
||||
"golang.org/x/net/html/charset"
|
||||
)
|
||||
|
||||
// xmlMIMERegex matches the Content-Type values that LoadURL accepts as XML.
var xmlMIMERegex = regexp.MustCompile(`(?i)((application|image|message|model)/((\w|\.|-)+\+?)?|text/)(wb)?xml`)
|
||||
|
||||
// LoadURL loads the XML document from the specified URL.
|
||||
func LoadURL(url string) (*Node, error) {
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
// Make sure the Content-Type has a valid XML MIME type
|
||||
if xmlMIMERegex.MatchString(resp.Header.Get("Content-Type")) {
|
||||
return Parse(resp.Body)
|
||||
}
|
||||
return nil, fmt.Errorf("invalid XML document(%s)", resp.Header.Get("Content-Type"))
|
||||
}
|
||||
|
||||
// Parse returns the parse tree for the XML from the given Reader.
|
||||
func Parse(r io.Reader) (*Node, error) {
|
||||
return ParseWithOptions(r, ParserOptions{})
|
||||
}
|
||||
|
||||
// ParseWithOptions is like parse, but with custom options
|
||||
func ParseWithOptions(r io.Reader, options ParserOptions) (*Node, error) {
|
||||
p := createParser(r)
|
||||
options.apply(p)
|
||||
var err error
|
||||
for err == nil {
|
||||
_, err = p.parse()
|
||||
}
|
||||
|
||||
if err == io.EOF {
|
||||
// additional check for validity
|
||||
// according to: https://www.w3.org/TR/xml
|
||||
// the document MUST contain at least ONE element
|
||||
valid := false
|
||||
for doc := p.doc; doc != nil; doc = doc.NextSibling {
|
||||
for node := doc.FirstChild; node != nil; node = node.NextSibling {
|
||||
if node.Type == ElementNode {
|
||||
valid = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if !valid {
|
||||
return nil, fmt.Errorf("xmlquery: invalid XML document")
|
||||
}
|
||||
return p.doc, nil
|
||||
}
|
||||
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// parser builds a Node tree from the tokens produced by an xml.Decoder.
type parser struct {
	decoder             *xml.Decoder            // token source
	doc                 *Node                   // document root node
	level               int                     // level assigned to the next node
	prev                *Node                   // node used as the attachment point for the next node
	streamElementXPath  *xpath.Expr             // Under streaming mode, this specifies the xpath to the target element node(s).
	streamElementFilter *xpath.Expr             // If specified, it provides further filtering on the target element.
	streamNode          *Node                   // Need to remember the last target node so we can clean it up upon next Read() call.
	streamNodePrev      *Node                   // Need to remember target node's prev so upon target node removal, we can restore correct prev.
	reader              *cachedReader           // Need to maintain a reference to the reader, so we can determine whether a node contains CDATA.
	once                sync.Once               // guards the one-time initialization of space2prefix
	space2prefix        map[string]*xmlnsPrefix // namespace URI -> declared prefix
}
|
||||
|
||||
// xmlnsPrefix records a namespace prefix declaration seen by the parser.
type xmlnsPrefix struct {
	name  string // prefix; empty for a default namespace declaration
	level int    // parser level at which the declaration was recorded
}
|
||||
|
||||
func createParser(r io.Reader) *parser {
|
||||
reader := newCachedReader(bufio.NewReader(r))
|
||||
p := &parser{
|
||||
decoder: xml.NewDecoder(reader),
|
||||
doc: &Node{Type: DocumentNode},
|
||||
level: 0,
|
||||
reader: reader,
|
||||
}
|
||||
if p.decoder.CharsetReader == nil {
|
||||
p.decoder.CharsetReader = charset.NewReaderLabel
|
||||
}
|
||||
p.prev = p.doc
|
||||
return p
|
||||
}
|
||||
|
||||
// parse consumes tokens from the decoder and attaches a node to the tree for
// each start element, character data, comment, processing instruction and
// directive.
//
// It returns (nil, err) as soon as the decoder reports an error, io.EOF
// included. In streaming mode (streamElementXPath set) it additionally
// returns a target element once that element's end tag has been read and the
// optional filter accepts it.
func (p *parser) parse() (*Node, error) {
	p.once.Do(func() {
		// The "xml" prefix is predeclared.
		p.space2prefix = map[string]*xmlnsPrefix{"http://www.w3.org/XML/1998/namespace": {name: "xml", level: 0}}
	})

	// Number of currently open elements inside the stream target, target included.
	var streamElementNodeCounter int
	for {
		// Record the raw bytes of this token so the prefix and CDATA checks
		// below can inspect them.
		p.reader.StartCaching()
		tok, err := p.decoder.Token()
		p.reader.StopCaching()
		if err != nil {
			return nil, err
		}

		switch tok := tok.(type) {
		case xml.StartElement:
			if p.level == 0 {
				// Missing XML declaration: synthesize <?xml version="1.0"?>.
				attributes := make([]Attr, 1)
				attributes[0].Name = xml.Name{Local: "version"}
				attributes[0].Value = "1.0"
				node := &Node{
					Type:  DeclarationNode,
					Data:  "xml",
					Attr:  attributes,
					level: 1,
				}
				AddChild(p.prev, node)
				p.level = 1
				p.prev = node
			}

			// Record the namespace declarations carried by this element.
			for _, att := range tok.Attr {
				if att.Name.Local == "xmlns" {
					// https://github.com/antchfx/xmlquery/issues/67
					if prefix, ok := p.space2prefix[att.Value]; !ok || (ok && prefix.level >= p.level) {
						p.space2prefix[att.Value] = &xmlnsPrefix{name: "", level: p.level} // default namespace: record an empty prefix
					}
				} else if att.Name.Space == "xmlns" {
					// The same namespace URI may be declared more than once;
					// the latest declaration replaces the earlier one.
					p.space2prefix[att.Value] = &xmlnsPrefix{name: att.Name.Local, level: p.level}
				}
			}

			if space := tok.Name.Space; space != "" {
				if _, found := p.space2prefix[space]; !found && p.decoder.Strict {
					return nil, fmt.Errorf("xmlquery: invalid XML document, namespace %s is missing", space)
				}
			}

			// Copy the attributes, replacing each namespace URI by its prefix
			// where one is known.
			attributes := make([]Attr, len(tok.Attr))
			for i, att := range tok.Attr {
				name := att.Name
				if prefix, ok := p.space2prefix[name.Space]; ok {
					name.Space = prefix.name
				}
				attributes[i] = Attr{
					Name:         name,
					Value:        att.Value,
					NamespaceURI: att.Name.Space,
				}
			}

			node := &Node{
				Type:         ElementNode,
				Data:         tok.Name.Local,
				NamespaceURI: tok.Name.Space,
				Attr:         attributes,
				level:        p.level,
			}

			// Attach the node relative to p.prev according to the levels.
			if p.level == p.prev.level {
				AddSibling(p.prev, node)
			} else if p.level > p.prev.level {
				AddChild(p.prev, node)
			} else if p.level < p.prev.level {
				for i := p.prev.level - p.level; i > 1; i-- {
					p.prev = p.prev.Parent
				}
				AddSibling(p.prev.Parent, node)
			}

			if node.NamespaceURI != "" {
				if v, ok := p.space2prefix[node.NamespaceURI]; ok {
					// Only set the prefix when the raw tag text actually
					// starts with "prefix:name" (with or without the '<').
					cached := string(p.reader.CacheWithLimit(len(v.name) + len(node.Data) + 2))
					if strings.HasPrefix(cached, fmt.Sprintf("%s:%s", v.name, node.Data)) || strings.HasPrefix(cached, fmt.Sprintf("<%s:%s", v.name, node.Data)) {
						node.Prefix = v.name
					}
				}
			}
			// If we're in the streaming mode, we need to remember the node if it is the target node
			// so that when we finish processing the node's EndElement, we know how/what to return to
			// caller. Also we need to remove the target node from the tree upon next Read() call so
			// memory doesn't grow unbounded.
			if p.streamElementXPath != nil {
				if p.streamNode == nil {
					if QuerySelector(p.doc, p.streamElementXPath) != nil {
						p.streamNode = node
						p.streamNodePrev = p.prev
						streamElementNodeCounter = 1
					}
				} else {
					streamElementNodeCounter++
				}
			}
			p.prev = node
			p.level++
		case xml.EndElement:
			p.level--
			// If we're in streaming mode, and we already have a potential streaming
			// target node identified (p.streamNode != nil) then we need to check if
			// this is the real one we want to return to caller.
			if p.streamNode != nil {
				streamElementNodeCounter--
				if streamElementNodeCounter == 0 {
					// Now we know this element node at least passes the initial
					// p.streamElementXPath check and is a potential target node candidate.
					// We need to have 1 more check with p.streamElementFilter (if given) to
					// ensure it is really the element node we want.
					// The reason we need a two-step check process is because of the following
					// situation:
					//   <AAA><BBB>b1</BBB></AAA>
					// And say the p.streamElementXPath = "/AAA/BBB[. != 'b1']". Now during
					// xml.StartElement time, the <BBB> node is still empty, so it will pass
					// the p.streamElementXPath check. However, eventually we know this <BBB>
					// shouldn't be returned to the caller. Having a second more fine-grained
					// filter check ensures that. So in this case, the caller should really
					// setup the stream parser with:
					//   streamElementXPath = "/AAA/BBB"
					//   streamElementFilter = "/AAA/BBB[. != 'b1']"
					if p.streamElementFilter == nil || QuerySelector(p.doc, p.streamElementFilter) != nil {
						return p.streamNode, nil
					}
					// otherwise, this isn't our target node, clean things up.
					// note we also remove the underlying *Node from the node tree, to prevent
					// future stream node candidate selection error.
					RemoveFromTree(p.streamNode)
					p.prev = p.streamNodePrev
					p.streamNode = nil
					p.streamNodePrev = nil
				}
			}
		case xml.CharData:
			// First, normalize the cache...
			cached := bytes.ToUpper(p.reader.CacheWithLimit(9))
			nodeType := TextNode
			// ...then treat the data as CDATA when the raw bytes start with a
			// CDATA marker (with or without the leading '<').
			if bytes.HasPrefix(cached, []byte("<![CDATA[")) || bytes.HasPrefix(cached, []byte("![CDATA[")) {
				nodeType = CharDataNode
			}
			node := &Node{Type: nodeType, Data: string(tok), level: p.level}
			if p.level == p.prev.level {
				AddSibling(p.prev, node)
			} else if p.level > p.prev.level {
				AddChild(p.prev, node)
			} else if p.level < p.prev.level {
				for i := p.prev.level - p.level; i > 1; i-- {
					p.prev = p.prev.Parent
				}
				AddSibling(p.prev.Parent, node)
			}
		case xml.Comment:
			node := &Node{Type: CommentNode, Data: string(tok), level: p.level}
			if p.level == p.prev.level {
				AddSibling(p.prev, node)
			} else if p.level > p.prev.level {
				AddChild(p.prev, node)
			} else if p.level < p.prev.level {
				for i := p.prev.level - p.level; i > 1; i-- {
					p.prev = p.prev.Parent
				}
				AddSibling(p.prev.Parent, node)
			}
		case xml.ProcInst: // Processing Instruction
			if p.prev.Type != DeclarationNode {
				p.level++
			}
			node := &Node{Type: DeclarationNode, Data: tok.Target, level: p.level}
			// Turn space-separated key="value" pairs of the instruction into attributes.
			pairs := strings.Split(string(tok.Inst), " ")
			for _, pair := range pairs {
				pair = strings.TrimSpace(pair)
				if i := strings.Index(pair, "="); i > 0 {
					AddAttr(node, pair[:i], strings.Trim(pair[i+1:], `"'`))
				}
			}
			if p.level == p.prev.level {
				AddSibling(p.prev, node)
			} else if p.level > p.prev.level {
				AddChild(p.prev, node)
			} else if p.level < p.prev.level {
				for i := p.prev.level - p.level; i > 1; i-- {
					p.prev = p.prev.Parent
				}
				AddSibling(p.prev.Parent, node)
			}
			p.prev = node
		case xml.Directive:
			node := &Node{Type: NotationNode, Data: string(tok), level: p.level}
			if p.level == p.prev.level {
				AddSibling(p.prev, node)
			} else if p.level > p.prev.level {
				AddChild(p.prev, node)
			} else if p.level < p.prev.level {
				for i := p.prev.level - p.level; i > 1; i-- {
					p.prev = p.prev.Parent
				}
				AddSibling(p.prev.Parent, node)
			}
		}
	}
}
|
||||
|
||||
// StreamParser enables loading and parsing an XML document in a streaming
// fashion.
type StreamParser struct {
	p *parser // underlying parser, configured with the stream XPath expressions
}
|
||||
|
||||
// CreateStreamParser creates a StreamParser. Argument streamElementXPath is
|
||||
// required.
|
||||
// Argument streamElementFilter is optional and should only be used in advanced
|
||||
// scenarios.
|
||||
//
|
||||
// Scenario 1: simple case:
|
||||
//
|
||||
// xml := `<AAA><BBB>b1</BBB><BBB>b2</BBB></AAA>`
|
||||
// sp, err := CreateStreamParser(strings.NewReader(xml), "/AAA/BBB")
|
||||
// if err != nil {
|
||||
// panic(err)
|
||||
// }
|
||||
// for {
|
||||
// n, err := sp.Read()
|
||||
// if err != nil {
|
||||
// break
|
||||
// }
|
||||
// fmt.Println(n.OutputXML(true))
|
||||
// }
|
||||
//
|
||||
// Output will be:
|
||||
//
|
||||
// <BBB>b1</BBB>
|
||||
// <BBB>b2</BBB>
|
||||
//
|
||||
// Scenario 2: advanced case:
|
||||
//
|
||||
// xml := `<AAA><BBB>b1</BBB><BBB>b2</BBB></AAA>`
|
||||
// sp, err := CreateStreamParser(strings.NewReader(xml), "/AAA/BBB", "/AAA/BBB[. != 'b1']")
|
||||
// if err != nil {
|
||||
// panic(err)
|
||||
// }
|
||||
// for {
|
||||
// n, err := sp.Read()
|
||||
// if err != nil {
|
||||
// break
|
||||
// }
|
||||
// fmt.Println(n.OutputXML(true))
|
||||
// }
|
||||
//
|
||||
// Output will be:
|
||||
//
|
||||
// <BBB>b2</BBB>
|
||||
//
|
||||
// As the argument names indicate, streamElementXPath should be used for
|
||||
// providing xpath query pointing to the target element node only, no extra
|
||||
// filtering on the element itself or its children; while streamElementFilter,
|
||||
// if needed, can provide additional filtering on the target element and its
|
||||
// children.
|
||||
//
|
||||
// CreateStreamParser returns an error if either streamElementXPath or
|
||||
// streamElementFilter, if provided, cannot be successfully parsed and compiled
|
||||
// into a valid xpath query.
|
||||
func CreateStreamParser(r io.Reader, streamElementXPath string, streamElementFilter ...string) (*StreamParser, error) {
|
||||
return CreateStreamParserWithOptions(r, ParserOptions{}, streamElementXPath, streamElementFilter...)
|
||||
}
|
||||
|
||||
// CreateStreamParserWithOptions is like CreateStreamParser, but with custom options
|
||||
func CreateStreamParserWithOptions(
|
||||
r io.Reader,
|
||||
options ParserOptions,
|
||||
streamElementXPath string,
|
||||
streamElementFilter ...string,
|
||||
) (*StreamParser, error) {
|
||||
elemXPath, err := getQuery(streamElementXPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid streamElementXPath '%s', err: %s", streamElementXPath, err.Error())
|
||||
}
|
||||
elemFilter := (*xpath.Expr)(nil)
|
||||
if len(streamElementFilter) > 0 {
|
||||
elemFilter, err = getQuery(streamElementFilter[0])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid streamElementFilter '%s', err: %s", streamElementFilter[0], err.Error())
|
||||
}
|
||||
}
|
||||
parser := createParser(r)
|
||||
options.apply(parser)
|
||||
sp := &StreamParser{
|
||||
p: parser,
|
||||
}
|
||||
sp.p.streamElementXPath = elemXPath
|
||||
sp.p.streamElementFilter = elemFilter
|
||||
return sp, nil
|
||||
}
|
||||
|
||||
// Read returns a target node that satisfies the XPath specified by caller at
|
||||
// StreamParser creation time. If there is no more satisfying target nodes after
|
||||
// reading the rest of the XML document, io.EOF will be returned. At any time,
|
||||
// any XML parsing error encountered will be returned, and the stream parsing
|
||||
// stopped. Calling Read() after an error is returned (including io.EOF) results
|
||||
// undefined behavior. Also note, due to the streaming nature, calling Read()
|
||||
// will automatically remove any previous target node(s) from the document tree.
|
||||
func (sp *StreamParser) Read() (*Node, error) {
|
||||
// Because this is a streaming read, we need to release/remove last
|
||||
// target node from the node tree to free up memory.
|
||||
if sp.p.streamNode != nil {
|
||||
// We need to remove all siblings before the current stream node,
|
||||
// because the document may contain unwanted nodes between the target
|
||||
// ones (for example new line text node), which would otherwise
|
||||
// accumulate as first childs, and slow down the stream over time
|
||||
for sp.p.streamNode.PrevSibling != nil {
|
||||
RemoveFromTree(sp.p.streamNode.PrevSibling)
|
||||
}
|
||||
sp.p.prev = sp.p.streamNode.Parent
|
||||
RemoveFromTree(sp.p.streamNode)
|
||||
sp.p.streamNode = nil
|
||||
sp.p.streamNodePrev = nil
|
||||
}
|
||||
return sp.p.parse()
|
||||
}
|
||||
+304
@@ -0,0 +1,304 @@
|
||||
/*
|
||||
Package xmlquery extracts data from XML documents using XPath expressions.
|
||||
*/
|
||||
package xmlquery
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/antchfx/xpath"
|
||||
)
|
||||
|
||||
// SelectElements finds child elements with the specified name.
|
||||
func (n *Node) SelectElements(name string) []*Node {
|
||||
return Find(n, name)
|
||||
}
|
||||
|
||||
// SelectElement finds child elements with the specified name.
|
||||
func (n *Node) SelectElement(name string) *Node {
|
||||
return FindOne(n, name)
|
||||
}
|
||||
|
||||
// SelectAttr returns the attribute value with the specified name.
|
||||
func (n *Node) SelectAttr(name string) string {
|
||||
if n.Type == AttributeNode {
|
||||
if n.Data == name {
|
||||
return n.InnerText()
|
||||
}
|
||||
return ""
|
||||
}
|
||||
xmlName := newXMLName(name)
|
||||
for _, attr := range n.Attr {
|
||||
if attr.Name == xmlName {
|
||||
return attr.Value
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Compile-time check that *NodeNavigator implements xpath.NodeNavigator.
var _ xpath.NodeNavigator = &NodeNavigator{}
|
||||
|
||||
// CreateXPathNavigator creates a new xpath.NodeNavigator for the specified
|
||||
// XML Node.
|
||||
func CreateXPathNavigator(top *Node) *NodeNavigator {
|
||||
return &NodeNavigator{curr: top, root: top, attr: -1}
|
||||
}
|
||||
|
||||
func getCurrentNode(it *xpath.NodeIterator) *Node {
|
||||
n := it.Current().(*NodeNavigator)
|
||||
if n.NodeType() == xpath.AttributeNode {
|
||||
childNode := &Node{
|
||||
Type: TextNode,
|
||||
Data: n.Value(),
|
||||
}
|
||||
return &Node{
|
||||
Parent: n.curr,
|
||||
Type: AttributeNode,
|
||||
Data: n.LocalName(),
|
||||
FirstChild: childNode,
|
||||
LastChild: childNode,
|
||||
}
|
||||
}
|
||||
return n.curr
|
||||
}
|
||||
|
||||
// Find is like QueryAll but panics if `expr` is not a valid XPath expression.
|
||||
// See `QueryAll()` function.
|
||||
func Find(top *Node, expr string) []*Node {
|
||||
nodes, err := QueryAll(top, expr)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return nodes
|
||||
}
|
||||
|
||||
// FindOne is like Query but panics if `expr` is not a valid XPath expression.
|
||||
// See `Query()` function.
|
||||
func FindOne(top *Node, expr string) *Node {
|
||||
node, err := Query(top, expr)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return node
|
||||
}
|
||||
|
||||
// QueryAll searches the XML Node that matches by the specified XPath expr.
|
||||
// Returns an error if the expression `expr` cannot be parsed.
|
||||
func QueryAll(top *Node, expr string) ([]*Node, error) {
|
||||
exp, err := getQuery(expr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return QuerySelectorAll(top, exp), nil
|
||||
}
|
||||
|
||||
// Query searches the XML Node that matches by the specified XPath expr,
|
||||
// and returns first matched element.
|
||||
func Query(top *Node, expr string) (*Node, error) {
|
||||
exp, err := getQuery(expr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return QuerySelector(top, exp), nil
|
||||
}
|
||||
|
||||
// QuerySelectorAll searches all of the XML Node that matches the specified
|
||||
// XPath selectors.
|
||||
func QuerySelectorAll(top *Node, selector *xpath.Expr) []*Node {
|
||||
t := selector.Select(CreateXPathNavigator(top))
|
||||
var elems []*Node
|
||||
for t.MoveNext() {
|
||||
elems = append(elems, getCurrentNode(t))
|
||||
}
|
||||
return elems
|
||||
}
|
||||
|
||||
// QuerySelector returns the first matched XML Node by the specified XPath
|
||||
// selector.
|
||||
func QuerySelector(top *Node, selector *xpath.Expr) *Node {
|
||||
t := selector.Select(CreateXPathNavigator(top))
|
||||
if t.MoveNext() {
|
||||
return getCurrentNode(t)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// FindEach searches the html.Node and calls functions cb.
|
||||
// Important: this method is deprecated, instead, use for .. = range Find(){}.
|
||||
func FindEach(top *Node, expr string, cb func(int, *Node)) {
|
||||
for i, n := range Find(top, expr) {
|
||||
cb(i, n)
|
||||
}
|
||||
}
|
||||
|
||||
// FindEachWithBreak functions the same as FindEach but allows to break the loop
|
||||
// by returning false from the callback function `cb`.
|
||||
// Important: this method is deprecated, instead, use .. = range Find(){}.
|
||||
func FindEachWithBreak(top *Node, expr string, cb func(int, *Node) bool) {
|
||||
for i, n := range Find(top, expr) {
|
||||
if !cb(i, n) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type NodeNavigator struct {
|
||||
root, curr *Node
|
||||
attr int
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) Current() *Node {
|
||||
return x.curr
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) NodeType() xpath.NodeType {
|
||||
switch x.curr.Type {
|
||||
case CommentNode:
|
||||
return xpath.CommentNode
|
||||
case TextNode, CharDataNode, NotationNode:
|
||||
return xpath.TextNode
|
||||
case DeclarationNode, DocumentNode:
|
||||
return xpath.RootNode
|
||||
case ElementNode:
|
||||
if x.attr != -1 {
|
||||
return xpath.AttributeNode
|
||||
}
|
||||
return xpath.ElementNode
|
||||
}
|
||||
panic(fmt.Sprintf("unknown XML node type: %v", x.curr.Type))
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) LocalName() string {
|
||||
if x.attr != -1 {
|
||||
return x.curr.Attr[x.attr].Name.Local
|
||||
}
|
||||
return x.curr.Data
|
||||
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) Prefix() string {
|
||||
if x.NodeType() == xpath.AttributeNode {
|
||||
if x.attr != -1 {
|
||||
return x.curr.Attr[x.attr].Name.Space
|
||||
}
|
||||
return ""
|
||||
}
|
||||
return x.curr.Prefix
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) NamespaceURL() string {
|
||||
if x.attr != -1 {
|
||||
return x.curr.Attr[x.attr].NamespaceURI
|
||||
}
|
||||
return x.curr.NamespaceURI
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) Value() string {
|
||||
switch x.curr.Type {
|
||||
case CommentNode:
|
||||
return x.curr.Data
|
||||
case ElementNode:
|
||||
if x.attr != -1 {
|
||||
return x.curr.Attr[x.attr].Value
|
||||
}
|
||||
return x.curr.InnerText()
|
||||
case TextNode:
|
||||
return x.curr.Data
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) Copy() xpath.NodeNavigator {
|
||||
n := *x
|
||||
return &n
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) MoveToRoot() {
|
||||
x.curr = x.root
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) MoveToParent() bool {
|
||||
if x.attr != -1 {
|
||||
x.attr = -1
|
||||
return true
|
||||
} else if node := x.curr.Parent; node != nil {
|
||||
x.curr = node
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) MoveToNextAttribute() bool {
|
||||
if x.attr >= len(x.curr.Attr)-1 {
|
||||
return false
|
||||
}
|
||||
x.attr++
|
||||
return true
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) MoveToChild() bool {
|
||||
if x.attr != -1 {
|
||||
return false
|
||||
}
|
||||
if node := x.curr.FirstChild; node != nil {
|
||||
x.curr = node
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) MoveToFirst() bool {
|
||||
if x.attr != -1 || x.curr.PrevSibling == nil {
|
||||
return false
|
||||
}
|
||||
for {
|
||||
node := x.curr.PrevSibling
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
x.curr = node
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) String() string {
|
||||
return x.Value()
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) MoveToNext() bool {
|
||||
if x.attr != -1 {
|
||||
return false
|
||||
}
|
||||
for node := x.curr.NextSibling; node != nil; node = x.curr.NextSibling {
|
||||
x.curr = node
|
||||
if x.curr.Type != TextNode || strings.TrimSpace(x.curr.Data) != "" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) MoveToPrevious() bool {
|
||||
if x.attr != -1 {
|
||||
return false
|
||||
}
|
||||
for node := x.curr.PrevSibling; node != nil; node = x.curr.PrevSibling {
|
||||
x.curr = node
|
||||
if x.curr.Type != TextNode || strings.TrimSpace(x.curr.Data) != "" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) MoveTo(other xpath.NodeNavigator) bool {
|
||||
node, ok := other.(*NodeNavigator)
|
||||
if !ok || node.root != x.root {
|
||||
return false
|
||||
}
|
||||
|
||||
x.curr = node.curr
|
||||
x.attr = node.attr
|
||||
return true
|
||||
}
|
||||
+32
@@ -0,0 +1,32 @@
|
||||
# vscode
|
||||
.vscode
|
||||
debug
|
||||
*.test
|
||||
|
||||
./build
|
||||
|
||||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
||||
+17
@@ -0,0 +1,17 @@
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
+167
@@ -0,0 +1,167 @@
|
||||
# XPath
|
||||
|
||||
[GoDoc](https://godoc.org/github.com/antchfx/xpath)
[Coverage Status](https://coveralls.io/github/antchfx/xpath?branch=master)
[Build Status](https://github.com/antchfx/xpath/actions/workflows/testing.yml)
[Go Report Card](https://goreportcard.com/report/github.com/antchfx/xpath)
|
||||
|
||||
XPath is a Go package for selecting nodes from XML, HTML or other documents using XPath expressions.
|
||||
|
||||
# Implementation
|
||||
|
||||
- [htmlquery](https://github.com/antchfx/htmlquery) - an XPath query package for HTML document
|
||||
|
||||
- [xmlquery](https://github.com/antchfx/xmlquery) - an XPath query package for XML document.
|
||||
|
||||
- [jsonquery](https://github.com/antchfx/jsonquery) - an XPath query package for JSON document
|
||||
|
||||
# Supported Features
|
||||
|
||||
#### The basic XPath patterns.
|
||||
|
||||
> The basic XPath patterns cover 90% of the cases that most stylesheets will need.
|
||||
|
||||
- `node` : Selects all child elements with nodeName of node.
|
||||
|
||||
- `*` : Selects all child elements.
|
||||
|
||||
- `@attr` : Selects the attribute attr.
|
||||
|
||||
- `@*` : Selects all attributes.
|
||||
|
||||
- `node()` : Matches an org.w3c.dom.Node.
|
||||
|
||||
- `text()` : Matches an org.w3c.dom.Text node.
|
||||
|
||||
- `comment()` : Matches a comment.
|
||||
|
||||
- `.` : Selects the current node.
|
||||
|
||||
- `..` : Selects the parent of current node.
|
||||
|
||||
- `/` : Selects the document node.
|
||||
|
||||
- `a[expr]` : Select only those nodes matching a which also satisfy the expression expr.
|
||||
|
||||
- `a[n]` : Selects the nth node matching a. When a filter's expression is a number, XPath selects based on position.
|
||||
|
||||
- `a/b` : For each node matching a, add the nodes matching b to the result.
|
||||
|
||||
- `a//b` : For each node matching a, add the descendant nodes matching b to the result.
|
||||
|
||||
- `//b` : Returns elements in the entire document matching b.
|
||||
|
||||
- `a|b` : All nodes matching a or b, union operation(not boolean or).
|
||||
|
||||
- `(a, b, c)` : Evaluates each of its operands and concatenates the resulting sequences, in order, into a single result sequence
|
||||
|
||||
- `(a/b)` : Selects all matching nodes as a grouping set.
|
||||
|
||||
#### Node Axes
|
||||
|
||||
- `child::*` : The child axis selects children of the current node.
|
||||
|
||||
- `child::node()`: Selects all the children of the context node.
|
||||
- `child::text()`: Selects all text node children of the context node.
|
||||
|
||||
- `descendant::*` : The descendant axis selects descendants of the current node. It is equivalent to '//'.
|
||||
|
||||
- `descendant-or-self::*` : Selects descendants including the current node.
|
||||
|
||||
- `attribute::*` : Selects attributes of the current element. It is equivalent to @\*
|
||||
|
||||
- `following-sibling::*` : Selects nodes after the current node.
|
||||
|
||||
- `preceding-sibling::*` : Selects nodes before the current node.
|
||||
|
||||
- `following::*` : Selects the first matching node following in document order, excluding descendants.
|
||||
|
||||
- `preceding::*` : Selects the first matching node preceding in document order, excluding ancestors.
|
||||
|
||||
- `parent::*` : Selects the parent if it matches. The '..' pattern from the core is equivalent to 'parent::node()'.
|
||||
|
||||
- `ancestor::*` : Selects matching ancestors.
|
||||
|
||||
- `ancestor-or-self::*` : Selects ancestors including the current node.
|
||||
|
||||
- `self::*` : Selects the current node. '.' is equivalent to 'self::node()'.
|
||||
|
||||
#### Expressions
|
||||
|
||||
The xpath package supports three value types: number, boolean, string.
|
||||
|
||||
- `path` : Selects nodes based on the path.
|
||||
|
||||
- `a = b` : Standard comparisons.
|
||||
|
||||
- `a = b` : True if a equals b.
|
||||
- `a != b` : True if a is not equal to b.
|
||||
- `a < b` : True if a is less than b.
|
||||
- `a <= b` : True if a is less than or equal to b.
|
||||
- `a > b` : True if a is greater than b.
|
||||
- `a >= b` : True if a is greater than or equal to b.
|
||||
|
||||
- `a + b` : Arithmetic expressions.
|
||||
|
||||
- `- a` : Unary minus
|
||||
- `a + b` : Addition
|
||||
- `a - b` : Subtraction
|
||||
- `a * b` : Multiplication
|
||||
- `a div b` : Division
|
||||
- `a mod b` : Modulus (division remainder)
|
||||
|
||||
- `a or b` : Boolean `or` operation.
|
||||
|
||||
- `a and b` : Boolean `and` operation.
|
||||
|
||||
- `(expr)` : Parenthesized expressions.
|
||||
|
||||
- `fun(arg1, ..., argn)` : Function calls:
|
||||
|
||||
| Function | Supported |
|
||||
| ----------------------- | --------- |
|
||||
| `boolean()` | ✓ |
|
||||
| `ceiling()` | ✓ |
|
||||
| `choose()` | ✗ |
|
||||
| `concat()` | ✓ |
|
||||
| `contains()` | ✓ |
|
||||
| `count()` | ✓ |
|
||||
| `current()` | ✗ |
|
||||
| `document()` | ✗ |
|
||||
| `element-available()` | ✗ |
|
||||
| `ends-with()` | ✓ |
|
||||
| `false()` | ✓ |
|
||||
| `floor()` | ✓ |
|
||||
| `format-number()` | ✗ |
|
||||
| `function-available()` | ✗ |
|
||||
| `generate-id()` | ✗ |
|
||||
| `id()` | ✗ |
|
||||
| `key()` | ✗ |
|
||||
| `lang()` | ✗ |
|
||||
| `last()` | ✓ |
|
||||
| `local-name()` | ✓ |
|
||||
| `lower-case()`[^1] | ✓ |
|
||||
| `matches()` | ✓ |
|
||||
| `name()` | ✓ |
|
||||
| `namespace-uri()` | ✓ |
|
||||
| `normalize-space()` | ✓ |
|
||||
| `not()` | ✓ |
|
||||
| `number()` | ✓ |
|
||||
| `position()` | ✓ |
|
||||
| `replace()` | ✓ |
|
||||
| `reverse()` | ✓ |
|
||||
| `round()` | ✓ |
|
||||
| `starts-with()` | ✓ |
|
||||
| `string()` | ✓ |
|
||||
| `string-join()`[^1] | ✓ |
|
||||
| `string-length()` | ✓ |
|
||||
| `substring()` | ✓ |
|
||||
| `substring-after()` | ✓ |
|
||||
| `substring-before()` | ✓ |
|
||||
| `sum()` | ✓ |
|
||||
| `system-property()` | ✗ |
|
||||
| `translate()` | ✓ |
|
||||
| `true()` | ✓ |
|
||||
| `unparsed-entity-url()` | ✗ |
|
||||
|
||||
[^1]: XPath-2.0 expression
|
||||
+718
@@ -0,0 +1,718 @@
|
||||
package xpath
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// flag is a set of bit hints handed down through the builder's process*
// methods; individual bits are combined with | and tested with &.
type flag int

// flagsEnum names the individual flag bits.
var flagsEnum = struct {
	// None is the empty flag set.
	None flag
	// SmartDesc is passed when building the input of a descendant or
	// descendant-or-self step; a descendant axis built under it becomes a
	// descendantOverDescendantQuery instead of a descendantQuery.
	SmartDesc flag
	// PosFilter is set by processFilter once a predicate is treated as
	// positional.
	PosFilter flag
	// Filter is added while building the input of a filter expression.
	Filter flag
	// NOTE(review): Condition is not referenced in the part of the file
	// reviewed here — confirm where it is used.
	Condition flag
}{
	None:      0,
	SmartDesc: 1,
	PosFilter: 2,
	Filter:    4,
	Condition: 8,
}
|
||||
|
||||
// builderProp is a set of bit properties that the process* methods report
// back to their caller through a *builderProp out-parameter; bits are
// combined with | and tested with &.
type builderProp int

// builderProps names the individual builderProp bits.
var builderProps = struct {
	// None is the empty property set.
	None builderProp
	// PosFilter is set by processFilter when the predicate is positional and
	// cleared when the filter's input is not itself a filter.
	PosFilter builderProp
	// HasPosition is set for position() and for predicates treated as
	// positional by processFilter.
	HasPosition builderProp
	// HasLast is set for last().
	HasLast builderProp
	// NonFlat is set by the ancestor, descendant, following and preceding
	// axes; a later child step then builds a cachedChildQuery instead of a
	// childQuery.
	NonFlat builderProp
}{
	None:        0,
	PosFilter:   1,
	HasPosition: 2,
	HasLast:     4,
	NonFlat:     8,
}
|
||||
|
||||
// builder turns a parsed XPath expression tree into a query.
type builder struct {
	// NOTE(review): parseDepth is not referenced in the part of the file
	// reviewed here — presumably a recursion-depth counter; confirm.
	parseDepth int
	// firstInput is reset to nil by processAxis, consulted by processFilter
	// when merging positional filters, and used as the Input of the last()
	// and position() function queries.
	firstInput query
}
|
||||
|
||||
// axisPredicate creates a predicate to predicating for this axis node.
|
||||
func axisPredicate(root *axisNode) func(NodeNavigator) bool {
|
||||
nametest := root.LocalName != "" || root.Prefix != ""
|
||||
predicate := func(n NodeNavigator) bool {
|
||||
if root.typeTest == n.NodeType() || root.typeTest == allNode {
|
||||
if nametest {
|
||||
type namespaceURL interface {
|
||||
NamespaceURL() string
|
||||
}
|
||||
if ns, ok := n.(namespaceURL); ok && root.hasNamespaceURI {
|
||||
return root.LocalName == n.LocalName() && root.namespaceURI == ns.NamespaceURL()
|
||||
}
|
||||
if root.LocalName == n.LocalName() && root.Prefix == n.Prefix() {
|
||||
return true
|
||||
}
|
||||
} else {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
return predicate
|
||||
}
|
||||
|
||||
// processAxis processes a query for the XPath axis node.
|
||||
func (b *builder) processAxis(root *axisNode, flags flag, props *builderProp) (query, error) {
|
||||
var (
|
||||
err error
|
||||
qyInput query
|
||||
qyOutput query
|
||||
)
|
||||
b.firstInput = nil
|
||||
predicate := axisPredicate(root)
|
||||
|
||||
if root.Input == nil {
|
||||
qyInput = &contextQuery{}
|
||||
*props = builderProps.None
|
||||
} else {
|
||||
inputFlags := flagsEnum.None
|
||||
if (flags & flagsEnum.Filter) == 0 {
|
||||
if root.AxisType == "child" && (root.Input.Type() == nodeAxis) {
|
||||
if input := root.Input.(*axisNode); input.AxisType == "descendant-or-self" {
|
||||
var qyGrandInput query
|
||||
if input.Input != nil {
|
||||
qyGrandInput, err = b.processNode(input.Input, flagsEnum.SmartDesc, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
qyGrandInput = &contextQuery{}
|
||||
}
|
||||
qyOutput = &descendantQuery{name: root.LocalName, Input: qyGrandInput, Predicate: predicate, Self: false}
|
||||
*props |= builderProps.NonFlat
|
||||
return qyOutput, nil
|
||||
}
|
||||
}
|
||||
if root.AxisType == "descendant" || root.AxisType == "descendant-or-self" {
|
||||
inputFlags |= flagsEnum.SmartDesc
|
||||
}
|
||||
}
|
||||
|
||||
qyInput, err = b.processNode(root.Input, inputFlags, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
switch root.AxisType {
|
||||
case "ancestor":
|
||||
qyOutput = &ancestorQuery{name: root.LocalName, Input: qyInput, Predicate: predicate}
|
||||
*props |= builderProps.NonFlat
|
||||
case "ancestor-or-self":
|
||||
qyOutput = &ancestorQuery{name: root.LocalName, Input: qyInput, Predicate: predicate, Self: true}
|
||||
*props |= builderProps.NonFlat
|
||||
case "attribute":
|
||||
qyOutput = &attributeQuery{name: root.LocalName, Input: qyInput, Predicate: predicate}
|
||||
case "child":
|
||||
if (*props & builderProps.NonFlat) == 0 {
|
||||
qyOutput = &childQuery{name: root.LocalName, Input: qyInput, Predicate: predicate}
|
||||
} else {
|
||||
qyOutput = &cachedChildQuery{name: root.LocalName, Input: qyInput, Predicate: predicate}
|
||||
}
|
||||
case "descendant":
|
||||
if (flags & flagsEnum.SmartDesc) != flagsEnum.None {
|
||||
qyOutput = &descendantOverDescendantQuery{name: root.LocalName, Input: qyInput, MatchSelf: false, Predicate: predicate}
|
||||
} else {
|
||||
qyOutput = &descendantQuery{name: root.LocalName, Input: qyInput, Predicate: predicate}
|
||||
}
|
||||
*props |= builderProps.NonFlat
|
||||
case "descendant-or-self":
|
||||
if (flags & flagsEnum.SmartDesc) != flagsEnum.None {
|
||||
qyOutput = &descendantOverDescendantQuery{name: root.LocalName, Input: qyInput, MatchSelf: true, Predicate: predicate}
|
||||
} else {
|
||||
qyOutput = &descendantQuery{name: root.LocalName, Input: qyInput, Predicate: predicate, Self: true}
|
||||
}
|
||||
*props |= builderProps.NonFlat
|
||||
case "following":
|
||||
qyOutput = &followingQuery{Input: qyInput, Predicate: predicate}
|
||||
*props |= builderProps.NonFlat
|
||||
case "following-sibling":
|
||||
qyOutput = &followingQuery{Input: qyInput, Predicate: predicate, Sibling: true}
|
||||
case "parent":
|
||||
qyOutput = &parentQuery{Input: qyInput, Predicate: predicate}
|
||||
case "preceding":
|
||||
qyOutput = &precedingQuery{Input: qyInput, Predicate: predicate}
|
||||
*props |= builderProps.NonFlat
|
||||
case "preceding-sibling":
|
||||
qyOutput = &precedingQuery{Input: qyInput, Predicate: predicate, Sibling: true}
|
||||
case "self":
|
||||
qyOutput = &selfQuery{Input: qyInput, Predicate: predicate}
|
||||
case "namespace":
|
||||
// haha,what will you do someting??
|
||||
default:
|
||||
err = fmt.Errorf("unknown axe type: %s", root.AxisType)
|
||||
return nil, err
|
||||
}
|
||||
return qyOutput, nil
|
||||
}
|
||||
|
||||
func canBeNumber(q query) bool {
|
||||
if q.ValueType() != xpathResultType.Any {
|
||||
return q.ValueType() == xpathResultType.Number
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// processFilter builds the query for an XPath filter expression, i.e. an
// input expression followed by a predicate (`input[condition]`).
//
// root is the filter node, flags carries hints from the caller, and props
// receives properties of the built query (its PosFilter bit is maintained
// here). The result is a filterQuery, or a mergeQuery wrapping one when a
// positional filter is split from the leading part of its input. An error
// is returned when building the input or the condition fails.
func (b *builder) processFilter(root *filterNode, flags flag, props *builderProp) (query, error) {
	// first is true for the outermost filter of a chain: nested filters are
	// built with the Filter flag added (see the call just below).
	first := (flags & flagsEnum.Filter) == 0

	qyInput, err := b.processNode(root.Input, (flags | flagsEnum.Filter), props)
	if err != nil {
		return nil, err
	}
	// Snapshot firstInput now; building the condition may overwrite it.
	firstInput := b.firstInput

	// The condition reports its properties separately from the input's.
	var propsCond builderProp
	cond, err := b.processNode(root.Condition, flags, &propsCond)
	if err != nil {
		return nil, err
	}

	// A predicate that may evaluate to a number, or that uses position() or
	// last(), is treated as positional.
	if canBeNumber(cond) || ((propsCond & (builderProps.HasPosition | builderProps.HasLast)) != 0) {
		propsCond |= builderProps.HasPosition
		flags |= flagsEnum.PosFilter
	}

	// PosFilter only carries over from the input when the input is itself a
	// filter.
	if root.Input.Type() != nodeFilter {
		*props &= ^builderProps.PosFilter
	}

	if (propsCond & builderProps.HasPosition) != 0 {
		*props |= builderProps.PosFilter
	}

	if (propsCond & builderProps.HasPosition) != builderProps.None {
		if (propsCond & builderProps.HasLast) != 0 {
			// A last() call whose input is a filter query is replaced by a
			// dedicated lastFuncQuery over that input.
			// https://github.com/antchfx/xpath/issues/76
			// https://github.com/antchfx/xpath/issues/78
			if qyFunc, ok := cond.(*functionQuery); ok {
				switch qyFunc.Input.(type) {
				case *filterQuery:
					cond = &lastFuncQuery{Input: qyFunc.Input}
				}
			}
		}
	}

	merge := (qyInput.Properties() & queryProps.Merge) != 0
	if first && firstInput != nil {
		if merge && ((*props & builderProps.PosFilter) != 0) {
			// Detach the leading query from its own input: it is re-rooted on
			// a context query, and the detached input (if any) becomes the
			// Input of a mergeQuery whose Child is the positional filter.
			var (
				rootQuery = &contextQuery{}
				parent    query
			)
			// Every case performs the same re-rooting; one case per concrete
			// type is needed to reach that type's Input field.
			switch axisQuery := firstInput.(type) {
			case *ancestorQuery:
				if _, ok := axisQuery.Input.(*contextQuery); !ok {
					parent = axisQuery.Input
					axisQuery.Input = rootQuery
				}
			case *attributeQuery:
				if _, ok := axisQuery.Input.(*contextQuery); !ok {
					parent = axisQuery.Input
					axisQuery.Input = rootQuery
				}
			case *childQuery:
				if _, ok := axisQuery.Input.(*contextQuery); !ok {
					parent = axisQuery.Input
					axisQuery.Input = rootQuery
				}
			case *cachedChildQuery:
				if _, ok := axisQuery.Input.(*contextQuery); !ok {
					parent = axisQuery.Input
					axisQuery.Input = rootQuery
				}
			case *descendantQuery:
				if _, ok := axisQuery.Input.(*contextQuery); !ok {
					parent = axisQuery.Input
					axisQuery.Input = rootQuery
				}
			case *followingQuery:
				if _, ok := axisQuery.Input.(*contextQuery); !ok {
					parent = axisQuery.Input
					axisQuery.Input = rootQuery
				}
			case *precedingQuery:
				if _, ok := axisQuery.Input.(*contextQuery); !ok {
					parent = axisQuery.Input
					axisQuery.Input = rootQuery
				}
			case *parentQuery:
				if _, ok := axisQuery.Input.(*contextQuery); !ok {
					parent = axisQuery.Input
					axisQuery.Input = rootQuery
				}
			case *selfQuery:
				if _, ok := axisQuery.Input.(*contextQuery); !ok {
					parent = axisQuery.Input
					axisQuery.Input = rootQuery
				}
			case *groupQuery:
				if _, ok := axisQuery.Input.(*contextQuery); !ok {
					parent = axisQuery.Input
					axisQuery.Input = rootQuery
				}
			case *descendantOverDescendantQuery:
				if _, ok := axisQuery.Input.(*contextQuery); !ok {
					parent = axisQuery.Input
					axisQuery.Input = rootQuery
				}
			}
			b.firstInput = nil
			child := &filterQuery{Input: qyInput, Predicate: cond, NoPosition: false}
			if parent != nil {
				return &mergeQuery{Input: parent, Child: child}, nil
			}
			return child, nil
		}
		b.firstInput = nil
	}

	// Plain filter: NoPosition is set when the predicate is not positional.
	resultQuery := &filterQuery{
		Input:      qyInput,
		Predicate:  cond,
		NoPosition: (propsCond & builderProps.HasPosition) == 0,
	}
	return resultQuery, nil
}
|
||||
|
||||
// processFunctionNode processes query for the XPath function node.
|
||||
func (b *builder) processFunction(root *functionNode, props *builderProp) (query, error) {
|
||||
// Reset builder props
|
||||
*props = builderProps.None
|
||||
|
||||
var qyOutput query
|
||||
switch root.FuncName {
|
||||
case "lower-case":
|
||||
arg, err := b.processNode(root.Args[0], flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Func: lowerCaseFunc(arg)}
|
||||
case "starts-with":
|
||||
arg1, err := b.processNode(root.Args[0], flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
arg2, err := b.processNode(root.Args[1], flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Func: startwithFunc(arg1, arg2)}
|
||||
case "ends-with":
|
||||
arg1, err := b.processNode(root.Args[0], flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
arg2, err := b.processNode(root.Args[1], flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Func: endwithFunc(arg1, arg2)}
|
||||
case "contains":
|
||||
arg1, err := b.processNode(root.Args[0], flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
arg2, err := b.processNode(root.Args[1], flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Func: containsFunc(arg1, arg2)}
|
||||
case "matches":
|
||||
//matches(string , pattern)
|
||||
if len(root.Args) != 2 {
|
||||
return nil, errors.New("xpath: matches function must have two parameters")
|
||||
}
|
||||
var (
|
||||
arg1, arg2 query
|
||||
err error
|
||||
)
|
||||
if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Issue #92, testing the regular expression before.
|
||||
if q, ok := arg2.(*constantQuery); ok {
|
||||
if _, err = getRegexp(q.Val.(string)); err != nil {
|
||||
return nil, fmt.Errorf("matches() got error. %v", err)
|
||||
}
|
||||
}
|
||||
qyOutput = &functionQuery{Func: matchesFunc(arg1, arg2)}
|
||||
case "substring":
|
||||
//substring( string , start [, length] )
|
||||
if len(root.Args) < 2 {
|
||||
return nil, errors.New("xpath: substring function must have at least two parameter")
|
||||
}
|
||||
var (
|
||||
arg1, arg2, arg3 query
|
||||
err error
|
||||
)
|
||||
if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(root.Args) == 3 {
|
||||
if arg3, err = b.processNode(root.Args[2], flagsEnum.None, props); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
qyOutput = &functionQuery{Func: substringFunc(arg1, arg2, arg3)}
|
||||
case "substring-before", "substring-after":
|
||||
//substring-xxxx( haystack, needle )
|
||||
if len(root.Args) != 2 {
|
||||
return nil, errors.New("xpath: substring-before function must have two parameters")
|
||||
}
|
||||
var (
|
||||
arg1, arg2 query
|
||||
err error
|
||||
)
|
||||
if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{
|
||||
Func: substringIndFunc(arg1, arg2, root.FuncName == "substring-after"),
|
||||
}
|
||||
case "string-length":
|
||||
// string-length( [string] )
|
||||
if len(root.Args) < 1 {
|
||||
return nil, errors.New("xpath: string-length function must have at least one parameter")
|
||||
}
|
||||
arg1, err := b.processNode(root.Args[0], flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Func: stringLengthFunc(arg1)}
|
||||
case "normalize-space":
|
||||
var arg node
|
||||
if len(root.Args) > 0 {
|
||||
arg = root.Args[0]
|
||||
} else {
|
||||
arg = newAxisNode("self", allNode, "", "", "", nil)
|
||||
}
|
||||
arg1, err := b.processNode(arg, flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Func: normalizespaceFunc(arg1)}
|
||||
case "replace":
|
||||
//replace( string , string, string )
|
||||
if len(root.Args) != 3 {
|
||||
return nil, errors.New("xpath: replace function must have three parameters")
|
||||
}
|
||||
var (
|
||||
arg1, arg2, arg3 query
|
||||
err error
|
||||
)
|
||||
if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if arg3, err = b.processNode(root.Args[2], flagsEnum.None, props); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Func: replaceFunc(arg1, arg2, arg3)}
|
||||
case "translate":
|
||||
//translate( string , string, string )
|
||||
if len(root.Args) != 3 {
|
||||
return nil, errors.New("xpath: translate function must have three parameters")
|
||||
}
|
||||
var (
|
||||
arg1, arg2, arg3 query
|
||||
err error
|
||||
)
|
||||
if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if arg3, err = b.processNode(root.Args[2], flagsEnum.None, props); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Func: translateFunc(arg1, arg2, arg3)}
|
||||
case "not":
|
||||
if len(root.Args) == 0 {
|
||||
return nil, errors.New("xpath: not function must have at least one parameter")
|
||||
}
|
||||
argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Func: notFunc(argQuery)}
|
||||
case "name", "local-name", "namespace-uri":
|
||||
if len(root.Args) > 1 {
|
||||
return nil, fmt.Errorf("xpath: %s function must have at most one parameter", root.FuncName)
|
||||
}
|
||||
var (
|
||||
arg query
|
||||
err error
|
||||
)
|
||||
if len(root.Args) == 1 {
|
||||
arg, err = b.processNode(root.Args[0], flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
switch root.FuncName {
|
||||
case "name":
|
||||
qyOutput = &functionQuery{Func: nameFunc(arg)}
|
||||
case "local-name":
|
||||
qyOutput = &functionQuery{Func: localNameFunc(arg)}
|
||||
case "namespace-uri":
|
||||
qyOutput = &functionQuery{Func: namespaceFunc(arg)}
|
||||
}
|
||||
case "true", "false":
|
||||
val := root.FuncName == "true"
|
||||
qyOutput = &functionQuery{
|
||||
Func: func(_ query, _ iterator) interface{} {
|
||||
return val
|
||||
},
|
||||
}
|
||||
case "last":
|
||||
qyOutput = &functionQuery{Input: b.firstInput, Func: lastFunc()}
|
||||
*props |= builderProps.HasLast
|
||||
case "position":
|
||||
qyOutput = &functionQuery{Input: b.firstInput, Func: positionFunc()}
|
||||
*props |= builderProps.HasPosition
|
||||
case "boolean", "number", "string":
|
||||
var inp query
|
||||
if len(root.Args) > 1 {
|
||||
return nil, fmt.Errorf("xpath: %s function must have at most one parameter", root.FuncName)
|
||||
}
|
||||
if len(root.Args) == 1 {
|
||||
argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
inp = argQuery
|
||||
}
|
||||
switch root.FuncName {
|
||||
case "boolean":
|
||||
qyOutput = &functionQuery{Func: booleanFunc(inp)}
|
||||
case "string":
|
||||
qyOutput = &functionQuery{Func: stringFunc(inp)}
|
||||
case "number":
|
||||
qyOutput = &functionQuery{Func: numberFunc(inp)}
|
||||
}
|
||||
case "count":
|
||||
if len(root.Args) == 0 {
|
||||
return nil, fmt.Errorf("xpath: count(node-sets) function must with have parameters node-sets")
|
||||
}
|
||||
argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Func: countFunc(argQuery)}
|
||||
case "sum":
|
||||
if len(root.Args) == 0 {
|
||||
return nil, fmt.Errorf("xpath: sum(node-sets) function must with have parameters node-sets")
|
||||
}
|
||||
argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Func: sumFunc(argQuery)}
|
||||
case "ceiling", "floor", "round":
|
||||
if len(root.Args) == 0 {
|
||||
return nil, fmt.Errorf("xpath: ceiling(node-sets) function must with have parameters node-sets")
|
||||
}
|
||||
argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
switch root.FuncName {
|
||||
case "ceiling":
|
||||
qyOutput = &functionQuery{Func: ceilingFunc(argQuery)}
|
||||
case "floor":
|
||||
qyOutput = &functionQuery{Func: floorFunc(argQuery)}
|
||||
case "round":
|
||||
qyOutput = &functionQuery{Func: roundFunc(argQuery)}
|
||||
}
|
||||
case "concat":
|
||||
if len(root.Args) < 2 {
|
||||
return nil, fmt.Errorf("xpath: concat() must have at least two arguments")
|
||||
}
|
||||
var args []query
|
||||
for _, v := range root.Args {
|
||||
q, err := b.processNode(v, flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
args = append(args, q)
|
||||
}
|
||||
qyOutput = &functionQuery{Func: concatFunc(args...)}
|
||||
case "reverse":
|
||||
if len(root.Args) == 0 {
|
||||
return nil, fmt.Errorf("xpath: reverse(node-sets) function must with have parameters node-sets")
|
||||
}
|
||||
argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &transformFunctionQuery{Input: argQuery, Func: reverseFunc}
|
||||
case "string-join":
|
||||
if len(root.Args) != 2 {
|
||||
return nil, fmt.Errorf("xpath: string-join(node-sets, separator) function requires node-set and argument")
|
||||
}
|
||||
input, err := b.processNode(root.Args[0], flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
arg1, err := b.processNode(root.Args[1], flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Func: stringJoinFunc(input, arg1)}
|
||||
default:
|
||||
return nil, fmt.Errorf("not yet support this function %s()", root.FuncName)
|
||||
}
|
||||
return qyOutput, nil
|
||||
}
|
||||
|
||||
func (b *builder) processOperator(root *operatorNode, props *builderProp) (query, error) {
|
||||
var (
|
||||
leftProp builderProp
|
||||
rightProp builderProp
|
||||
)
|
||||
|
||||
left, err := b.processNode(root.Left, flagsEnum.None, &leftProp)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
right, err := b.processNode(root.Right, flagsEnum.None, &rightProp)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
*props = leftProp | rightProp
|
||||
|
||||
var qyOutput query
|
||||
switch root.Op {
|
||||
case "+", "-", "*", "div", "mod": // Numeric operator
|
||||
var exprFunc func(iterator, interface{}, interface{}) interface{}
|
||||
switch root.Op {
|
||||
case "+":
|
||||
exprFunc = plusFunc
|
||||
case "-":
|
||||
exprFunc = minusFunc
|
||||
case "*":
|
||||
exprFunc = mulFunc
|
||||
case "div":
|
||||
exprFunc = divFunc
|
||||
case "mod":
|
||||
exprFunc = modFunc
|
||||
}
|
||||
qyOutput = &numericQuery{Left: left, Right: right, Do: exprFunc}
|
||||
case "=", ">", ">=", "<", "<=", "!=":
|
||||
var exprFunc func(iterator, interface{}, interface{}) interface{}
|
||||
switch root.Op {
|
||||
case "=":
|
||||
exprFunc = eqFunc
|
||||
case ">":
|
||||
exprFunc = gtFunc
|
||||
case ">=":
|
||||
exprFunc = geFunc
|
||||
case "<":
|
||||
exprFunc = ltFunc
|
||||
case "<=":
|
||||
exprFunc = leFunc
|
||||
case "!=":
|
||||
exprFunc = neFunc
|
||||
}
|
||||
qyOutput = &logicalQuery{Left: left, Right: right, Do: exprFunc}
|
||||
case "or", "and":
|
||||
isOr := false
|
||||
if root.Op == "or" {
|
||||
isOr = true
|
||||
}
|
||||
qyOutput = &booleanQuery{Left: left, Right: right, IsOr: isOr}
|
||||
case "|":
|
||||
*props |= builderProps.NonFlat
|
||||
qyOutput = &unionQuery{Left: left, Right: right}
|
||||
}
|
||||
return qyOutput, nil
|
||||
}
|
||||
|
||||
func (b *builder) processNode(root node, flags flag, props *builderProp) (q query, err error) {
|
||||
if b.parseDepth = b.parseDepth + 1; b.parseDepth > 1024 {
|
||||
err = errors.New("the xpath expressions is too complex")
|
||||
return
|
||||
}
|
||||
*props = builderProps.None
|
||||
switch root.Type() {
|
||||
case nodeConstantOperand:
|
||||
n := root.(*operandNode)
|
||||
q = &constantQuery{Val: n.Val}
|
||||
case nodeRoot:
|
||||
q = &absoluteQuery{}
|
||||
case nodeAxis:
|
||||
q, err = b.processAxis(root.(*axisNode), flags, props)
|
||||
b.firstInput = q
|
||||
case nodeFilter:
|
||||
q, err = b.processFilter(root.(*filterNode), flags, props)
|
||||
b.firstInput = q
|
||||
case nodeFunction:
|
||||
q, err = b.processFunction(root.(*functionNode), props)
|
||||
case nodeOperator:
|
||||
q, err = b.processOperator(root.(*operatorNode), props)
|
||||
case nodeGroup:
|
||||
q, err = b.processNode(root.(*groupNode).Input, flagsEnum.None, props)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
q = &groupQuery{Input: q}
|
||||
if b.firstInput == nil {
|
||||
b.firstInput = q
|
||||
}
|
||||
}
|
||||
b.parseDepth--
|
||||
return
|
||||
}
|
||||
|
||||
// build builds a specified XPath expressions expr.
|
||||
func build(expr string, namespaces map[string]string) (q query, err error) {
|
||||
defer func() {
|
||||
if e := recover(); e != nil {
|
||||
switch x := e.(type) {
|
||||
case string:
|
||||
err = errors.New(x)
|
||||
case error:
|
||||
err = x
|
||||
default:
|
||||
err = errors.New("unknown panic")
|
||||
}
|
||||
}
|
||||
}()
|
||||
root := parse(expr, namespaces)
|
||||
b := &builder{}
|
||||
props := builderProps.None
|
||||
return b.processNode(root, flagsEnum.None, &props)
|
||||
}
|
||||
+80
@@ -0,0 +1,80 @@
|
||||
package xpath
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// loadFunc computes the value to store for a key that is not cached yet.
type loadFunc func(key interface{}) (interface{}, error)

// defaultCap is the capacity used for the package's default regexp cache.
const defaultCap = 65536

// loadingCache is a small load-through cache that empties itself when it
// reaches capacity.
//
// A ready-made LRU such as github.com/hashicorp/golang-lru is deliberately
// not used: this library has no dependency outside the Go SDK and supports
// go 1.6, 1.9 and 1.10 (and later), and pulling in an external library with
// its transitive dependencies would make that hard if not impossible.
// defaultCap is expected to be large enough for long-running services whose
// set of distinct xpath regexps is small. In the extreme case where the
// capacity is reached, the cache is simply reset; the following reloads cost
// next to nothing once amortized, in exchange for avoiding a more complex and
// slower LRU-style structure.
type loadingCache struct {
	sync.RWMutex
	cap   int
	load  loadFunc
	m     map[interface{}]interface{}
	reset int
}

// NewLoadingCache returns an empty loading cache that fills itself through
// load. capacity must be >= 0, otherwise the function panics; a capacity of 0
// lets the cache grow without bound.
func NewLoadingCache(load loadFunc, capacity int) *loadingCache {
	if capacity >= 0 {
		return &loadingCache{
			cap:  capacity,
			load: load,
			m:    make(map[interface{}]interface{}),
		}
	}
	panic("capacity must be >= 0")
}

// get returns the value cached for key, calling the load function on a miss.
//
// The lookup takes the read lock only. The load itself runs with no lock
// held, and a load error is returned without touching the cache. When storing
// the new value would exceed a positive capacity, the whole map is replaced
// by one holding just this entry and the reset counter is incremented.
func (c *loadingCache) get(key interface{}) (interface{}, error) {
	c.RLock()
	cached, hit := c.m[key]
	c.RUnlock()
	if hit {
		return cached, nil
	}

	loaded, err := c.load(key)
	if err != nil {
		return nil, err
	}

	c.Lock()
	full := c.cap > 0 && len(c.m) >= c.cap
	if !full {
		c.m[key] = loaded
	} else {
		c.m = map[interface{}]interface{}{key: loaded}
		c.reset++
	}
	c.Unlock()
	return loaded, nil
}
|
||||
|
||||
var (
	// RegexpCache is a loading cache for string -> *regexp.Regexp mapping. It is exported so that in rare cases
	// client can customize load func and/or capacity. It is initialized from defaultRegexpCache and is the cache
	// consulted by getRegexp.
	RegexpCache = defaultRegexpCache()
)
|
||||
|
||||
func defaultRegexpCache() *loadingCache {
|
||||
return NewLoadingCache(
|
||||
func(key interface{}) (interface{}, error) {
|
||||
return regexp.Compile(key.(string))
|
||||
}, defaultCap)
|
||||
}
|
||||
|
||||
func getRegexp(pattern string) (*regexp.Regexp, error) {
|
||||
exp, err := RegexpCache.get(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return exp.(*regexp.Regexp), nil
|
||||
}
|
||||
+679
@@ -0,0 +1,679 @@
|
||||
package xpath
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// stringBuilder is the subset of methods shared by strings.Builder
// (go 1.10 and later) and bytes.Buffer (before go 1.10), so either type can
// back the pooled builders created by newStringBuilder.
type stringBuilder interface {
	WriteRune(r rune) (n int, err error)
	WriteString(s string) (int, error)
	Reset()
	Grow(n int)
	String() string
}
|
||||
|
||||
// builderPool recycles stringBuilder values between function evaluations;
// a new one is created with newStringBuilder when the pool is empty.
var builderPool = sync.Pool{New: func() interface{} {
	return newStringBuilder()
}}
|
||||
|
||||
// The XPath function list.
|
||||
|
||||
func predicate(q query) func(NodeNavigator) bool {
|
||||
type Predicater interface {
|
||||
Test(NodeNavigator) bool
|
||||
}
|
||||
if p, ok := q.(Predicater); ok {
|
||||
return p.Test
|
||||
}
|
||||
return func(NodeNavigator) bool { return true }
|
||||
}
|
||||
|
||||
// positionFunc is a XPath Node Set functions position().
|
||||
func positionFunc() func(query, iterator) interface{} {
|
||||
return func(q query, t iterator) interface{} {
|
||||
var (
|
||||
count = 1
|
||||
node = t.Current().Copy()
|
||||
)
|
||||
test := predicate(q)
|
||||
for node.MoveToPrevious() {
|
||||
if test(node) {
|
||||
count++
|
||||
}
|
||||
}
|
||||
return float64(count)
|
||||
}
|
||||
}
|
||||
|
||||
// lastFunc is a XPath Node Set functions last().
|
||||
func lastFunc() func(query, iterator) interface{} {
|
||||
return func(q query, t iterator) interface{} {
|
||||
var (
|
||||
count = 0
|
||||
node = t.Current().Copy()
|
||||
)
|
||||
test := predicate(q)
|
||||
node.MoveToFirst()
|
||||
for {
|
||||
if test(node) {
|
||||
count++
|
||||
}
|
||||
if !node.MoveToNext() {
|
||||
break
|
||||
}
|
||||
}
|
||||
return float64(count)
|
||||
}
|
||||
}
|
||||
|
||||
// countFunc is a XPath Node Set functions count(node-set).
|
||||
func countFunc(arg query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
var count = 0
|
||||
q := functionArgs(arg)
|
||||
test := predicate(q)
|
||||
switch typ := q.Evaluate(t).(type) {
|
||||
case query:
|
||||
for node := typ.Select(t); node != nil; node = typ.Select(t) {
|
||||
if test(node) {
|
||||
count++
|
||||
}
|
||||
}
|
||||
}
|
||||
return float64(count)
|
||||
}
|
||||
}
|
||||
|
||||
// sumFunc is a XPath Node Set functions sum(node-set).
|
||||
func sumFunc(arg query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
var sum float64
|
||||
switch typ := functionArgs(arg).Evaluate(t).(type) {
|
||||
case query:
|
||||
for node := typ.Select(t); node != nil; node = typ.Select(t) {
|
||||
if v, err := strconv.ParseFloat(node.Value(), 64); err == nil {
|
||||
sum += v
|
||||
}
|
||||
}
|
||||
case float64:
|
||||
sum = typ
|
||||
case string:
|
||||
v, err := strconv.ParseFloat(typ, 64)
|
||||
if err != nil {
|
||||
panic(errors.New("sum() function argument type must be a node-set or number"))
|
||||
}
|
||||
sum = v
|
||||
}
|
||||
return sum
|
||||
}
|
||||
}
|
||||
|
||||
func asNumber(t iterator, o interface{}) float64 {
|
||||
switch typ := o.(type) {
|
||||
case query:
|
||||
node := typ.Select(t)
|
||||
if node == nil {
|
||||
return math.NaN()
|
||||
}
|
||||
if v, err := strconv.ParseFloat(node.Value(), 64); err == nil {
|
||||
return v
|
||||
}
|
||||
case float64:
|
||||
return typ
|
||||
case string:
|
||||
v, err := strconv.ParseFloat(typ, 64)
|
||||
if err == nil {
|
||||
return v
|
||||
}
|
||||
}
|
||||
return math.NaN()
|
||||
}
|
||||
|
||||
// ceilingFunc is a XPath Node Set functions ceiling(node-set).
|
||||
func ceilingFunc(arg query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
val := asNumber(t, functionArgs(arg).Evaluate(t))
|
||||
// if math.IsNaN(val) {
|
||||
// panic(errors.New("ceiling() function argument type must be a valid number"))
|
||||
// }
|
||||
return math.Ceil(val)
|
||||
}
|
||||
}
|
||||
|
||||
// floorFunc is a XPath Node Set functions floor(node-set).
|
||||
func floorFunc(arg query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
val := asNumber(t, functionArgs(arg).Evaluate(t))
|
||||
return math.Floor(val)
|
||||
}
|
||||
}
|
||||
|
||||
// roundFunc is a XPath Node Set functions round(node-set).
|
||||
func roundFunc(arg query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
val := asNumber(t, functionArgs(arg).Evaluate(t))
|
||||
//return math.Round(val)
|
||||
return round(val)
|
||||
}
|
||||
}
|
||||
|
||||
// nameFunc is a XPath functions name([node-set]).
|
||||
func nameFunc(arg query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
var v NodeNavigator
|
||||
if arg == nil {
|
||||
v = t.Current()
|
||||
} else {
|
||||
v = arg.Clone().Select(t)
|
||||
if v == nil {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
ns := v.Prefix()
|
||||
if ns == "" {
|
||||
return v.LocalName()
|
||||
}
|
||||
return ns + ":" + v.LocalName()
|
||||
}
|
||||
}
|
||||
|
||||
// localNameFunc is a XPath functions local-name([node-set]).
|
||||
func localNameFunc(arg query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
var v NodeNavigator
|
||||
if arg == nil {
|
||||
v = t.Current()
|
||||
} else {
|
||||
v = arg.Clone().Select(t)
|
||||
if v == nil {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
return v.LocalName()
|
||||
}
|
||||
}
|
||||
|
||||
// namespaceFunc is a XPath functions namespace-uri([node-set]).
|
||||
func namespaceFunc(arg query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
var v NodeNavigator
|
||||
if arg == nil {
|
||||
v = t.Current()
|
||||
} else {
|
||||
// Get the first node in the node-set if specified.
|
||||
v = arg.Clone().Select(t)
|
||||
if v == nil {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
// fix about namespace-uri() bug: https://github.com/antchfx/xmlquery/issues/22
|
||||
// TODO: In the next version, add NamespaceURL() to the NodeNavigator interface.
|
||||
type namespaceURL interface {
|
||||
NamespaceURL() string
|
||||
}
|
||||
if f, ok := v.(namespaceURL); ok {
|
||||
return f.NamespaceURL()
|
||||
}
|
||||
return v.Prefix()
|
||||
}
|
||||
}
|
||||
|
||||
func asBool(t iterator, v interface{}) bool {
|
||||
switch v := v.(type) {
|
||||
case nil:
|
||||
return false
|
||||
case *NodeIterator:
|
||||
return v.MoveNext()
|
||||
case bool:
|
||||
return v
|
||||
case float64:
|
||||
return v != 0
|
||||
case string:
|
||||
return v != ""
|
||||
case query:
|
||||
return v.Select(t) != nil
|
||||
default:
|
||||
panic(fmt.Errorf("unexpected type: %T", v))
|
||||
}
|
||||
}
|
||||
|
||||
func asString(t iterator, v interface{}) string {
|
||||
switch v := v.(type) {
|
||||
case nil:
|
||||
return ""
|
||||
case bool:
|
||||
if v {
|
||||
return "true"
|
||||
}
|
||||
return "false"
|
||||
case float64:
|
||||
return strconv.FormatFloat(v, 'g', -1, 64)
|
||||
case string:
|
||||
return v
|
||||
case query:
|
||||
node := v.Select(t)
|
||||
if node == nil {
|
||||
return ""
|
||||
}
|
||||
return node.Value()
|
||||
default:
|
||||
panic(fmt.Errorf("unexpected type: %T", v))
|
||||
}
|
||||
}
|
||||
|
||||
// booleanFunc is a XPath functions boolean([node-set]).
|
||||
func booleanFunc(arg1 query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
v := functionArgs(arg1).Evaluate(t)
|
||||
return asBool(t, v)
|
||||
}
|
||||
}
|
||||
|
||||
// numberFunc is a XPath functions number([node-set]).
|
||||
func numberFunc(arg1 query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
v := functionArgs(arg1).Evaluate(t)
|
||||
return asNumber(t, v)
|
||||
}
|
||||
}
|
||||
|
||||
// stringFunc is a XPath functions string([node-set]).
|
||||
func stringFunc(arg1 query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
v := functionArgs(arg1).Evaluate(t)
|
||||
return asString(t, v)
|
||||
}
|
||||
}
|
||||
|
||||
// startwithFunc is a XPath functions starts-with(string, string).
|
||||
func startwithFunc(arg1, arg2 query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
var (
|
||||
m, n string
|
||||
ok bool
|
||||
)
|
||||
switch typ := functionArgs(arg1).Evaluate(t).(type) {
|
||||
case string:
|
||||
m = typ
|
||||
case query:
|
||||
node := typ.Select(t)
|
||||
if node == nil {
|
||||
return false
|
||||
}
|
||||
m = node.Value()
|
||||
default:
|
||||
panic(errors.New("starts-with() function argument type must be string"))
|
||||
}
|
||||
n, ok = functionArgs(arg2).Evaluate(t).(string)
|
||||
if !ok {
|
||||
panic(errors.New("starts-with() function argument type must be string"))
|
||||
}
|
||||
return strings.HasPrefix(m, n)
|
||||
}
|
||||
}
|
||||
|
||||
// endwithFunc is a XPath functions ends-with(string, string).
|
||||
func endwithFunc(arg1, arg2 query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
var (
|
||||
m, n string
|
||||
ok bool
|
||||
)
|
||||
switch typ := functionArgs(arg1).Evaluate(t).(type) {
|
||||
case string:
|
||||
m = typ
|
||||
case query:
|
||||
node := typ.Select(t)
|
||||
if node == nil {
|
||||
return false
|
||||
}
|
||||
m = node.Value()
|
||||
default:
|
||||
panic(errors.New("ends-with() function argument type must be string"))
|
||||
}
|
||||
n, ok = functionArgs(arg2).Evaluate(t).(string)
|
||||
if !ok {
|
||||
panic(errors.New("ends-with() function argument type must be string"))
|
||||
}
|
||||
return strings.HasSuffix(m, n)
|
||||
}
|
||||
}
|
||||
|
||||
// containsFunc is a XPath functions contains(string or @attr, string).
|
||||
func containsFunc(arg1, arg2 query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
var (
|
||||
m, n string
|
||||
ok bool
|
||||
)
|
||||
switch typ := functionArgs(arg1).Evaluate(t).(type) {
|
||||
case string:
|
||||
m = typ
|
||||
case query:
|
||||
node := typ.Select(t)
|
||||
if node == nil {
|
||||
return false
|
||||
}
|
||||
m = node.Value()
|
||||
default:
|
||||
panic(errors.New("contains() function argument type must be string"))
|
||||
}
|
||||
|
||||
n, ok = functionArgs(arg2).Evaluate(t).(string)
|
||||
if !ok {
|
||||
panic(errors.New("contains() function argument type must be string"))
|
||||
}
|
||||
|
||||
return strings.Contains(m, n)
|
||||
}
|
||||
}
|
||||
|
||||
// matchesFunc is an XPath function that tests a given string against a regexp pattern.
|
||||
// Note: does not support https://www.w3.org/TR/xpath-functions-31/#func-matches 3rd optional `flags` argument; if
|
||||
// needed, directly put flags in the regexp pattern, such as `(?i)^pattern$` for `i` flag.
|
||||
func matchesFunc(arg1, arg2 query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
var s string
|
||||
switch typ := functionArgs(arg1).Evaluate(t).(type) {
|
||||
case string:
|
||||
s = typ
|
||||
case query:
|
||||
node := typ.Select(t)
|
||||
if node == nil {
|
||||
return ""
|
||||
}
|
||||
s = node.Value()
|
||||
}
|
||||
var pattern string
|
||||
var ok bool
|
||||
if pattern, ok = functionArgs(arg2).Evaluate(t).(string); !ok {
|
||||
panic(errors.New("matches() function second argument type must be string"))
|
||||
}
|
||||
re, err := getRegexp(pattern)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("matches() function second argument is not a valid regexp pattern, err: %s", err.Error()))
|
||||
}
|
||||
return re.MatchString(s)
|
||||
}
|
||||
}
|
||||
|
||||
// normalizespaceFunc is XPath functions normalize-space(string?)
|
||||
func normalizespaceFunc(arg1 query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
var m string
|
||||
switch typ := functionArgs(arg1).Evaluate(t).(type) {
|
||||
case string:
|
||||
m = typ
|
||||
case query:
|
||||
node := typ.Select(t)
|
||||
if node == nil {
|
||||
return ""
|
||||
}
|
||||
m = node.Value()
|
||||
}
|
||||
var b = builderPool.Get().(stringBuilder)
|
||||
b.Grow(len(m))
|
||||
|
||||
runeStr := []rune(strings.TrimSpace(m))
|
||||
l := len(runeStr)
|
||||
for i := range runeStr {
|
||||
r := runeStr[i]
|
||||
isSpace := unicode.IsSpace(r)
|
||||
if !(isSpace && (i+1 < l && unicode.IsSpace(runeStr[i+1]))) {
|
||||
if isSpace {
|
||||
r = ' '
|
||||
}
|
||||
b.WriteRune(r)
|
||||
}
|
||||
}
|
||||
result := b.String()
|
||||
b.Reset()
|
||||
builderPool.Put(b)
|
||||
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
// substringFunc is XPath functions substring function returns a part of a given string.
|
||||
func substringFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
var m string
|
||||
switch typ := functionArgs(arg1).Evaluate(t).(type) {
|
||||
case string:
|
||||
m = typ
|
||||
case query:
|
||||
node := typ.Select(t)
|
||||
if node == nil {
|
||||
return ""
|
||||
}
|
||||
m = node.Value()
|
||||
}
|
||||
|
||||
var start, length float64
|
||||
var ok bool
|
||||
|
||||
if start, ok = functionArgs(arg2).Evaluate(t).(float64); !ok {
|
||||
panic(errors.New("substring() function first argument type must be int"))
|
||||
} else if start < 1 {
|
||||
panic(errors.New("substring() function first argument type must be >= 1"))
|
||||
}
|
||||
start--
|
||||
if arg3 != nil {
|
||||
if length, ok = functionArgs(arg3).Evaluate(t).(float64); !ok {
|
||||
panic(errors.New("substring() function second argument type must be int"))
|
||||
}
|
||||
}
|
||||
if (len(m) - int(start)) < int(length) {
|
||||
panic(errors.New("substring() function start and length argument out of range"))
|
||||
}
|
||||
if length > 0 {
|
||||
return m[int(start):int(length+start)]
|
||||
}
|
||||
return m[int(start):]
|
||||
}
|
||||
}
|
||||
|
||||
// substringIndFunc is XPath functions substring-before/substring-after function returns a part of a given string.
|
||||
func substringIndFunc(arg1, arg2 query, after bool) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
var str string
|
||||
switch v := functionArgs(arg1).Evaluate(t).(type) {
|
||||
case string:
|
||||
str = v
|
||||
case query:
|
||||
node := v.Select(t)
|
||||
if node == nil {
|
||||
return ""
|
||||
}
|
||||
str = node.Value()
|
||||
}
|
||||
var word string
|
||||
switch v := functionArgs(arg2).Evaluate(t).(type) {
|
||||
case string:
|
||||
word = v
|
||||
case query:
|
||||
node := v.Select(t)
|
||||
if node == nil {
|
||||
return ""
|
||||
}
|
||||
word = node.Value()
|
||||
}
|
||||
if word == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
i := strings.Index(str, word)
|
||||
if i < 0 {
|
||||
return ""
|
||||
}
|
||||
if after {
|
||||
return str[i+len(word):]
|
||||
}
|
||||
return str[:i]
|
||||
}
|
||||
}
|
||||
|
||||
// stringLengthFunc is XPATH string-length( [string] ) function that returns a number
|
||||
// equal to the number of characters in a given string.
|
||||
func stringLengthFunc(arg1 query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
switch v := functionArgs(arg1).Evaluate(t).(type) {
|
||||
case string:
|
||||
return float64(len(v))
|
||||
case query:
|
||||
node := v.Select(t)
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
return float64(len(node.Value()))
|
||||
}
|
||||
return float64(0)
|
||||
}
|
||||
}
|
||||
|
||||
// translateFunc is XPath functions translate() function returns a replaced string.
|
||||
func translateFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
str := asString(t, functionArgs(arg1).Evaluate(t))
|
||||
src := asString(t, functionArgs(arg2).Evaluate(t))
|
||||
dst := asString(t, functionArgs(arg3).Evaluate(t))
|
||||
|
||||
replace := make([]string, 0, len(src))
|
||||
for i, s := range src {
|
||||
d := ""
|
||||
if i < len(dst) {
|
||||
d = string(dst[i])
|
||||
}
|
||||
replace = append(replace, string(s), d)
|
||||
}
|
||||
return strings.NewReplacer(replace...).Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
// replaceFunc is XPath functions replace() function returns a replaced string.
|
||||
func replaceFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
str := asString(t, functionArgs(arg1).Evaluate(t))
|
||||
src := asString(t, functionArgs(arg2).Evaluate(t))
|
||||
dst := asString(t, functionArgs(arg3).Evaluate(t))
|
||||
|
||||
return strings.Replace(str, src, dst, -1)
|
||||
}
|
||||
}
|
||||
|
||||
// notFunc is XPATH functions not(expression) function operation.
|
||||
func notFunc(arg1 query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
switch v := functionArgs(arg1).Evaluate(t).(type) {
|
||||
case bool:
|
||||
return !v
|
||||
case query:
|
||||
node := v.Select(t)
|
||||
return node == nil
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// concatFunc is the concat function concatenates two or more
|
||||
// strings and returns the resulting string.
|
||||
// concat( string1 , string2 [, stringn]* )
|
||||
func concatFunc(args ...query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
b := builderPool.Get().(stringBuilder)
|
||||
for _, v := range args {
|
||||
v = functionArgs(v)
|
||||
|
||||
switch v := v.Evaluate(t).(type) {
|
||||
case string:
|
||||
b.WriteString(v)
|
||||
case query:
|
||||
node := v.Select(t)
|
||||
if node != nil {
|
||||
b.WriteString(node.Value())
|
||||
}
|
||||
}
|
||||
}
|
||||
result := b.String()
|
||||
b.Reset()
|
||||
builderPool.Put(b)
|
||||
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
// https://github.com/antchfx/xpath/issues/43
|
||||
func functionArgs(q query) query {
|
||||
if _, ok := q.(*functionQuery); ok {
|
||||
return q
|
||||
}
|
||||
return q.Clone()
|
||||
}
|
||||
|
||||
func reverseFunc(q query, t iterator) func() NodeNavigator {
|
||||
var list []NodeNavigator
|
||||
for {
|
||||
node := q.Select(t)
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
list = append(list, node.Copy())
|
||||
}
|
||||
i := len(list)
|
||||
return func() NodeNavigator {
|
||||
if i <= 0 {
|
||||
return nil
|
||||
}
|
||||
i--
|
||||
node := list[i]
|
||||
return node
|
||||
}
|
||||
}
|
||||
|
||||
// string-join is a XPath Node Set functions string-join(node-set, separator).
|
||||
func stringJoinFunc(q, arg1 query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
var separator string
|
||||
switch v := functionArgs(arg1).Evaluate(t).(type) {
|
||||
case string:
|
||||
separator = v
|
||||
case query:
|
||||
node := v.Select(t)
|
||||
if node != nil {
|
||||
separator = node.Value()
|
||||
}
|
||||
}
|
||||
|
||||
q = functionArgs(q)
|
||||
test := predicate(q)
|
||||
var parts []string
|
||||
switch v := q.Evaluate(t).(type) {
|
||||
case string:
|
||||
return v
|
||||
case query:
|
||||
for node := v.Select(t); node != nil; node = v.Select(t) {
|
||||
if test(node) {
|
||||
parts = append(parts, node.Value())
|
||||
}
|
||||
}
|
||||
}
|
||||
return strings.Join(parts, separator)
|
||||
}
|
||||
}
|
||||
|
||||
// lower-case is XPATH function that converts a string to lower case.
|
||||
func lowerCaseFunc(arg1 query) func(query, iterator) interface{} {
|
||||
return func(_ query, t iterator) interface{} {
|
||||
v := functionArgs(arg1).Evaluate(t)
|
||||
return strings.ToLower(asString(t, v))
|
||||
}
|
||||
}
|
||||
+16
@@ -0,0 +1,16 @@
|
||||
// +build go1.10
|
||||
|
||||
package xpath
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// round returns f rounded to the nearest integer using math.Round (halves
// are rounded away from zero), converted to int.
func round(f float64) int {
	nearest := math.Round(f)
	return int(nearest)
}
|
||||
|
||||
func newStringBuilder() stringBuilder {
|
||||
return &strings.Builder{}
|
||||
}
|
||||
+22
@@ -0,0 +1,22 @@
|
||||
// +build !go1.10
|
||||
|
||||
package xpath
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"math"
|
||||
)
|
||||
|
||||
// round is a stand-in for math.Round, which only exists in Go 1.10+; this
// version is compiled for older toolchains.
// See https://github.com/golang/go/issues/20100
//
// Values whose magnitude is below 0.5 give 0. Otherwise 0.5 carrying the sign
// of f is added and the sum is converted to int.
func round(f float64) int {
	if magnitude := math.Abs(f); magnitude < 0.5 {
		return 0
	}
	half := math.Copysign(0.5, f)
	return int(f + half)
}
|
||||
|
||||
func newStringBuilder() stringBuilder {
|
||||
return &bytes.Buffer{}
|
||||
}
|
||||
+288
@@ -0,0 +1,288 @@
|
||||
package xpath
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// The XPath number operator function list.
|
||||
|
||||
// logical is the signature of the comparison helpers stored in logicalFuncs:
// given the iterator, the operator text and the two operands, it reports
// whether the comparison holds.
type logical func(iterator, string, interface{}, interface{}) bool
|
||||
|
||||
// logicalFuncs is a 4x4 dispatch table of comparison helpers; nil entries
// mark combinations that have no helper.
// NOTE(review): judging by the helper names, the first index selects the left
// operand kind and the second the right operand kind, in the order boolean,
// number, string, node-set — confirm against the code that indexes the table.
var logicalFuncs = [][]logical{
	{cmpBooleanBoolean, nil, nil, nil},
	{nil, cmpNumericNumeric, cmpNumericString, cmpNumericNodeSet},
	{nil, cmpStringNumeric, cmpStringString, cmpStringNodeSet},
	{nil, cmpNodeSetNumeric, cmpNodeSetString, cmpNodeSetNodeSet},
}
|
||||
|
||||
// cmpNumberNumberF applies the comparison operator op ("=", "!=", "<", "<=",
// ">", ">=") to the numbers a and b. An unrecognised operator yields false.
func cmpNumberNumberF(op string, a, b float64) bool {
	if op == "=" {
		return a == b
	}
	if op == "!=" {
		return a != b
	}
	if op == "<" {
		return a < b
	}
	if op == "<=" {
		return a <= b
	}
	if op == ">" {
		return a > b
	}
	if op == ">=" {
		return a >= b
	}
	return false
}
|
||||
|
||||
// cmpStringStringF applies the comparison operator op ("=", "!=", "<", "<=",
// ">", ">=") to the strings a and b using Go's string ordering. An
// unrecognised operator yields false.
func cmpStringStringF(op string, a, b string) bool {
	var holds bool
	switch op {
	case "=":
		holds = a == b
	case "!=":
		holds = a != b
	case "<":
		holds = a < b
	case "<=":
		holds = a <= b
	case ">":
		holds = a > b
	case ">=":
		holds = a >= b
	}
	return holds
}
|
||||
|
||||
// cmpBooleanBooleanF combines two booleans with the logical operator op.
//
// "or" yields a || b and "and" yields a && b. Any other operator yields false.
func cmpBooleanBooleanF(op string, a, b bool) bool {
	if op == "or" {
		return a || b
	}
	if op == "and" {
		return a && b
	}
	return false
}
|
||||
|
||||
func cmpNumericNumeric(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(float64)
|
||||
b := n.(float64)
|
||||
return cmpNumberNumberF(op, a, b)
|
||||
}
|
||||
|
||||
func cmpNumericString(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(float64)
|
||||
b := n.(string)
|
||||
num, err := strconv.ParseFloat(b, 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return cmpNumberNumberF(op, a, num)
|
||||
}
|
||||
|
||||
func cmpNumericNodeSet(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(float64)
|
||||
b := n.(query)
|
||||
|
||||
for {
|
||||
node := b.Select(t)
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
num, err := strconv.ParseFloat(node.Value(), 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if cmpNumberNumberF(op, a, num) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func cmpNodeSetNumeric(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(query)
|
||||
b := n.(float64)
|
||||
for {
|
||||
node := a.Select(t)
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
num, err := strconv.ParseFloat(node.Value(), 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if cmpNumberNumberF(op, num, b) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func cmpNodeSetString(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(query)
|
||||
b := n.(string)
|
||||
for {
|
||||
node := a.Select(t)
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
if cmpStringStringF(op, b, node.Value()) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func cmpNodeSetNodeSet(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(query)
|
||||
b := n.(query)
|
||||
for {
|
||||
x := a.Select(t)
|
||||
if x == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
y := b.Select(t)
|
||||
if y == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
for {
|
||||
if cmpStringStringF(op, x.Value(), y.Value()) {
|
||||
return true
|
||||
}
|
||||
if y = b.Select(t); y == nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
// reset
|
||||
b.Evaluate(t)
|
||||
}
|
||||
}
|
||||
|
||||
func cmpStringNumeric(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(string)
|
||||
b := n.(float64)
|
||||
num, err := strconv.ParseFloat(a, 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return cmpNumberNumberF(op, b, num)
|
||||
}
|
||||
|
||||
func cmpStringString(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(string)
|
||||
b := n.(string)
|
||||
return cmpStringStringF(op, a, b)
|
||||
}
|
||||
|
||||
func cmpStringNodeSet(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(string)
|
||||
b := n.(query)
|
||||
for {
|
||||
node := b.Select(t)
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
if cmpStringStringF(op, a, node.Value()) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func cmpBooleanBoolean(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(bool)
|
||||
b := n.(bool)
|
||||
return cmpBooleanBooleanF(op, a, b)
|
||||
}
|
||||
|
||||
// eqFunc is an `=` operator.
|
||||
func eqFunc(t iterator, m, n interface{}) interface{} {
|
||||
t1 := getXPathType(m)
|
||||
t2 := getXPathType(n)
|
||||
return logicalFuncs[t1][t2](t, "=", m, n)
|
||||
}
|
||||
|
||||
// gtFunc is an `>` operator.
|
||||
func gtFunc(t iterator, m, n interface{}) interface{} {
|
||||
t1 := getXPathType(m)
|
||||
t2 := getXPathType(n)
|
||||
return logicalFuncs[t1][t2](t, ">", m, n)
|
||||
}
|
||||
|
||||
// geFunc is an `>=` operator.
|
||||
func geFunc(t iterator, m, n interface{}) interface{} {
|
||||
t1 := getXPathType(m)
|
||||
t2 := getXPathType(n)
|
||||
return logicalFuncs[t1][t2](t, ">=", m, n)
|
||||
}
|
||||
|
||||
// ltFunc is an `<` operator.
|
||||
func ltFunc(t iterator, m, n interface{}) interface{} {
|
||||
t1 := getXPathType(m)
|
||||
t2 := getXPathType(n)
|
||||
return logicalFuncs[t1][t2](t, "<", m, n)
|
||||
}
|
||||
|
||||
// leFunc is an `<=` operator.
|
||||
func leFunc(t iterator, m, n interface{}) interface{} {
|
||||
t1 := getXPathType(m)
|
||||
t2 := getXPathType(n)
|
||||
return logicalFuncs[t1][t2](t, "<=", m, n)
|
||||
}
|
||||
|
||||
// neFunc is an `!=` operator.
|
||||
func neFunc(t iterator, m, n interface{}) interface{} {
|
||||
t1 := getXPathType(m)
|
||||
t2 := getXPathType(n)
|
||||
return logicalFuncs[t1][t2](t, "!=", m, n)
|
||||
}
|
||||
|
||||
// orFunc is an `or` operator.
|
||||
var orFunc = func(t iterator, m, n interface{}) interface{} {
|
||||
t1 := getXPathType(m)
|
||||
t2 := getXPathType(n)
|
||||
return logicalFuncs[t1][t2](t, "or", m, n)
|
||||
}
|
||||
|
||||
func numericExpr(t iterator, m, n interface{}, cb func(float64, float64) float64) float64 {
|
||||
a := asNumber(t, m)
|
||||
b := asNumber(t, n)
|
||||
return cb(a, b)
|
||||
}
|
||||
|
||||
// plusFunc is an `+` operator.
|
||||
var plusFunc = func(t iterator, m, n interface{}) interface{} {
|
||||
return numericExpr(t, m, n, func(a, b float64) float64 {
|
||||
return a + b
|
||||
})
|
||||
}
|
||||
|
||||
// minusFunc is an `-` operator.
|
||||
var minusFunc = func(t iterator, m, n interface{}) interface{} {
|
||||
return numericExpr(t, m, n, func(a, b float64) float64 {
|
||||
return a - b
|
||||
})
|
||||
}
|
||||
|
||||
// mulFunc is an `*` operator.
|
||||
var mulFunc = func(t iterator, m, n interface{}) interface{} {
|
||||
return numericExpr(t, m, n, func(a, b float64) float64 {
|
||||
return a * b
|
||||
})
|
||||
}
|
||||
|
||||
// divFunc is an `DIV` operator.
|
||||
var divFunc = func(t iterator, m, n interface{}) interface{} {
|
||||
return numericExpr(t, m, n, func(a, b float64) float64 {
|
||||
return a / b
|
||||
})
|
||||
}
|
||||
|
||||
// modFunc is an 'MOD' operator.
|
||||
var modFunc = func(t iterator, m, n interface{}) interface{} {
|
||||
return numericExpr(t, m, n, func(a, b float64) float64 {
|
||||
return float64(int(a) % int(b))
|
||||
})
|
||||
}
|
||||
+1254
File diff suppressed because it is too large
Load Diff
+1437
File diff suppressed because it is too large
Load Diff
+176
@@ -0,0 +1,176 @@
|
||||
package xpath
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// NodeType identifies the kind of node an XPath navigator is positioned on.
type NodeType int

// The node kinds, numbered consecutively from zero in declaration order.
const (
	// RootNode is the root of the XML document or node tree.
	RootNode NodeType = iota

	// ElementNode is an element, for example <element>.
	ElementNode

	// AttributeNode is an attribute, for example id='123'.
	AttributeNode

	// TextNode is the text content of a node.
	TextNode

	// CommentNode is a comment, for example <!-- my comment -->.
	CommentNode

	// allNode stands for every node type. It is unexported and only used
	// inside the xpath package when matching predicates.
	allNode
)
|
||||
|
||||
// NodeNavigator provides cursor model for navigating XML data.
|
||||
type NodeNavigator interface {
|
||||
// NodeType returns the XPathNodeType of the current node.
|
||||
NodeType() NodeType
|
||||
|
||||
// LocalName gets the Name of the current node.
|
||||
LocalName() string
|
||||
|
||||
// Prefix returns namespace prefix associated with the current node.
|
||||
Prefix() string
|
||||
|
||||
// Value gets the value of current node.
|
||||
Value() string
|
||||
|
||||
// Copy does a deep copy of the NodeNavigator and all its components.
|
||||
Copy() NodeNavigator
|
||||
|
||||
// MoveToRoot moves the NodeNavigator to the root node of the current node.
|
||||
MoveToRoot()
|
||||
|
||||
// MoveToParent moves the NodeNavigator to the parent node of the current node.
|
||||
MoveToParent() bool
|
||||
|
||||
// MoveToNextAttribute moves the NodeNavigator to the next attribute on current node.
|
||||
MoveToNextAttribute() bool
|
||||
|
||||
// MoveToChild moves the NodeNavigator to the first child node of the current node.
|
||||
MoveToChild() bool
|
||||
|
||||
// MoveToFirst moves the NodeNavigator to the first sibling node of the current node.
|
||||
MoveToFirst() bool
|
||||
|
||||
// MoveToNext moves the NodeNavigator to the next sibling node of the current node.
|
||||
MoveToNext() bool
|
||||
|
||||
// MoveToPrevious moves the NodeNavigator to the previous sibling node of the current node.
|
||||
MoveToPrevious() bool
|
||||
|
||||
// MoveTo moves the NodeNavigator to the same position as the specified NodeNavigator.
|
||||
MoveTo(NodeNavigator) bool
|
||||
}
|
||||
|
||||
// NodeIterator holds all matched Node object.
|
||||
type NodeIterator struct {
|
||||
node NodeNavigator
|
||||
query query
|
||||
}
|
||||
|
||||
// Current returns current node which matched.
|
||||
func (t *NodeIterator) Current() NodeNavigator {
|
||||
return t.node
|
||||
}
|
||||
|
||||
// MoveNext moves Navigator to the next match node.
|
||||
func (t *NodeIterator) MoveNext() bool {
|
||||
n := t.query.Select(t)
|
||||
if n == nil {
|
||||
return false
|
||||
}
|
||||
if !t.node.MoveTo(n) {
|
||||
t.node = n.Copy()
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Select selects a node set using the specified XPath expression.
|
||||
// This method is deprecated, recommend using Expr.Select() method instead.
|
||||
func Select(root NodeNavigator, expr string) *NodeIterator {
|
||||
exp, err := Compile(expr)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return exp.Select(root)
|
||||
}
|
||||
|
||||
// Expr is an XPath expression for query.
|
||||
type Expr struct {
|
||||
s string
|
||||
q query
|
||||
}
|
||||
|
||||
type iteratorFunc func() NodeNavigator
|
||||
|
||||
func (f iteratorFunc) Current() NodeNavigator {
|
||||
return f()
|
||||
}
|
||||
|
||||
// Evaluate returns the result of the expression.
|
||||
// The result type of the expression is one of the follow: bool,float64,string,NodeIterator).
|
||||
func (expr *Expr) Evaluate(root NodeNavigator) interface{} {
|
||||
val := expr.q.Evaluate(iteratorFunc(func() NodeNavigator { return root }))
|
||||
switch val.(type) {
|
||||
case query:
|
||||
return &NodeIterator{query: expr.q.Clone(), node: root}
|
||||
}
|
||||
return val
|
||||
}
|
||||
|
||||
// Select selects a node set using the specified XPath expression.
|
||||
func (expr *Expr) Select(root NodeNavigator) *NodeIterator {
|
||||
return &NodeIterator{query: expr.q.Clone(), node: root}
|
||||
}
|
||||
|
||||
// String returns XPath expression string.
|
||||
func (expr *Expr) String() string {
|
||||
return expr.s
|
||||
}
|
||||
|
||||
// Compile compiles an XPath expression string.
|
||||
func Compile(expr string) (*Expr, error) {
|
||||
if expr == "" {
|
||||
return nil, errors.New("expr expression is nil")
|
||||
}
|
||||
qy, err := build(expr, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if qy == nil {
|
||||
return nil, fmt.Errorf(fmt.Sprintf("undeclared variable in XPath expression: %s", expr))
|
||||
}
|
||||
return &Expr{s: expr, q: qy}, nil
|
||||
}
|
||||
|
||||
// MustCompile compiles an XPath expression string and ignored error.
|
||||
func MustCompile(expr string) *Expr {
|
||||
exp, err := Compile(expr)
|
||||
if err != nil {
|
||||
return &Expr{s: expr, q: nopQuery{}}
|
||||
}
|
||||
return exp
|
||||
}
|
||||
|
||||
// CompileWithNS compiles an XPath expression string, using given namespaces map.
|
||||
func CompileWithNS(expr string, namespaces map[string]string) (*Expr, error) {
|
||||
if expr == "" {
|
||||
return nil, errors.New("expr expression is nil")
|
||||
}
|
||||
qy, err := build(expr, namespaces)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if qy == nil {
|
||||
return nil, fmt.Errorf(fmt.Sprintf("undeclared variable in XPath expression: %s", expr))
|
||||
}
|
||||
return &Expr{s: expr, q: qy}, nil
|
||||
}
|
||||
Reference in New Issue
Block a user