Ver Fonte

Pushing html-help

  It appears the package isn't quite right; stand by.
Apollo há 2 anos atrás
commit
055b099868
2 ficheiros alterados com 204 adições e 0 exclusões
  1. 3 0
      go.mod
  2. 201 0
      html-help.go

+ 3 - 0
go.mod

@@ -0,0 +1,3 @@
+module git.red-green.com/david/html-help
+
+go 1.20

+ 201 - 0
html-help.go

@@ -0,0 +1,201 @@
+// Package htmlhelp provides helpers for dumping, searching, and extracting
+// text from golang.org/x/net/html node trees.
+//
+// A package clause must be a plain identifier; the module path
+// (git.red-green.com/david/html-help) belongs in go.mod only.
+package htmlhelp
+
+import (
+	"fmt"
+	"io"
+	"strings"
+
+	"golang.org/x/net/html"
+)
+
+func RecursiveTree(output io.Writer, doc *html.Node, level int) {
+	var path *html.Node
+	var spaces string = strings.Repeat(" ", level*2)
+
+	for path = doc.FirstChild; path != nil; path = path.NextSibling {
+		switch path.Type {
+		case html.ElementNode:
+			output.Write([]byte(fmt.Sprintf("%s%s\n", spaces, path.Data)))
+		case html.TextNode:
+			output.Write([]byte(fmt.Sprintf("%sTEXT: %#v\n", spaces, path.Data)))
+		case html.DoctypeNode:
+			output.Write([]byte(fmt.Sprintf("%sDoctype: %s\n", spaces, path.Data)))
+		default:
+			output.Write([]byte(fmt.Sprintf("?? %#v\n", path)))
+		}
+
+		if path.FirstChild != nil {
+			RecursiveTree(output, path, level+1)
+		}
+	}
+}
+
+func OutputTree(output io.Writer, doc *html.Node) {
+	RecursiveTree(output, doc, 0)
+}
+
+// Find one matching Node
+func RecursiveFindOne(doc *html.Node, match func(doc *html.Node) bool) *html.Node {
+	var path *html.Node
+
+	for path = doc.FirstChild; path != nil; path = path.NextSibling {
+		if match(path) {
+			return path
+		}
+		var nested *html.Node = RecursiveFindOne(path, match)
+		if nested != nil {
+			return nested
+		}
+	}
+	return nil
+}
+
+// RecursiveFindAll searches the descendants of doc depth-first and returns
+// every node for which match reports true, in document order. The doc node
+// itself is never tested. A matching node's own descendants are still
+// searched. The result is nil when nothing matches.
+func RecursiveFindAll(doc *html.Node, match func(doc *html.Node) bool) []*html.Node {
+	var found []*html.Node
+
+	for child := doc.FirstChild; child != nil; child = child.NextSibling {
+		if match(child) {
+			found = append(found, child)
+		}
+		// Appending an empty result leaves found untouched (and still nil
+		// if it was nil), so no length check is needed.
+		found = append(found, RecursiveFindAll(child, match)...)
+	}
+	return found
+}
+
+func GetAttr(node *html.Node, attrKey string) (string, bool) {
+	var attr html.Attribute
+
+	for _, attr = range node.Attr {
+		if attr.Key == attrKey {
+			return attr.Val, true
+		}
+	}
+	return "", false
+}
+
+func HasAttr(node *html.Node, attrKey string, attrValue string) bool {
+	var attr html.Attribute
+
+	for _, attr = range node.Attr {
+		if attr.Key == attrKey {
+			var val string
+			for _, val = range strings.Split(attr.Val, " ") {
+				if val == attrValue {
+					return true
+				}
+			}
+		}
+	}
+	return false
+}
+
+// MatchTag returns a predicate that reports whether a node is an element
+// node whose tag name (Data) equals tag. The predicate is suitable for
+// RecursiveFindOne and RecursiveFindAll.
+func MatchTag(tag string) func(*html.Node) bool {
+	isTag := func(candidate *html.Node) bool {
+		if candidate.Type != html.ElementNode {
+			return false
+		}
+		return candidate.Data == tag
+	}
+	return isTag
+}
+
+func MatchNestedTags(tags ...string) func(*html.Node) bool {
+	// reverse the order of the tags
+	for left, right := 0, len(tags)-1; left < right; left, right = left+1, right-1 {
+		tags[left], tags[right] = tags[right], tags[left]
+	}
+
+	return func(node *html.Node) bool {
+		var rev string
+		var walk *html.Node = node
+		for _, rev = range tags {
+			if walk.Type == html.ElementNode && walk.Data == rev {
+				// This never happens.  Every html.ElementNode has a parent.
+				// html has parent of DocumentNode.
+				// This got coverage by altering the document: See TestNilParent.
+				if walk.Parent == nil {
+					return false
+				}
+				walk = walk.Parent
+			} else {
+				return false
+			}
+		}
+		return true
+	}
+}
+
+// MatchTagAttr returns a predicate that reports whether a node is an
+// element node named tag for which HasAttr(node, attrKey, attrValue)
+// holds.
+func MatchTagAttr(tag string, attrKey string, attrValue string) func(*html.Node) bool {
+	return func(candidate *html.Node) bool {
+		isTag := candidate.Type == html.ElementNode && candidate.Data == tag
+		return isTag && HasAttr(candidate, attrKey, attrValue)
+	}
+}
+
+func FindTagsAttr(doc *html.Node, tag string, attrKey string, attrValue string) []*html.Node {
+	var tagMatch func(*html.Node) bool
+	tagMatch = func(node *html.Node) bool {
+		if node.Type == html.ElementNode && node.Data == tag {
+			if HasAttr(node, attrKey, attrValue) {
+				return true
+			}
+		}
+		return false
+	}
+	return RecursiveFindAll(doc, tagMatch)
+}
+
+func FindTags(doc *html.Node, tag string) []*html.Node {
+	var tagMatch func(*html.Node) bool
+	tagMatch = func(node *html.Node) bool {
+		return node.Type == html.ElementNode && node.Data == tag
+	}
+	return RecursiveFindAll(doc, tagMatch)
+}
+
+// GetText returns the text of node's first child when that child is a text
+// node, and "" otherwise. It cannot tell whether text belongs to the
+// element or follows it, and it never looks past the first child: for
+// <div><b>Missing</b></div> the div yields "" because its first child is
+// the <b> element, not text.
+func GetText(node *html.Node) string {
+	first := node.FirstChild
+	if first == nil || first.Type != html.TextNode {
+		return ""
+	}
+	return first.Data
+}
+
+// GetAllText concatenates the data of every text node found in node and
+// its descendants, in document order. node itself is included when it is a
+// text node. For <div><b>Text</b> Within</div> the div yields
+// "Text Within".
+func GetAllText(node *html.Node) string {
+	// One builder is shared by the whole walk so the text is not re-copied
+	// at every level of recursion, as repeated string += would do.
+	var text strings.Builder
+
+	var collect func(n *html.Node)
+	collect = func(n *html.Node) {
+		if n.Type == html.TextNode {
+			text.WriteString(n.Data)
+		}
+		for child := n.FirstChild; child != nil; child = child.NextSibling {
+			collect(child)
+		}
+	}
+
+	collect(node)
+	return text.String()
+}
+
+// GetAllTextBR concatenates the data of every text node found in node and
+// its descendants, in document order, like GetAllText, but emits "\n" for
+// each descendant <br> element. A <br> is replaced by the newline and is
+// not descended into. node itself is never treated as a <br>; only its
+// descendants are checked.
+func GetAllTextBR(node *html.Node) string {
+	// One builder is shared by the whole walk so the text is not re-copied
+	// at every level of recursion, as repeated string += would do.
+	var text strings.Builder
+
+	var collect func(n *html.Node)
+	collect = func(n *html.Node) {
+		if n.Type == html.TextNode {
+			text.WriteString(n.Data)
+		}
+		for child := n.FirstChild; child != nil; child = child.NextSibling {
+			if child.Type == html.ElementNode && child.Data == "br" {
+				text.WriteString("\n")
+				continue
+			}
+			collect(child)
+		}
+	}
+
+	collect(node)
+	return text.String()
+}