| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201 | 
							- package htmlhelp
 
- import (
 
- 	"fmt"
 
- 	"io"
 
- 	"strings"
 
- 	"golang.org/x/net/html"
 
- )
 
- func RecursiveTree(output io.Writer, doc *html.Node, level int) {
 
- 	var path *html.Node
 
- 	var spaces string = strings.Repeat(" ", level*2)
 
- 	for path = doc.FirstChild; path != nil; path = path.NextSibling {
 
- 		switch path.Type {
 
- 		case html.ElementNode:
 
- 			output.Write([]byte(fmt.Sprintf("%s%s\n", spaces, path.Data)))
 
- 		case html.TextNode:
 
- 			output.Write([]byte(fmt.Sprintf("%sTEXT: %#v\n", spaces, path.Data)))
 
- 		case html.DoctypeNode:
 
- 			output.Write([]byte(fmt.Sprintf("%sDoctype: %s\n", spaces, path.Data)))
 
- 		default:
 
- 			output.Write([]byte(fmt.Sprintf("?? %#v\n", path)))
 
- 		}
 
- 		if path.FirstChild != nil {
 
- 			RecursiveTree(output, path, level+1)
 
- 		}
 
- 	}
 
- }
 
- func OutputTree(output io.Writer, doc *html.Node) {
 
- 	RecursiveTree(output, doc, 0)
 
- }
 
- // Find one matching Node
 
- func RecursiveFindOne(doc *html.Node, match func(doc *html.Node) bool) *html.Node {
 
- 	var path *html.Node
 
- 	for path = doc.FirstChild; path != nil; path = path.NextSibling {
 
- 		if match(path) {
 
- 			return path
 
- 		}
 
- 		var nested *html.Node = RecursiveFindOne(path, match)
 
- 		if nested != nil {
 
- 			return nested
 
- 		}
 
- 	}
 
- 	return nil
 
- }
 
- // Find all matching Nodes
 
- func RecursiveFindAll(doc *html.Node, match func(doc *html.Node) bool) []*html.Node {
 
- 	var path *html.Node
 
- 	var results []*html.Node
 
- 	for path = doc.FirstChild; path != nil; path = path.NextSibling {
 
- 		if match(path) {
 
- 			results = append(results, path)
 
- 		}
 
- 		var nested []*html.Node = RecursiveFindAll(path, match)
 
- 		if len(nested) != 0 {
 
- 			results = append(results, nested...)
 
- 		}
 
- 	}
 
- 	return results
 
- }
 
- func GetAttr(node *html.Node, attrKey string) (string, bool) {
 
- 	var attr html.Attribute
 
- 	for _, attr = range node.Attr {
 
- 		if attr.Key == attrKey {
 
- 			return attr.Val, true
 
- 		}
 
- 	}
 
- 	return "", false
 
- }
 
- func HasAttr(node *html.Node, attrKey string, attrValue string) bool {
 
- 	var attr html.Attribute
 
- 	for _, attr = range node.Attr {
 
- 		if attr.Key == attrKey {
 
- 			var val string
 
- 			for _, val = range strings.Split(attr.Val, " ") {
 
- 				if val == attrValue {
 
- 					return true
 
- 				}
 
- 			}
 
- 		}
 
- 	}
 
- 	return false
 
- }
 
- func MatchTag(tag string) func(*html.Node) bool {
 
- 	return func(node *html.Node) bool {
 
- 		return node.Type == html.ElementNode && node.Data == tag
 
- 	}
 
- }
 
- func MatchNestedTags(tags ...string) func(*html.Node) bool {
 
- 	// reverse the order of the tags
 
- 	for left, right := 0, len(tags)-1; left < right; left, right = left+1, right-1 {
 
- 		tags[left], tags[right] = tags[right], tags[left]
 
- 	}
 
- 	return func(node *html.Node) bool {
 
- 		var rev string
 
- 		var walk *html.Node = node
 
- 		for _, rev = range tags {
 
- 			if walk.Type == html.ElementNode && walk.Data == rev {
 
- 				// This never happens.  Every html.ElementNode has a parent.
 
- 				// html has parent of DocumentNode.
 
- 				// This got coverage by altering the document: See TestNilParent.
 
- 				if walk.Parent == nil {
 
- 					return false
 
- 				}
 
- 				walk = walk.Parent
 
- 			} else {
 
- 				return false
 
- 			}
 
- 		}
 
- 		return true
 
- 	}
 
- }
 
- func MatchTagAttr(tag string, attrKey string, attrValue string) func(*html.Node) bool {
 
- 	return func(node *html.Node) bool {
 
- 		if node.Type == html.ElementNode && node.Data == tag {
 
- 			if HasAttr(node, attrKey, attrValue) {
 
- 				return true
 
- 			}
 
- 		}
 
- 		return false
 
- 	}
 
- }
 
- func FindTagsAttr(doc *html.Node, tag string, attrKey string, attrValue string) []*html.Node {
 
- 	var tagMatch func(*html.Node) bool
 
- 	tagMatch = func(node *html.Node) bool {
 
- 		if node.Type == html.ElementNode && node.Data == tag {
 
- 			if HasAttr(node, attrKey, attrValue) {
 
- 				return true
 
- 			}
 
- 		}
 
- 		return false
 
- 	}
 
- 	return RecursiveFindAll(doc, tagMatch)
 
- }
 
- func FindTags(doc *html.Node, tag string) []*html.Node {
 
- 	var tagMatch func(*html.Node) bool
 
- 	tagMatch = func(node *html.Node) bool {
 
- 		return node.Type == html.ElementNode && node.Data == tag
 
- 	}
 
- 	return RecursiveFindAll(doc, tagMatch)
 
- }
 
- // GetText: can't tell if the text is part of the element, or follows
 
- // the element.  This only gets the first child text node.
 
- // So something like: <div><b>Missing</b></div> for the div would be "".
 
- func GetText(node *html.Node) string {
 
- 	node = node.FirstChild
 
- 	if node != nil {
 
- 		if node.Type == html.TextNode {
 
- 			return node.Data
 
- 		}
 
- 	}
 
- 	return ""
 
- }
 
- // GetAllText: collects all the text nodes within the given node.
 
- // <div><b>Text</b> Within</div> would return "Text Within"
 
- func GetAllText(node *html.Node) string {
 
- 	var results string
 
- 	if node.Type == html.TextNode {
 
- 		results += node.Data
 
- 	}
 
- 	for node = node.FirstChild; node != nil; node = node.NextSibling {
 
- 		results += GetAllText(node)
 
- 	}
 
- 	return results
 
- }
 
- func GetAllTextBR(node *html.Node) string {
 
- 	var results string
 
- 	if node.Type == html.TextNode {
 
- 		results += node.Data
 
- 	}
 
- 	for node = node.FirstChild; node != nil; node = node.NextSibling {
 
- 		if node.Type == html.ElementNode && node.Data == "br" {
 
- 			results += "\n"
 
- 		} else {
 
- 			results += GetAllTextBR(node)
 
- 		}
 
- 	}
 
- 	return results
 
- }
 
 
  |