// david/htmlhelp

							package htmlhelp

import (
	"fmt"
	"io"
	"strings"

	"golang.org/x/net/html"
)

// RecursiveTree writes an indented outline of the descendants of doc to
// output, one line per node.  The doc node itself is not printed.  Children
// of doc are indented by level*2 spaces and every further nesting level adds
// two more.
//
// Element nodes print their Data (the tag name), text nodes print "TEXT: "
// followed by the Go-quoted Data, and doctype nodes print "Doctype: "
// followed by their Data.  Any other node type is dumped with %#v after a
// "?? " marker and is not indented.  Errors from output are ignored.
func RecursiveTree(output io.Writer, doc *html.Node, level int) {
	indent := strings.Repeat(" ", level*2)

	for child := doc.FirstChild; child != nil; child = child.NextSibling {
		// Fprintf formats straight into the writer with a single Write,
		// avoiding the intermediate string and byte-slice copies.
		switch child.Type {
		case html.ElementNode:
			fmt.Fprintf(output, "%s%s\n", indent, child.Data)
		case html.TextNode:
			fmt.Fprintf(output, "%sTEXT: %#v\n", indent, child.Data)
		case html.DoctypeNode:
			fmt.Fprintf(output, "%sDoctype: %s\n", indent, child.Data)
		default:
			fmt.Fprintf(output, "?? %#v\n", child)
		}

		// Skip the call for leaves so no indent string is built for nothing.
		if child.FirstChild != nil {
			RecursiveTree(output, child, level+1)
		}
	}
}

// OutputTree writes an outline of the descendants of doc to output by
// calling RecursiveTree with a starting indentation level of zero.
func OutputTree(output io.Writer, doc *html.Node) {
	const topLevel = 0
	RecursiveTree(output, doc, topLevel)
}

// RecursiveFindOne searches the descendants of doc depth-first, visiting
// each child before its own children and before its later siblings, and
// returns the first node for which match reports true.  The doc node itself
// is never passed to match.  It returns nil when no descendant matches.
func RecursiveFindOne(doc *html.Node, match func(doc *html.Node) bool) *html.Node {
	for child := doc.FirstChild; child != nil; child = child.NextSibling {
		if match(child) {
			return child
		}
		if found := RecursiveFindOne(child, match); found != nil {
			return found
		}
	}
	return nil
}

// RecursiveFindAll collects every descendant of doc for which match reports
// true.  Nodes are visited depth-first: a node is tested, then its
// descendants, then its next sibling, and the result keeps that order.  The
// doc node itself is never passed to match.  The result is nil when nothing
// matches.
func RecursiveFindAll(doc *html.Node, match func(doc *html.Node) bool) []*html.Node {
	var found []*html.Node

	// visit appends the matching descendants of parent to found.
	var visit func(parent *html.Node)
	visit = func(parent *html.Node) {
		for child := parent.FirstChild; child != nil; child = child.NextSibling {
			if match(child) {
				found = append(found, child)
			}
			visit(child)
		}
	}

	visit(doc)
	return found
}

// GetAttr looks up the attribute named attrKey on node.  It returns the
// value of the first attribute whose Key equals attrKey together with true,
// or the empty string and false when node has no such attribute.
func GetAttr(node *html.Node, attrKey string) (string, bool) {
	for i := range node.Attr {
		if node.Attr[i].Key != attrKey {
			continue
		}
		return node.Attr[i].Val, true
	}
	return "", false
}

// HasAttr reports whether node has an attribute named attrKey whose value
// contains attrValue as one of its space-separated words, for example a
// single class inside class="a b c".  Every attribute with a matching key is
// examined.  The value is split on single space characters only, so an empty
// value (or consecutive spaces) yields an empty word that matches an empty
// attrValue.
func HasAttr(node *html.Node, attrKey string, attrValue string) bool {
	for _, candidate := range node.Attr {
		if candidate.Key != attrKey {
			continue
		}
		for _, word := range strings.Split(candidate.Val, " ") {
			if word == attrValue {
				return true
			}
		}
	}
	return false
}

// MatchTag returns a predicate that reports whether a node is an element
// node whose Data (the tag name) equals tag.
func MatchTag(tag string) func(*html.Node) bool {
	return func(candidate *html.Node) bool {
		if candidate.Type != html.ElementNode {
			return false
		}
		return candidate.Data == tag
	}
}

// MatchNestedTags returns a predicate that reports whether a node sits at
// the end of a chain of directly nested elements named by tags, listed from
// outermost to innermost.  For example MatchNestedTags("ul", "li", "a")
// matches an <a> element whose parent is an <li> whose parent is a <ul>.
// With no tags the predicate matches every node.
//
// The tags are copied into a private slice, so a slice passed as tags... is
// neither reordered nor retained by the returned predicate.
func MatchNestedTags(tags ...string) func(*html.Node) bool {
	// Keep the chain innermost-first: that is the order in which the
	// predicate climbs from the candidate node toward the root.
	innerFirst := make([]string, len(tags))
	for i, tag := range tags {
		innerFirst[len(tags)-1-i] = tag
	}

	return func(node *html.Node) bool {
		walk := node
		for _, want := range innerFirst {
			if walk.Type != html.ElementNode || walk.Data != want {
				return false
			}
			// This never happens in a parsed document: every
			// html.ElementNode has a parent (html itself has the
			// DocumentNode as parent).  It was covered by altering the
			// document; see TestNilParent.  The check also keeps the next
			// iteration from dereferencing a nil node.
			if walk.Parent == nil {
				return false
			}
			walk = walk.Parent
		}
		return true
	}
}

// MatchTagAttr returns a predicate that reports whether a node is an element
// named tag for which HasAttr(node, attrKey, attrValue) is true.
func MatchTagAttr(tag string, attrKey string, attrValue string) func(*html.Node) bool {
	return func(candidate *html.Node) bool {
		return candidate.Type == html.ElementNode &&
			candidate.Data == tag &&
			HasAttr(candidate, attrKey, attrValue)
	}
}

// FindTagsAttr returns every descendant of doc that is an element named tag
// and for which HasAttr(node, attrKey, attrValue) is true, in the order
// RecursiveFindAll visits them.  The result is nil when nothing matches.
func FindTagsAttr(doc *html.Node, tag string, attrKey string, attrValue string) []*html.Node {
	// Reuse the shared predicate rather than repeating its body here.
	return RecursiveFindAll(doc, MatchTagAttr(tag, attrKey, attrValue))
}

// FindTags returns every descendant of doc that is an element named tag, in
// the order RecursiveFindAll visits them.  The result is nil when nothing
// matches.
func FindTags(doc *html.Node, tag string) []*html.Node {
	// Reuse the shared predicate rather than repeating its body here.
	return RecursiveFindAll(doc, MatchTag(tag))
}

// GetText returns the Data of node's first child when that child is a text
// node, and "" otherwise (including when node has no children).
//
// It cannot tell whether text belongs to the element or follows it, and it
// never looks past the first child.  For the div in
// <div><b>Missing</b></div> the result is "", because the first child is the
// <b> element rather than text.
func GetText(node *html.Node) string {
	first := node.FirstChild
	if first == nil || first.Type != html.TextNode {
		return ""
	}
	return first.Data
}

// GetAllText concatenates the Data of every text node found in node and its
// descendants, in document order.  If node is itself a text node its Data
// comes first.  <div><b>Text</b> Within</div> would return "Text Within".
func GetAllText(node *html.Node) string {
	// A single builder is shared by the whole walk so the text gathered so
	// far is not re-copied at every level of recursion.
	var text strings.Builder

	var collect func(n *html.Node)
	collect = func(n *html.Node) {
		if n.Type == html.TextNode {
			text.WriteString(n.Data)
		}
		for child := n.FirstChild; child != nil; child = child.NextSibling {
			collect(child)
		}
	}

	collect(node)
	return text.String()
}

// GetAllTextBR concatenates the Data of every text node found in node and
// its descendants, in document order, writing "\n" for each descendant <br>
// element instead of descending into it.  <p>one<br>two</p> would return
// "one\ntwo".  Only children are checked for <br>; the node passed in is
// never replaced by a newline itself.
func GetAllTextBR(node *html.Node) string {
	// A single builder is shared by the whole walk so the text gathered so
	// far is not re-copied at every level of recursion.
	var text strings.Builder

	var collect func(n *html.Node)
	collect = func(n *html.Node) {
		if n.Type == html.TextNode {
			text.WriteString(n.Data)
		}
		for child := n.FirstChild; child != nil; child = child.NextSibling {
			if child.Type == html.ElementNode && child.Data == "br" {
				text.WriteString("\n")
				continue
			}
			collect(child)
		}
	}

	collect(node)
	return text.String()
}