package domquery import ( "fmt" "strings" ) /* End Node Functions */ func LoadHTML(htmlStr string) *Node { tokList := getTokenList(htmlStr) bt := buildTree(tokList) return bt } func getTokenList(htmlStr string) []string { var tokList []string var tok string var isPhp bool for i := 0; i < len(htmlStr); i++ { chr := string(htmlStr[i]) if strings.Contains(tok, " 0 { if strings.Contains(tokList[len(tokList)-1], " 0 { if strings.Contains(tokList[len(tokList)-1], "" { if string(htmlStr[i-1]) == "?" { isPhp = false tok += chr continue } if isPhp == true { tok += chr continue } tok += chr tokList = append(tokList, tok) tok = "" continue } tok += chr } tokList = append(tokList, tok) return tokList } func buildTree(tokList []string) *Node { root := &Node{} node := &Node{} node.Parent = root for _, tok := range tokList { prev := node prev.token = tok node = &Node{} if getTokType(tok) == "open" { node.Parent = prev } if getTokType(tok) == "text" { node.Parent = prev.Parent } if getTokType(tok) == "selfclosing" { node.Parent = prev.Parent } if getTokType(tok) == "comment" { node.Parent = prev.Parent } if getTokType(tok) == "close" { prev.Parent = prev.Parent.Parent node.Parent = prev.Parent } prev.Parent.Children = append(prev.Parent.Children, prev) } return root } func getTokType(tok string) string { if len(tok) < 2 { return "text" } fc := string(tok[0]) sc := string(tok[1]) if sc == "/" { return "close" } if fc == "<" && sc == "!" { return "comment" } if fc == "<" && sc == "?" { return "text" } if fc == "<" { if isSelfClosing(tok) { return "selfclosing" } return "open" } return "text" } func isSelfClosing(tok string) bool { tags := map[string]bool{ "area": true, "base": true, "br": true, "col": true, "embed": true, "hr": true, "img": true, "input": true, "link": true, "meta": true, "param": true, "source": true, "track": true, "wbr": true, } return tags[getTagName(tok)] } func getTagName(tok string) string { tName := "" for i := 1; i < len(tok); i++ { chr := string(tok[i]) if chr == " " || chr == ">" { break } tName += chr } return tName } func getCloseNode(node *Node) *Node { if getTokType(node.token) != "open" { return &Node{} } idx := 0 for i, child := range node.Parent.Children { if child == node { idx = i break } } idx = idx + 1 if idx > len(node.Parent.Children)-1 { fmt.Println("Parse Error: Unclosed tag in " + node.token) idx-- } return node.Parent.Children[idx] } func matchSelector(node *Node, sel string) bool { if getTokType(node.token) == "close" { return false } if getTagName(node.token) == sel { return true } if "#"+node.GetAttribute("id") == sel { return true } for _, class := range node.ClassList() { if "."+class == sel { return true } } return false }