123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239 |
- package domquery
- import (
- "fmt"
- "strings"
- )
- /* End Node Functions */
- func LoadHTML(htmlStr string) *Node {
- tokList := getTokenList(htmlStr)
- bt := buildTree(tokList)
- return bt
- }
// getTokenList splits htmlStr into a flat list of tokens: tags (everything
// from "<" through the matching ">") and the text runs between them.
//
// Special cases handled while scanning:
//   - After a token containing "<style" or "<script", input is copied
//     verbatim until "</style" / "</script" is seen. The accumulated content
//     and a normalized "</style>" / "</script>" token are then emitted, and
//     the byte following the close marker (normally ">") is consumed.
//   - "<?" starts a PHP section and "?>" ends it. While inside one, "<" and
//     ">" do not delimit tokens, so the PHP code stays attached to the
//     surrounding text token.
//
// Empty tokens are emitted where two delimiters are adjacent, and the pending
// token (possibly empty) is always appended at the end of input.
//
// The input is processed byte by byte and bytes are copied unchanged, so
// multi-byte UTF-8 sequences pass through intact.
//
// NOTE(review): the "</style" check runs before the script-mode check, so a
// literal "</style" inside a <script> body still ends the raw section.
func getTokenList(htmlStr string) []string {
	var (
		tokList []string
		tok     strings.Builder
		isPhp   bool
	)

	// flush emits the pending token and starts a new, empty one.
	flush := func() {
		tokList = append(tokList, tok.String())
		tok.Reset()
	}

	// closeRaw reports whether the pending token ends with closeTag. If so it
	// emits the content before the marker followed by a normalized close tag.
	// Checking only the suffix is sufficient because this runs before every
	// single-byte append, so a marker can only ever appear at the very end.
	closeRaw := func(closeTag string) bool {
		pending := tok.String()
		if !strings.HasSuffix(pending, closeTag) {
			return false
		}
		tokList = append(tokList, strings.TrimSuffix(pending, closeTag), closeTag+">")
		tok.Reset()
		return true
	}

	// lastContains reports whether the most recently emitted token contains
	// marker; this is how "inside <style>/<script>" is detected.
	lastContains := func(marker string) bool {
		return len(tokList) > 0 && strings.Contains(tokList[len(tokList)-1], marker)
	}

	for i := 0; i < len(htmlStr); i++ {
		chr := htmlStr[i]

		if closeRaw("</style") {
			continue // the current byte (the ">" of the close tag) is consumed
		}
		if lastContains("<style") {
			tok.WriteByte(chr)
			continue
		}
		if closeRaw("</script") {
			continue
		}
		if lastContains("<script") {
			tok.WriteByte(chr)
			continue
		}

		switch chr {
		case '<':
			// Bounds check: "<" may be the final byte of the input.
			if i+1 < len(htmlStr) && htmlStr[i+1] == '?' {
				isPhp = true
				tok.WriteByte(chr)
				continue
			}
			if isPhp {
				tok.WriteByte(chr)
				continue
			}
			flush()
			tok.WriteByte(chr)
		case '>':
			tok.WriteByte(chr)
			// Bounds check: ">" may be the first byte of the input.
			if i > 0 && htmlStr[i-1] == '?' {
				isPhp = false
				continue
			}
			if isPhp {
				continue
			}
			flush()
		default:
			tok.WriteByte(chr)
		}
	}

	flush()
	return tokList
}
- func buildTree(tokList []string) *Node {
- root := &Node{}
- node := &Node{}
- node.Parent = root
- for _, tok := range tokList {
- prev := node
- prev.token = tok
- node = &Node{}
- if getTokType(tok) == "open" {
- node.Parent = prev
- }
- if getTokType(tok) == "text" {
- node.Parent = prev.Parent
- }
- if getTokType(tok) == "selfclosing" {
- node.Parent = prev.Parent
- }
- if getTokType(tok) == "comment" {
- node.Parent = prev.Parent
- }
- if getTokType(tok) == "close" {
- prev.Parent = prev.Parent.Parent
- node.Parent = prev.Parent
- }
- prev.Parent.Children = append(prev.Parent.Children, prev)
- }
- return root
- }
- func getTokType(tok string) string {
- if len(tok) < 2 {
- return "text"
- }
- fc := string(tok[0])
- sc := string(tok[1])
- if sc == "/" {
- return "close"
- }
- if fc == "<" && sc == "!" {
- return "comment"
- }
- if fc == "<" && sc == "?" {
- return "text"
- }
- if fc == "<" {
- if isSelfClosing(tok) {
- return "selfclosing"
- }
- return "open"
- }
- return "text"
- }
- func isSelfClosing(tok string) bool {
- tags := map[string]bool{
- "area": true,
- "base": true,
- "br": true,
- "col": true,
- "embed": true,
- "hr": true,
- "img": true,
- "input": true,
- "link": true,
- "meta": true,
- "param": true,
- "source": true,
- "track": true,
- "wbr": true,
- }
- return tags[getTagName(tok)]
- }
// getTagName returns the name portion of a tag token: the bytes after the
// first byte of tok, up to (not including) the first whitespace byte, ">",
// or a "/" that follows at least one name byte.
//
// A "/" directly after the first byte is kept, so "</div>" yields "/div",
// while "<br/>" yields "br". Tokens shorter than two bytes yield "".
// Bytes are sliced out unchanged, so non-ASCII names are preserved.
func getTagName(tok string) string {
	if len(tok) < 2 {
		return ""
	}
	name := tok[1:]
	for i := 0; i < len(name); i++ {
		switch name[i] {
		case ' ', '\t', '\n', '\r', '\f', '>':
			return name[:i]
		case '/':
			// Only a slash after the name terminates it; a leading slash
			// belongs to a close tag and stays part of the result.
			if i > 0 {
				return name[:i]
			}
		}
	}
	return name
}
- func getCloseNode(node *Node) *Node {
- if getTokType(node.token) != "open" {
- return &Node{}
- }
- idx := 0
- for i, child := range node.Parent.Children {
- if child == node {
- idx = i
- break
- }
- }
- idx = idx + 1
- if idx > len(node.Parent.Children)-1 {
- fmt.Println("Parse Error: Unclosed tag in " + node.token)
- idx--
- }
- return node.Parent.Children[idx]
- }
- func matchSelector(node *Node, sel string) bool {
- if getTokType(node.token) == "close" {
- return false
- }
- if getTagName(node.token) == sel {
- return true
- }
- if "#"+node.GetAttribute("id") == sel {
- return true
- }
- for _, class := range node.ClassList() {
- if "."+class == sel {
- return true
- }
- }
- return false
- }
|