htmlquery.go 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. package domquery
  2. import (
  3. "errors"
  4. "strings"
  5. "git.clearsky.net.au/cody/gex.git/utils"
  6. )
  7. /* End Node Functions */
  8. func LoadHTML(htmlStr string) *Node {
  9. tokList := getTokenList(htmlStr)
  10. bt := buildTree(tokList)
  11. return bt
  12. }
  // getTokenList splits htmlStr into a flat list of tokens: markup tokens that
  // run from "<" to the next ">" and the text runs between them.
  //
  // The following rules are checked, in this order, for every byte:
  //   - once the pending token ends in "</style" or "</script", the text before
  //     that marker and a literal "</style>" / "</script>" are emitted and the
  //     current byte (normally the ">") is consumed;
  //   - while the most recently emitted token contains "<style" or "<script",
  //     bytes are accumulated verbatim, so "<" and ">" inside CSS or JavaScript
  //     do not split tokens;
  //   - "<?" switches PHP mode on and "?>" switches it off; while it is on,
  //     "<" and ">" do not split tokens.
  //
  // Text tokens may be empty, and whatever is pending at the end of the input is
  // always appended, so the result has at least one element.
  //
  // Tokens are sub-slices of htmlStr, so multi-byte UTF-8 sequences are kept
  // intact, and a "<" at the very end or a ">" at the very start of the input
  // is handled without indexing outside the string.
  func getTokenList(htmlStr string) []string {
  	var tokList []string
  	start := 0 // the pending token is always htmlStr[start:i]
  	inPHP := false

  	for i := 0; i < len(htmlStr); i++ {
  		tok := htmlStr[start:i]

  		last := ""
  		if n := len(tokList); n > 0 {
  			last = tokList[n-1]
  		}

  		// The pending token is inspected on every iteration and grows by one
  		// byte at a time, so a marker can only ever show up as its suffix.
  		if strings.HasSuffix(tok, "</style") {
  			tokList = append(tokList, strings.TrimSuffix(tok, "</style"), "</style>")
  			start = i + 1
  			continue
  		}
  		if strings.Contains(last, "<style") {
  			continue
  		}
  		if strings.HasSuffix(tok, "</script") {
  			tokList = append(tokList, strings.TrimSuffix(tok, "</script"), "</script>")
  			start = i + 1
  			continue
  		}
  		if strings.Contains(last, "<script") {
  			continue
  		}

  		switch htmlStr[i] {
  		case '<':
  			if i+1 < len(htmlStr) && htmlStr[i+1] == '?' {
  				inPHP = true
  				continue
  			}
  			if inPHP {
  				continue
  			}
  			// Flush the text collected so far; "<" starts the next token.
  			tokList = append(tokList, tok)
  			start = i
  		case '>':
  			if i > 0 && htmlStr[i-1] == '?' {
  				// "?>" only leaves PHP mode; it does not end the token.
  				inPHP = false
  				continue
  			}
  			if inPHP {
  				continue
  			}
  			tokList = append(tokList, htmlStr[start:i+1])
  			start = i + 1
  		}
  	}

  	return append(tokList, htmlStr[start:])
  }
  81. func buildTree(tokList []string) *Node {
  82. root := &Node{}
  83. node := &Node{}
  84. node.Parent = root
  85. for _, tok := range tokList {
  86. prev := node
  87. prev.token = tok
  88. node = &Node{}
  89. if getTokType(tok) == "open" {
  90. node.Parent = prev
  91. }
  92. if getTokType(tok) == "text" {
  93. node.Parent = prev.Parent
  94. }
  95. if getTokType(tok) == "selfclosing" {
  96. node.Parent = prev.Parent
  97. }
  98. if getTokType(tok) == "comment" {
  99. node.Parent = prev.Parent
  100. }
  101. if getTokType(tok) == "close" {
  102. prev.Parent = prev.Parent.Parent
  103. node.Parent = prev.Parent
  104. }
  105. prev.Parent.Children = append(prev.Parent.Children, prev)
  106. }
  107. return root
  108. }
  109. func getTokType(tok string) string {
  110. if len(tok) < 2 {
  111. return "text"
  112. }
  113. fc := string(tok[0])
  114. sc := string(tok[1])
  115. if sc == "/" {
  116. return "close"
  117. }
  118. if fc == "<" && sc == "!" {
  119. return "comment"
  120. }
  121. if fc == "<" && sc == "?" {
  122. return "text"
  123. }
  124. if fc == "<" {
  125. if isSelfClosing(tok) {
  126. return "selfclosing"
  127. }
  128. return "open"
  129. }
  130. return "text"
  131. }
  132. func isSelfClosing(tok string) bool {
  133. tags := map[string]bool{
  134. "area": true,
  135. "base": true,
  136. "br": true,
  137. "col": true,
  138. "embed": true,
  139. "hr": true,
  140. "img": true,
  141. "input": true,
  142. "link": true,
  143. "meta": true,
  144. "param": true,
  145. "source": true,
  146. "track": true,
  147. "wbr": true,
  148. }
  149. return tags[getTagName(tok)]
  150. }
  // getTagName returns the name part of a tag token: the bytes after the first
  // byte of tok up to, but not including, the first whitespace byte, ">" or
  // "/". A "/" directly after the first byte is kept, so a closing tag such as
  // "</div>" still yields "/div". Tokens shorter than two bytes yield "".
  func getTagName(tok string) string {
  	if len(tok) < 2 {
  		return ""
  	}
  	end := len(tok)
  scan:
  	for i := 1; i < len(tok); i++ {
  		switch tok[i] {
  		case ' ', '\t', '\n', '\r', '\f', '>':
  			end = i
  			break scan
  		case '/':
  			// A slash after the name ("<br/>") ends it; the slash that opens
  			// a closing tag does not.
  			if i > 1 {
  				end = i
  				break scan
  			}
  		}
  	}
  	// Slicing keeps the original bytes, so non-ASCII names are not re-encoded.
  	return tok[1:end]
  }
  162. func getCloseNode(node *Node) *Node {
  163. if getTokType(node.token) != "open" {
  164. return &Node{}
  165. }
  166. idx := 0
  167. for i, child := range node.Parent.Children {
  168. if child == node {
  169. idx = i
  170. break
  171. }
  172. }
  173. idx = idx + 1
  174. if idx > len(node.Parent.Children)-1 {
  175. err := errors.New("Parse Error: Unclosed tag in " + node.token)
  176. utils.Err(err)
  177. idx--
  178. }
  179. return node.Parent.Children[idx]
  180. }
  181. func matchSelector(node *Node, sel string) bool {
  182. if getTokType(node.token) == "close" {
  183. return false
  184. }
  185. if getTagName(node.token) == sel {
  186. return true
  187. }
  188. if "#"+node.GetAttribute("id") == sel {
  189. return true
  190. }
  191. for _, class := range node.ClassList() {
  192. if "."+class == sel {
  193. return true
  194. }
  195. }
  196. return false
  197. }