htmlquery.go 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. package domquery
  2. import (
  3. "errors"
  4. "strings"
  5. "git.clearsky.net.au/cody/gex.git/utils"
  6. )
  7. /* End Node Functions */
  8. func LoadHTML(htmlStr string) *Node {
  9. tokList := GetTokenList(htmlStr)
  10. bt := BuildTree(tokList)
  11. return bt
  12. }
  // GetTokenList splits htmlStr into a flat list of tokens: tags (everything
  // from "<" through the matching ">") and the text runs between them.
  //
  // Special cases:
  //   - After a token containing "<style" or "<script", input is accumulated
  //     verbatim until "</style" / "</script" appears; the accumulated body is
  //     emitted as one token followed by a normalized "</style>" / "</script>"
  //     token. The byte that follows the closing tag name is consumed.
  //   - "<?" ... "?>" regions are kept inside the current token, so any "<" or
  //     ">" within them does not start or end a token.
  //
  // The text before the first tag and after the last tag is always emitted,
  // even when empty, so the result has at least one element.
  func GetTokenList(htmlStr string) []string {
  	var (
  		tokList []string
  		tok     string
  		isPhp   bool
  	)

  	// lastTokenHas reports whether the most recently emitted token contains s.
  	lastTokenHas := func(s string) bool {
  		return len(tokList) > 0 && strings.Contains(tokList[len(tokList)-1], s)
  	}

  	for i := 0; i < len(htmlStr); i++ {
  		// Slice instead of string(htmlStr[i]): converting a byte to a string
  		// re-encodes it as a rune, which corrupts multi-byte UTF-8 sequences.
  		chr := htmlStr[i : i+1]

  		// Raw-text elements. The order of these four checks is significant.
  		if strings.Contains(tok, "</style") {
  			tokList = append(tokList, strings.Replace(tok, "</style", "", 1), "</style>")
  			tok = ""
  			continue
  		}
  		if lastTokenHas("<style") {
  			tok += chr
  			continue
  		}
  		if strings.Contains(tok, "</script") {
  			tokList = append(tokList, strings.Replace(tok, "</script", "", 1), "</script>")
  			tok = ""
  			continue
  		}
  		if lastTokenHas("<script") {
  			tok += chr
  			continue
  		}

  		switch chr {
  		case "<":
  			switch {
  			case i+1 < len(htmlStr) && htmlStr[i+1] == '?':
  				// Bounds-checked look-ahead: a trailing "<" must not panic.
  				isPhp = true
  				tok += chr
  			case isPhp:
  				tok += chr
  			default:
  				// Flush the preceding text and start a new tag token.
  				tokList = append(tokList, tok)
  				tok = chr
  			}
  		case ">":
  			switch {
  			case i > 0 && htmlStr[i-1] == '?':
  				// Bounds-checked look-behind: a leading ">" must not panic.
  				isPhp = false
  				tok += chr
  			case isPhp:
  				tok += chr
  			default:
  				// End of a tag: emit it and start a fresh text run.
  				tok += chr
  				tokList = append(tokList, tok)
  				tok = ""
  			}
  		default:
  			tok += chr
  		}
  	}

  	// Emit whatever is left, including an empty trailing text run.
  	tokList = append(tokList, tok)
  	return tokList
  }
  81. func BuildTree(tokList []string, params ...*Node) *Node {
  82. root := &Node{}
  83. root.token = ""
  84. if len(params) > 0 {
  85. root = params[0]
  86. }
  87. prev := root
  88. for _, tok := range tokList {
  89. node := &Node{}
  90. node.token = tok
  91. if getTokType(tok) == "open" {
  92. node.Parent = prev
  93. prev.Children = append(prev.Children, node)
  94. prev = node
  95. continue
  96. }
  97. if getTokType(tok) == "close" {
  98. prev = prev.Parent
  99. node.Parent = prev
  100. prev.Children = append(prev.Children, node)
  101. continue
  102. }
  103. if getTokType(tok) == "text" {
  104. node.Parent = prev
  105. prev.Children = append(prev.Children, node)
  106. continue
  107. }
  108. if getTokType(tok) == "selfclosing" {
  109. node.Parent = prev
  110. prev.Children = append(prev.Children, node)
  111. continue
  112. }
  113. if getTokType(tok) == "comment" {
  114. node.Parent = prev
  115. prev.Children = append(prev.Children, node)
  116. continue
  117. }
  118. }
  119. return root
  120. }
  121. func getTokType(tok string) string {
  122. if len(tok) < 2 {
  123. return "text"
  124. }
  125. fc := string(tok[0])
  126. sc := string(tok[1])
  127. if sc == "/" {
  128. return "close"
  129. }
  130. if fc == "<" && sc == "!" {
  131. return "comment"
  132. }
  133. if fc == "<" && sc == "?" {
  134. return "text"
  135. }
  136. if fc == "<" {
  137. if isSelfClosing(tok) {
  138. return "selfclosing"
  139. }
  140. return "open"
  141. }
  142. return "text"
  143. }
  144. func isSelfClosing(tok string) bool {
  145. tags := map[string]bool{
  146. "area": true,
  147. "base": true,
  148. "br": true,
  149. "col": true,
  150. "embed": true,
  151. "hr": true,
  152. "img": true,
  153. "input": true,
  154. "link": true,
  155. "meta": true,
  156. "param": true,
  157. "source": true,
  158. "track": true,
  159. "wbr": true,
  160. }
  161. return tags[getTagName(tok)]
  162. }
  // getTagName extracts the name part of a tag token.
  //
  // The first byte of tok (normally "<") is skipped, and the name runs up to
  // the first whitespace byte, ">" or "/". A "/" directly after the first
  // byte is kept, so a closing tag such as "</div>" yields "/div". Tokens
  // shorter than two bytes yield "".
  //
  // The result is a substring of tok, so multi-byte UTF-8 is preserved.
  func getTagName(tok string) string {
  	if len(tok) < 2 {
  		return ""
  	}
  	for i := 1; i < len(tok); i++ {
  		switch tok[i] {
  		case ' ', '\t', '\n', '\r', '\f', '>':
  			return tok[1:i]
  		case '/':
  			// Only a "/" after the name ends it; a leading one belongs to
  			// a closing tag and stays part of the result.
  			if i > 1 {
  				return tok[1:i]
  			}
  		}
  	}
  	return tok[1:]
  }
  174. func getCloseNode(node *Node) *Node {
  175. if getTokType(node.token) != "open" {
  176. return &Node{}
  177. }
  178. idx := 0
  179. for i, child := range node.Parent.Children {
  180. if child == node {
  181. idx = i
  182. break
  183. }
  184. }
  185. idx = idx + 1
  186. if idx > len(node.Parent.Children)-1 {
  187. err := errors.New("Parse Error: Unclosed tag in " + node.token)
  188. utils.Err(err)
  189. idx--
  190. }
  191. return node.Parent.Children[idx]
  192. }
  193. func matchSelector(node *Node, sel string) bool {
  194. if getTokType(node.token) == "close" {
  195. return false
  196. }
  197. if getTagName(node.token) == sel {
  198. return true
  199. }
  200. if "#"+node.GetAttribute("id") == sel {
  201. return true
  202. }
  203. for _, class := range node.ClassList() {
  204. if "."+class == sel {
  205. return true
  206. }
  207. }
  208. return false
  209. }