Skip to content

Commit 2a57053

Browse files
Merge pull request #10 from udan-jayanith/newQueryings
V0.0.2
2 parents 298aa1f + 65665d6 commit 2a57053

7 files changed

Lines changed: 182 additions & 23 deletions

File tree

FUTURE-CHANGELOG.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,5 @@
88
* GetElementsByClassName
99
* GetElementsByTagName
1010
* GetElementsById
11-
* QueryList
1211
* QuerySelector
1312
* QuerySelectorAll

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
module github.com/udan-jayanith/GoHTML
22

3-
go 1.24.1
3+
go 1.25.0
44

55
require github.com/emirpasic/gods v1.18.1
66

node-tree.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,14 @@ func (node *Node) SetTagName(tagName string) {
6868

6969
// GetAttribute returns the specified attribute value from the node. If the specified attribute doesn't exist, GetAttribute returns an empty string and false.
7070
func (node *Node) GetAttribute(attributeName string) (string, bool) {
71-
v, ok := node.attributes[attributeName]
71+
v, ok := node.attributes[strings.TrimSpace(strings.ToLower(attributeName))]
7272
return v, ok
7373
}
7474

7575
// RemoveAttribute deletes the specified attribute from the node.
7676
func (node *Node) RemoveAttribute(attributeName string) {
77-
delete(node.attributes, attributeName)
77+
delete(node.attributes, strings.TrimSpace(strings.ToLower(attributeName)))
78+
7879
}
7980

8081
// IterateAttributes calls callback at every attribute in the node by passing attribute and value of the node.
@@ -87,7 +88,7 @@ func (node *Node) IterateAttributes(callback func(attribute, value string)) {
8788

8889
// SetAttribute adds an attribute to the node.
8990
func (node *Node) SetAttribute(attribute, value string) {
90-
node.attributes[strings.TrimSpace(attribute)] = strings.TrimSpace(value)
91+
node.attributes[strings.ToLower(strings.TrimSpace(attribute))] = strings.TrimSpace(value)
9192
}
9293

9394
// GetText returns the text on the node. This does not return the text of its child nodes. If you also want the child nodes' text, use the GetInnerText method on the node.

querying.go

Lines changed: 112 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,12 @@ func (node *Node) GetElementByClassName(className string) *Node {
3939
}
4040

4141
// GetElementByID returns the first node that matches the given idName by advancing from the node.
42-
func (node *Node) GetElementByID(idName string) *Node{
42+
func (node *Node) GetElementByID(idName string) *Node {
4343
traverser := NewTraverser(node)
4444
var returnNode *Node
4545
traverser.Walkthrough(func(node *Node) TraverseCondition {
46-
id, _ := node.GetAttribute("id")
47-
if id == idName{
46+
id, _ := node.GetAttribute("id")
47+
if id == idName {
4848
returnNode = node
4949
return StopWalkthrough
5050
}
@@ -54,15 +54,15 @@ func (node *Node) GetElementByID(idName string) *Node{
5454
}
5555

5656
// GetElementsByClassName returns a NodeList containing nodes that have the given className from the node.
57-
func (node *Node) GetElementsByClassName(className string) NodeList{
57+
func (node *Node) GetElementsByClassName(className string) NodeList {
5858
traverser := NewTraverser(node)
5959
nodeList := NewNodeList()
6060

6161
traverser.Walkthrough(func(node *Node) TraverseCondition {
6262
classList := NewClassList()
6363
classList.DecodeFrom(node)
6464

65-
if classList.Contains(className){
65+
if classList.Contains(className) {
6666
nodeList.Append(node)
6767
}
6868
return ContinueWalkthrough
@@ -71,12 +71,12 @@ func (node *Node) GetElementsByClassName(className string) NodeList{
7171
}
7272

7373
// GetElementsByTagName returns a NodeList containing nodes that have the given tagName from the node.
74-
func (node *Node) GetElementsByTagName(tagName string) NodeList{
74+
func (node *Node) GetElementsByTagName(tagName string) NodeList {
7575
traverser := NewTraverser(node)
7676
nodeList := NewNodeList()
7777

7878
traverser.Walkthrough(func(node *Node) TraverseCondition {
79-
if node.GetTagName() == tagName{
79+
if node.GetTagName() == tagName {
8080
nodeList.Append(node)
8181
}
8282
return ContinueWalkthrough
@@ -85,16 +85,119 @@ func (node *Node) GetElementsByTagName(tagName string) NodeList{
8585
}
8686

8787
// GetElementsById returns a NodeList containing nodes that have the given idName from the node.
88-
func (node *Node) GetElementsById(idName string) NodeList{
88+
func (node *Node) GetElementsById(idName string) NodeList {
8989
traverser := NewTraverser(node)
9090
nodeList := NewNodeList()
9191

9292
traverser.Walkthrough(func(node *Node) TraverseCondition {
9393
id, _ := node.GetAttribute("id")
94-
if id == idName{
94+
if id == idName {
9595
nodeList.Append(node)
9696
}
9797
return ContinueWalkthrough
9898
})
9999
return nodeList
100+
}
101+
102+
// Selector types identify which kind of basic CSS selector a QueryToken holds.
const (
	// Id marks a "#name" selector.
	Id int = iota
	// Tag marks a bare "name" selector.
	Tag
	// Class marks a ".name" selector.
	Class
)

// QueryToken stores data about a basic CSS selector (id, class or tag).
type QueryToken struct {
	// Type is one of Id, Tag or Class.
	Type int
	// SelectorName is the selector without its leading "." or "#" prefix.
	SelectorName string
	// Selector is the selector exactly as it appeared in the query.
	Selector string
}

// TokenizeQuery tokenizes the query and returns a list of QueryToken.
//
// The query is split on any run of whitespace (spaces, tabs, newlines), so
// selectors may be separated by more than a single space character. A field
// starting with "." becomes a Class token, a field starting with "#" becomes
// an Id token, and anything else becomes a Tag token. A field consisting only
// of "." or "#" is dropped because it names nothing. Compound selectors such
// as "div.card" are not split further; they are returned as one Tag token.
//
// An empty or all-whitespace query yields an empty, non-nil slice.
func TokenizeQuery(query string) []QueryToken {
	fields := strings.Fields(query)
	tokens := make([]QueryToken, 0, len(fields))

	for _, field := range fields {
		// A lone prefix carries no name to match against.
		if field == "." || field == "#" {
			continue
		}

		// Start from the Tag interpretation and refine it by prefix.
		token := QueryToken{Type: Tag, SelectorName: field, Selector: field}
		switch field[0] {
		case '.':
			token.Type = Class
			token.SelectorName = field[1:]
		case '#':
			token.Type = Id
			token.SelectorName = field[1:]
		}
		tokens = append(tokens, token)
	}

	return tokens
}
148+
149+
func matchQueryTokens(node *Node, queryTokens []QueryToken) bool {
150+
if len(queryTokens) == 0 {
151+
return false
152+
}
153+
classList := NewClassList()
154+
classList.DecodeFrom(node)
155+
for _, token := range queryTokens {
156+
switch token.Type {
157+
case Id:
158+
idName, _ := node.GetAttribute("id")
159+
if token.SelectorName != idName {
160+
return false
161+
}
162+
case Tag:
163+
if node.GetTagName() != token.SelectorName {
164+
return false
165+
}
166+
case Class:
167+
if !classList.Contains(token.SelectorName) {
168+
return false
169+
}
170+
}
171+
}
172+
return true
173+
}
174+
175+
// QuerySelector returns the first node that matches with the give node.
176+
func (node *Node) QuerySelector(query string) *Node {
177+
queryTokens := TokenizeQuery(query)
178+
179+
traverser := NewTraverser(node)
180+
var res *Node
181+
traverser.Walkthrough(func(node *Node) TraverseCondition {
182+
if matchQueryTokens(node, queryTokens) {
183+
res = node
184+
return StopWalkthrough
185+
}
186+
return ContinueWalkthrough
187+
})
188+
return res
189+
}
190+
191+
// QuerySelectorAll returns a NodeList containing nodes that matched with the given query.
192+
func (node *Node) QuerySelectorAll(query string) NodeList{
193+
nodeList := NewNodeList()
194+
queryTokens := TokenizeQuery(query)
195+
traverser := NewTraverser(node)
196+
197+
for node := range traverser.Walkthrough{
198+
if matchQueryTokens(node, queryTokens) {
199+
nodeList.Append(node)
200+
}
201+
}
202+
return nodeList
100203
}

querying_test.go

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ func TestGetElementByClassName(t *testing.T) {
5252
node = node.GetElementByClassName("ordered-item")
5353
if node == nil {
5454
t.Fatal("Node is nil")
55-
}else if node.GetInnerText() != "Apple" {
55+
} else if node.GetInnerText() != "Apple" {
5656
t.Fatal("Expected Apple but got ", node.GetInnerText())
5757
}
5858
}
@@ -66,7 +66,7 @@ func TestGetElementByTagName(t *testing.T) {
6666
node = node.GetElementByTagName("h2")
6767
if node == nil {
6868
t.Fatal("Node is nil")
69-
}else if node.GetInnerText() != "List 1"{
69+
} else if node.GetInnerText() != "List 1" {
7070
t.Fatal("Expected List 1 but got ", node.GetInnerText())
7171
}
7272
}
@@ -137,3 +137,58 @@ func TestGetElementsById(t *testing.T) {
137137
}
138138
}
139139
}
140+
141+
func TestSelectorTokenizer(t *testing.T) {
142+
stack := linkedliststack.New()
143+
stack.Push("article .content")
144+
stack.Push("article p h1")
145+
stack.Push("article p")
146+
stack.Push(".title #user")
147+
stack.Push("#user title .title-1")
148+
149+
for stack.Size() > 0 {
150+
val, _ := stack.Pop()
151+
selector := val.(string)
152+
153+
tokens := GoHtml.TokenizeQuery(selector)
154+
s := ""
155+
for _, token := range tokens {
156+
if s == "" {
157+
s += token.Selector
158+
} else {
159+
s += " " + token.Selector
160+
}
161+
}
162+
163+
if s != selector {
164+
t.Fatal("Expected ", selector, "but got", s)
165+
}
166+
}
167+
}
168+
169+
func TestQuerySelector(t *testing.T) {
170+
node, err := testFile4NodeTree()
171+
if err != nil {
172+
t.Fatal(err)
173+
return
174+
}
175+
imgEl := node.QuerySelector("img #idElement")
176+
imgSrc, _ := imgEl.GetAttribute("src")
177+
imgAlt, _ := imgEl.GetAttribute("alt")
178+
if imgSrc != "" || imgAlt != "" {
179+
t.Fatal("")
180+
}
181+
}
182+
183+
func TestQuerySelectorAll(t *testing.T) {
184+
node, err := testFile4NodeTree()
185+
if err != nil {
186+
t.Fatal(err)
187+
return
188+
}
189+
190+
nodeList := node.QuerySelectorAll("h2")
191+
if nodeList.Len() != 2{
192+
t.Fatal("")
193+
}
194+
}

traverser.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,16 @@ func (t *Traverser) Previous() *Node {
4242
return t.GetCurrentNode()
4343
}
4444

45-
type TraverseCondition bool
45+
type TraverseCondition = bool
4646

4747
const (
48-
StopWalkthrough TraverseCondition = true
49-
ContinueWalkthrough TraverseCondition = false
48+
StopWalkthrough TraverseCondition = false
49+
ContinueWalkthrough TraverseCondition = true
5050
)
5151

52-
// Walkthrough traverse the node tree from the current node to the end of the node tree by visiting every node. If callback returned StopWalkthrough walkthrough function will stop else if it returned ContinueWalkthrough it advanced to the next node.
53-
// Walkthrough calls callback at every node and pass that node. Walkthrough traverse the node tree similar to DFS without visiting visited nodes iteratively.
52+
// Walkthrough traverses the node tree from the current node to the end of the node tree, visiting every node.
53+
// The traversal is iterative and similar to DFS; nodes that were already visited are not visited again.
54+
// Walkthrough can be used as a range-over-func iterator or called with a callback that receives every node one by one. The traversal stops as soon as the callback returns StopWalkthrough (false) and continues while it returns ContinueWalkthrough (true).
5455
func (t *Traverser) Walkthrough(callback func(node *Node) TraverseCondition) {
5556
stack := linkedliststack.New()
5657
if t.GetCurrentNode() == nil {
@@ -60,7 +61,7 @@ func (t *Traverser) Walkthrough(callback func(node *Node) TraverseCondition) {
6061

6162
for stack.Size() > 0 {
6263
currentNode, _ := stack.Pop()
63-
if callback(currentNode.(*Node)) == StopWalkthrough {
64+
if !callback(currentNode.(*Node)) {
6465
return
6566
}
6667

traverser_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ func TestWalkthrough(t *testing.T) {
1919
traverser := GoHtml.NewTraverser(body)
2020

2121
resList := make([]*GoHtml.Node, 0)
22-
traverser.Walkthrough(func(node *GoHtml.Node) GoHtml.TraverseCondition {
22+
traverser.Walkthrough(func(node *GoHtml.Node) bool {
2323
resList = append(resList, node)
2424
return GoHtml.ContinueWalkthrough
2525
})

0 commit comments

Comments
 (0)