Skip to content

Commit 8d964c5

Browse files
committed
xpath: fix attribute paths returning String instead of NodeSet
Multi-step paths ending with an attribute axis (e.g., item/@amount) incorrectly returned a single String instead of a NodeSet, causing sum(), count(), and other functions to fail with TypeError. Replace the short-circuit with collect_attribute_nodeset() that populates an attr_string_values map so string_value() returns the attribute value instead of the element's text content.
1 parent 737cc0c commit 8d964c5

1 file changed

Lines changed: 73 additions & 41 deletions

File tree

src/xpath/eval.rs

Lines changed: 73 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
//! All 27 core `XPath` 1.0 functions are implemented (node-set, string,
2323
//! boolean, and number function groups).
2424
25+
use std::cell::RefCell;
2526
use std::collections::{HashMap, HashSet};
2627

2728
use super::ast::{Axis, BinaryOp, Expr, NodeTest, Step};
@@ -58,6 +59,13 @@ pub struct XPathContext<'a> {
5859
context_size: usize,
5960
/// Variable bindings available during evaluation.
6061
variables: HashMap<String, XPathValue>,
62+
/// Attribute string values for nodes returned by attribute axis steps.
63+
///
64+
/// When a location path ends with an attribute axis (e.g., `item/@amount`),
65+
/// the element `NodeId` is returned in the node-set, but its string value
66+
/// for `XPath` purposes should be the attribute value, not the element's
67+
/// text content. This map provides that override.
68+
attr_string_values: RefCell<HashMap<NodeId, String>>,
6169
}
6270

6371
/// An `XPath` 1.0 value.
@@ -80,6 +88,7 @@ impl<'a> XPathContext<'a> {
8088
context_position: 1,
8189
context_size: 1,
8290
variables: HashMap::new(),
91+
attr_string_values: RefCell::new(HashMap::new()),
8392
}
8493
}
8594

@@ -208,27 +217,15 @@ impl<'a> XPathContext<'a> {
208217
// -----------------------------------------------------------------------
209218

210219
fn eval_relative_path(&self, steps: &[Step]) -> Result<XPathValue, XPathError> {
211-
// Special case: a single attribute axis step (e.g., `@id`) returns the
212-
// attribute value as a string rather than a node-set, because attributes
213-
// are not tree nodes in our arena.
214-
if steps.len() == 1 && steps[0].axis == Axis::Attribute && steps[0].predicates.is_empty() {
215-
return Ok(self.eval_attribute_access(self.context_node, &steps[0].node_test));
216-
}
217-
218220
let mut nodes = vec![self.context_node];
219221
let mut i = 0;
220222
while i < steps.len() {
221223
let step = &steps[i];
222-
// If the last step is an attribute axis, resolve to attribute values
224+
// If the last step is an attribute axis, collect attribute values
225+
// into the attr_string_values map so that string_value() returns
226+
// the attribute value rather than the element's text content.
223227
if i == steps.len() - 1 && step.axis == Axis::Attribute && step.predicates.is_empty() {
224-
// For multiple context nodes, return the first attribute value found
225-
for &node in &nodes {
226-
let val = self.eval_attribute_access(node, &step.node_test);
227-
if !matches!(&val, XPathValue::String(s) if s.is_empty()) {
228-
return Ok(val);
229-
}
230-
}
231-
return Ok(XPathValue::String(String::new()));
228+
return Ok(self.collect_attribute_nodeset(&nodes, &step.node_test));
232229
}
233230
// Optimization: fuse descendant-or-self::node()/child::X into
234231
// descendant::X — avoids materializing the huge intermediate
@@ -244,6 +241,54 @@ impl<'a> XPathContext<'a> {
244241
Ok(XPathValue::NodeSet(nodes))
245242
}
246243

244+
/// Collects attribute values from a set of element nodes, returning them
245+
/// as a `NodeSet` (using element `NodeId`s) with `attr_string_values`
246+
/// overrides so that `string_value()` returns the attribute value.
247+
///
248+
/// This handles the fact that attributes are not tree nodes in our arena.
249+
/// For a single matching attribute, returns the value as a `String` for
250+
/// backwards compatibility with expressions like `@id` in string context.
251+
fn collect_attribute_nodeset(&self, nodes: &[NodeId], test: &NodeTest) -> XPathValue {
252+
let mut result_nodes = Vec::new();
253+
let mut attr_map = self.attr_string_values.borrow_mut();
254+
255+
for &node in nodes {
256+
let attrs = self.doc.attributes(node);
257+
match test {
258+
NodeTest::Name(name) => {
259+
if let Some(attr) = attrs.iter().find(|a| a.name == *name) {
260+
attr_map.insert(node, attr.value.clone());
261+
result_nodes.push(node);
262+
}
263+
}
264+
NodeTest::Wildcard | NodeTest::Node => {
265+
// For wildcard, return one entry per attribute
266+
// Since we map NodeId → String, we can only store one value
267+
// per element. Use the first attribute's value.
268+
if let Some(attr) = attrs.first() {
269+
attr_map.insert(node, attr.value.clone());
270+
result_nodes.push(node);
271+
}
272+
}
273+
_ => {}
274+
}
275+
}
276+
277+
// For a single result, return as String for backwards compatibility
278+
// with expressions like `@id` used in string context.
279+
if result_nodes.len() == 1 {
280+
if let Some(val) = attr_map.get(&result_nodes[0]) {
281+
return XPathValue::String(val.clone());
282+
}
283+
}
284+
// Empty result
285+
if result_nodes.is_empty() {
286+
return XPathValue::String(String::new());
287+
}
288+
289+
XPathValue::NodeSet(result_nodes)
290+
}
291+
247292
/// Tries to fuse `descendant-or-self::node()` + `child::X` at position `i`
248293
/// into a single `descendant::X` step. Returns `None` if the pattern doesn't
249294
/// match (e.g., the first step has predicates, or the second step isn't `child`).
@@ -270,30 +315,6 @@ impl<'a> XPathContext<'a> {
270315
}
271316
}
272317

273-
/// Evaluates an attribute access (e.g., `@id`) on a single element node.
274-
///
275-
/// Returns the attribute value as a `String`, or an empty string if the
276-
/// attribute does not exist. This handles the fact that attributes are not
277-
/// tree nodes in our arena.
278-
fn eval_attribute_access(&self, node: NodeId, test: &NodeTest) -> XPathValue {
279-
let attrs = self.doc.attributes(node);
280-
match test {
281-
NodeTest::Name(name) => {
282-
let val = attrs
283-
.iter()
284-
.find(|a| a.name == *name)
285-
.map_or_else(String::new, |a| a.value.clone());
286-
XPathValue::String(val)
287-
}
288-
NodeTest::Wildcard | NodeTest::Node => {
289-
// Return the value of the first attribute
290-
let val = attrs.first().map_or_else(String::new, |a| a.value.clone());
291-
XPathValue::String(val)
292-
}
293-
_ => XPathValue::String(String::new()),
294-
}
295-
}
296-
297318
fn eval_root_path(&self, steps: &[Step]) -> Result<XPathValue, XPathError> {
298319
let root = self.doc.root();
299320
if steps.is_empty() {
@@ -302,13 +323,18 @@ impl<'a> XPathContext<'a> {
302323
let mut nodes = vec![root];
303324
let mut i = 0;
304325
while i < steps.len() {
326+
let step = &steps[i];
327+
// Handle final attribute axis step
328+
if i == steps.len() - 1 && step.axis == Axis::Attribute && step.predicates.is_empty() {
329+
return Ok(self.collect_attribute_nodeset(&nodes, &step.node_test));
330+
}
305331
// Optimization: fuse descendant-or-self::node()/child::X into
306332
// descendant::X.
307333
if let Some(fused) = Self::try_fuse_descendant_child(steps, i) {
308334
nodes = self.apply_step(&nodes, &fused)?;
309335
i += 2;
310336
} else {
311-
nodes = self.apply_step(&nodes, &steps[i])?;
337+
nodes = self.apply_step(&nodes, step)?;
312338
i += 1;
313339
}
314340
}
@@ -882,6 +908,7 @@ impl<'a> XPathContext<'a> {
882908
} else {
883909
self.variables.clone()
884910
},
911+
attr_string_values: RefCell::new(HashMap::new()),
885912
};
886913
let val = ctx.eval_expr(predicate)?;
887914
let keep = match &val {
@@ -1495,6 +1522,11 @@ impl<'a> XPathContext<'a> {
14951522
/// - PI: the PI data
14961523
/// - Attribute: would be the attribute value (handled separately)
14971524
fn string_value(&self, node: NodeId) -> String {
1525+
// Check if this node has an attribute string value override
1526+
// (set when a location path ended with an attribute axis step).
1527+
if let Some(val) = self.attr_string_values.borrow().get(&node) {
1528+
return val.clone();
1529+
}
14981530
let kind = &self.doc.node(node).kind;
14991531
match kind {
15001532
NodeKind::Document | NodeKind::Element { .. } => self.doc.text_content(node),

0 commit comments

Comments
 (0)