-
Notifications
You must be signed in to change notification settings - Fork 820
SOLR-18194: fix nested docs detection false positive #4279
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 5 commits
0f6dccc
15f02bd
adb2f88
76cdb76
f4d0f2a
38703b7
36ec733
c20431c
ec91d99
7765e0f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| title: Fix nested docs detection false positive | ||
| type: changed | ||
| authors: | ||
| - name: Luke Kot-Zaniewski | ||
| links: | ||
| - name: SOLR-18194 | ||
| url: https://issues.apache.org/jira/browse/SOLR-18194 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -323,11 +323,11 @@ private void setMinVersionForSegments(SolrCore core, Set<String> segments, Versi | |
| private record SegmentLayout(String coreName, String seg1, String seg2, String seg3) {} | ||
|
|
||
| @Test | ||
| public void testUpgradeCoreIndexFailsWithNestedDocuments() throws Exception { | ||
| public void testUpgradeCoreIndexFailsWithChildDocuments() throws Exception { | ||
| final SolrCore core = h.getCore(); | ||
| final String coreName = core.getName(); | ||
|
|
||
| // Create a parent document with a child document (nested doc) | ||
| // Create a parent document with a child document | ||
| SolrInputDocument parentDoc = new SolrInputDocument(); | ||
| parentDoc.addField("id", "100"); | ||
| parentDoc.addField("title", "Parent Document"); | ||
|
|
@@ -338,7 +338,7 @@ public void testUpgradeCoreIndexFailsWithNestedDocuments() throws Exception { | |
|
|
||
| parentDoc.addChildDocument(childDoc); | ||
|
|
||
| // Index the nested document | ||
| // Index the parent+child document | ||
| try (SolrQueryRequestBase req = new SolrQueryRequestBase(core, new ModifiableSolrParams())) { | ||
| AddUpdateCommand cmd = new AddUpdateCommand(req); | ||
| cmd.solrDoc = parentDoc; | ||
|
|
@@ -349,7 +349,7 @@ public void testUpgradeCoreIndexFailsWithNestedDocuments() throws Exception { | |
| // Verify documents were indexed (parent + child = 2 docs) | ||
| assertQ(req("q", "*:*"), "//result[@numFound='2']"); | ||
|
|
||
| // Attempt to upgrade the index - should fail because of nested documents | ||
| // Attempt to upgrade the index - should fail because of child documents | ||
| CoreAdminHandler admin = new CoreAdminHandler(h.getCoreContainer()); | ||
| try { | ||
| final SolrQueryResponse resp = new SolrQueryResponse(); | ||
|
|
@@ -365,10 +365,192 @@ public void testUpgradeCoreIndexFailsWithNestedDocuments() throws Exception { | |
| coreName), | ||
| resp)); | ||
|
|
||
| // Verify the exception message indicates nested documents are not supported | ||
| // Verify the exception message indicates child documents are not supported | ||
| assertThat( | ||
| thrown.getMessage(), | ||
| containsString("does not support indexes containing nested documents")); | ||
| containsString("does not support indexes containing child documents")); | ||
| } finally { | ||
| admin.shutdown(); | ||
| admin.close(); | ||
| } | ||
| } | ||
|
|
||
| // --- Child docs detection tests --- | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [0] Wow lots of tests... and they seem only slightly tweaked amongst each other. My observation is that Lucene/Solr old-timers (like me) prefer to write a minimal number of randomized tests instead of writing many repeated tests that tweak something. I noticed LLMs prefer the latter. But it's fine, really!
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These are indeed LLM-generated but I was very intentional about what scenarios to test, i.e. hasChildDocs X hasUpdates X hasDeletes. These variables have different effects on the distinct count vs […] As an aside, is your reflexive urge to use randomized testing here motivated by aesthetic reasons? If so, I could consider parameterizing them. I think it should be possible.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for explaining why the tests are what they are. Seems perfect then :-) No; not aesthetic reasons. Well okay slightly but whatever. I love the idea of a utility function somewhere! SolrCore? I hate to suggest that place given SolrCore's size but it would at least have some logical sense to its placement there, and discoverability, which matters. |
||
| // | ||
| // These tests verify that the child document detection in the upgrade path | ||
| // correctly distinguishes between genuine child docs and non-child docs, | ||
| // even in the presence of updates and deletes that leave deleted documents | ||
| // in segments (since NoMergePolicy prevents segment merges from purging them). | ||
|
|
||
| @Test | ||
| public void testChildDocsDetection_noChildDocsJustAdd() throws Exception { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I understand the intent that we are trying to be explicit about the edge cases of the child docs check. In certain cases however, I'd argue that certain tests like this one are effectively testing the happy path which is already getting covered inherently in existing tests like testNoUpgradeNeededWhenAllSegmentsCurrent(). I appreciate the thorough tests, but I think we have an opportunity to keep the coverage tight here by relying on certain existing tests like above. Another reason is that every test is "just another test" by itself, but can gradually add to the build times. |
||
| for (int i = 0; i < 10; i++) { | ||
| assertU(adoc("id", String.valueOf(i), "title", "doc" + i)); | ||
| } | ||
| assertU(commit("openSearcher", "true")); | ||
|
|
||
| assertUpgradeDoesNotDetectChildDocs(); | ||
| } | ||
|
|
||
| @Test | ||
| public void testChildDocsDetection_withChildDocsJustAdd() throws Exception { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same argument on duplicate coverage as above through existing test testUpgradeCoreIndexFailsWithNestedDocuments() (now renamed to testUpgradeCoreIndexFailsWithChildDocuments() in this PR) |
||
| addChildDoc("100", "101"); | ||
| addChildDoc("200", "201"); | ||
| assertU(commit("openSearcher", "true")); | ||
|
|
||
| assertUpgradeDetectsChildDocs(); | ||
| } | ||
|
|
||
| @Test | ||
| public void testChildDocsDetection_noChildDocsWithWithinCommitUpdates() throws Exception { | ||
| // Add docs and then update some of them BEFORE committing, so both the old | ||
| // (deleted) and new versions end up in the same flushed segment. | ||
| // With NoMergePolicy and a 100MB RAM buffer (from SolrIndexConfig defaults), | ||
| // no flush or merge occurs mid-batch, guaranteeing co-location. | ||
| // | ||
| // In the resulting segment, _root_ Terms stats will show: | ||
| // Terms.size() = N (unique _root_ values, one per unique id) | ||
| // Terms.getDocCount() = N + updates (includes deleted doc entries) | ||
| // | ||
| // A naive check (uniqueRootValues < docsWithRoot) may false-positive here | ||
| // because multiple docs share the same _root_ value within the segment. | ||
| for (int i = 0; i < 10; i++) { | ||
| assertU(adoc("id", String.valueOf(i), "title", "doc" + i)); | ||
| } | ||
| // Re-add a few docs with the same ids (within-commit updates) | ||
| for (int i = 0; i < 3; i++) { | ||
| assertU(adoc("id", String.valueOf(i), "title", "updated_doc" + i)); | ||
| } | ||
| assertU(commit("openSearcher", "true")); | ||
|
|
||
| // 10 live docs — the updates replaced 3 docs in-place | ||
| assertQ(req("q", "*:*"), "//result[@numFound='10']"); | ||
| assertUpgradeDoesNotDetectChildDocs(); | ||
| } | ||
|
|
||
| @Test | ||
| public void testChildDocsDetection_withChildDocsWithWithinCommitUpdates() throws Exception { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd argue that all tests of the pattern testChildDocsDetection_withChildDocs* are getting covered through the existing test testUpgradeCoreIndexFailsWithChildDocuments() since the with vs within(CommitUpdates) condition is not a real differentiator in that case. testChildDocsDetection_noChildDocsWithWithinCommitUpdates() and testChildDocsDetection_noChildDocsWithWithinCommitDeletesAndUpdates() is all we need?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In the current algorithm they are equivalent but things can change so this is more for regression prevention. I think it's possible to imagine some future code path that only gets exercised when […] That's why I proposed moving the child doc check to a central location (where they can also be exposed by some info endpoint as well). I know @dsmiley expressed interest in exposing the child doc check via some info endpoint in the original PR (luke or maybe in core/collection status?). If we were to go that direction then we'd probably want to move this check to some centralized utility.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Totally agree with the idea of a utility function for this check. SolrCore as @dsmiley suggested seems logical. Luke handler seems fine too since it gives core-level stats. If you don't mind, can you please elaborate on the case of "all deletes via update" that you mentioned? Trying to make sure I fully comprehend it.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Consider three statistics that are similar but subtly different for "id-like" fields:
However, when you have deletions where none of the deletions are part of updates then you have:
I want to add tests which protect against the wrong kind of refactor which IMO is not that hard considering the subtleties of the different types of counts. I know they confused me a bit until I considered all the cases mentioned above. |
||
| // Same within-commit pattern but with actual child docs present | ||
| addChildDoc("100", "101"); | ||
|
|
||
| // Add and immediately re-add some non-child docs | ||
| for (int i = 0; i < 5; i++) { | ||
| assertU(adoc("id", String.valueOf(i), "title", "doc" + i)); | ||
| } | ||
| for (int i = 0; i < 3; i++) { | ||
| assertU(adoc("id", String.valueOf(i), "title", "updated_doc" + i)); | ||
| } | ||
| assertU(commit("openSearcher", "true")); | ||
|
|
||
| assertUpgradeDetectsChildDocs(); | ||
| } | ||
|
|
||
| @Test | ||
| public void testChildDocsDetection_noChildDocsWithWithinCommitDeletesAndUpdates() | ||
| throws Exception { | ||
| // Add docs, delete some, and update others — all before committing. | ||
| // Deleted and updated docs leave behind deleted entries in the same segment, | ||
| // which can cause false positives in the child docs detection. | ||
| for (int i = 0; i < 10; i++) { | ||
| assertU(adoc("id", String.valueOf(i), "title", "doc" + i)); | ||
| } | ||
| // Delete a few | ||
| assertU(delI("3")); | ||
| assertU(delI("4")); | ||
| assertU(delI("5")); | ||
| // Update a few others | ||
| for (int i = 0; i < 3; i++) { | ||
| assertU(adoc("id", String.valueOf(i), "title", "updated_doc" + i)); | ||
| } | ||
| assertU(commit("openSearcher", "true")); | ||
|
|
||
| // 7 live docs: ids 0,1,2 (updated), 6,7,8,9 (untouched); 3,4,5 deleted | ||
| assertQ(req("q", "*:*"), "//result[@numFound='7']"); | ||
| assertUpgradeDoesNotDetectChildDocs(); | ||
| } | ||
|
|
||
| @Test | ||
| public void testChildDocsDetection_withChildDocsWithWithinCommitDeletesAndUpdates() | ||
| throws Exception { | ||
| addChildDoc("100", "101"); | ||
|
|
||
| for (int i = 0; i < 5; i++) { | ||
| assertU(adoc("id", String.valueOf(i), "title", "doc" + i)); | ||
| } | ||
| assertU(delI("3")); | ||
| assertU(delI("4")); | ||
| assertU(adoc("id", "0", "title", "updated_doc0")); | ||
| assertU(commit("openSearcher", "true")); | ||
|
|
||
| assertUpgradeDetectsChildDocs(); | ||
| } | ||
|
|
||
| /** Index a parent document with a single child via the update handler. */ | ||
| private void addChildDoc(String parentId, String childId) throws Exception { | ||
| SolrCore core = h.getCore(); | ||
| SolrInputDocument parentDoc = new SolrInputDocument(); | ||
| parentDoc.addField("id", parentId); | ||
| parentDoc.addField("title", "Parent " + parentId); | ||
|
|
||
| SolrInputDocument childDoc = new SolrInputDocument(); | ||
| childDoc.addField("id", childId); | ||
| childDoc.addField("title", "Child " + childId); | ||
| parentDoc.addChildDocument(childDoc); | ||
|
|
||
| try (SolrQueryRequestBase solrReq = | ||
| new SolrQueryRequestBase(core, new ModifiableSolrParams())) { | ||
| AddUpdateCommand cmd = new AddUpdateCommand(solrReq); | ||
| cmd.solrDoc = parentDoc; | ||
| core.getUpdateHandler().addDoc(cmd); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Assert that the upgrade endpoint does NOT throw the child-documents error. This verifies that | ||
| * {@code indexContainsChildDocs} returns false. | ||
| */ | ||
| private void assertUpgradeDoesNotDetectChildDocs() throws Exception { | ||
| final String coreName = h.getCore().getName(); | ||
| CoreAdminHandler admin = new CoreAdminHandler(h.getCoreContainer()); | ||
| try { | ||
| final SolrQueryResponse resp = new SolrQueryResponse(); | ||
| admin.handleRequestBody( | ||
| req( | ||
| CoreAdminParams.ACTION, | ||
| CoreAdminParams.CoreAdminAction.UPGRADECOREINDEX.toString(), | ||
| CoreAdminParams.CORE, | ||
| coreName), | ||
| resp); | ||
| assertNull("Unexpected exception: " + resp.getException(), resp.getException()); | ||
| } finally { | ||
| admin.shutdown(); | ||
| admin.close(); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Assert that the upgrade endpoint DOES throw the child-documents error. This verifies that | ||
| * {@code indexContainsChildDocs} returns true. | ||
| */ | ||
| private void assertUpgradeDetectsChildDocs() throws Exception { | ||
| final String coreName = h.getCore().getName(); | ||
| CoreAdminHandler admin = new CoreAdminHandler(h.getCoreContainer()); | ||
| try { | ||
| final SolrQueryResponse resp = new SolrQueryResponse(); | ||
| SolrException thrown = | ||
| assertThrows( | ||
| SolrException.class, | ||
| () -> | ||
| admin.handleRequestBody( | ||
| req( | ||
| CoreAdminParams.ACTION, | ||
| CoreAdminParams.CoreAdminAction.UPGRADECOREINDEX.toString(), | ||
| CoreAdminParams.CORE, | ||
| coreName), | ||
| resp)); | ||
| assertThat( | ||
| thrown.getMessage(), | ||
| containsString("does not support indexes containing child documents")); | ||
| } finally { | ||
| admin.shutdown(); | ||
| admin.close(); | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.