Skip to content

Commit cc911a3

Browse files
committed
implement inside out croissant for review datasets #12124
1 parent 97ff858 commit cc911a3

9 files changed

Lines changed: 819 additions & 357 deletions

File tree

src/main/java/edu/harvard/iq/dataverse/export/croissant/CroissantExportUtil.java

Lines changed: 385 additions & 357 deletions
Large diffs are not rendered by default.

src/test/java/edu/harvard/iq/dataverse/api/ReviewsIT.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import static jakarta.ws.rs.core.Response.Status.CREATED;
1212
import static jakarta.ws.rs.core.Response.Status.OK;
1313
import static org.hamcrest.CoreMatchers.is;
14+
import static org.hamcrest.CoreMatchers.nullValue;
1415
import static org.junit.jupiter.api.Assertions.assertEquals;
1516

1617
import java.io.IOException;
@@ -231,6 +232,23 @@ public void testCreateReview() {
231232
Integer reviewId = UtilIT.getDatasetIdFromResponse(createReview);
232233
String reviewPid = JsonPath.from(createReview.getBody().asString()).getString("data.persistentId");
233234

235+
UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken).then().statusCode(OK.getStatusCode());
236+
UtilIT.publishDatasetViaNativeApi(reviewPid, "major", apiToken).then().statusCode(OK.getStatusCode());
237+
238+
/**
239+
* Review datasets are "inside out" in the sense that we don't present the
240+
* normal metadata fields via Croissant. Instead, we attempt to represent the
241+
* item being reviewed. The number of fields we can expose is limited, but we
242+
* can expose "itemReviewedUrl" as "url" for example.
243+
*/
244+
Response insideOutCroissant = UtilIT.exportDataset(reviewPid, "croissantSlim");
245+
insideOutCroissant.prettyPrint();
246+
insideOutCroissant.then().assertThat()
247+
.statusCode(OK.getStatusCode())
248+
.body("@type", is("sc:Dataset"))
249+
.body("name", nullValue())
250+
.body("url", is("https://datacommons.org/tools/statvar#sv=Percent_Person_Children_WithAsthma"));
251+
234252
}
235253

236254
/**

src/test/java/edu/harvard/iq/dataverse/export/CroissantExporterSlimTest.java

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ public class CroissantExporterSlimTest {
4242
static ExportDataProvider dataProviderJunk;
4343
static OutputStream outputStreamDraft;
4444
static ExportDataProvider dataProviderDraft;
45+
static OutputStream outputStreamReview;
46+
static ExportDataProvider dataProviderReview;
4547

4648
@BeforeAll
4749
public static void setUp() {
@@ -430,6 +432,70 @@ public String getDataCiteXml() {
430432
}
431433
}
432434
};
435+
436+
outputStreamReview = new ByteArrayOutputStream();
437+
dataProviderReview =
438+
new ExportDataProvider() {
439+
@Override
440+
public JsonObject getDatasetJson() {
441+
String pathToJsonFile =
442+
"src/test/resources/croissant/review/in/datasetJson.json";
443+
try (JsonReader jsonReader =
444+
Json.createReader(new FileReader(pathToJsonFile))) {
445+
return jsonReader.readObject();
446+
} catch (FileNotFoundException ex) {
447+
return null;
448+
}
449+
}
450+
451+
@Override
452+
public JsonObject getDatasetORE() {
453+
String pathToJsonFile =
454+
"src/test/resources/croissant/review/in/datasetORE.json";
455+
try (JsonReader jsonReader =
456+
Json.createReader(new FileReader(pathToJsonFile))) {
457+
return jsonReader.readObject();
458+
} catch (FileNotFoundException ex) {
459+
return null;
460+
}
461+
}
462+
463+
@Override
464+
public JsonArray getDatasetFileDetails() {
465+
String pathToJsonFile =
466+
"src/test/resources/croissant/review/in/datasetFileDetails.json";
467+
try (JsonReader jsonReader =
468+
Json.createReader(new FileReader(pathToJsonFile))) {
469+
return jsonReader.readArray();
470+
} catch (FileNotFoundException ex) {
471+
return null;
472+
}
473+
}
474+
475+
@Override
476+
public JsonObject getDatasetSchemaDotOrg() {
477+
String pathToJsonFile =
478+
"src/test/resources/croissant/review/in/datasetSchemaDotOrg.json";
479+
try (JsonReader jsonReader =
480+
Json.createReader(new FileReader(pathToJsonFile))) {
481+
return jsonReader.readObject();
482+
} catch (FileNotFoundException ex) {
483+
return null;
484+
}
485+
}
486+
487+
@Override
488+
public String getDataCiteXml() {
489+
try {
490+
return Files.readString(
491+
Paths.get(
492+
"src/test/resources/croissant/review/in/dataCiteXml.xml"),
493+
StandardCharsets.UTF_8);
494+
} catch (IOException ex) {
495+
return null;
496+
}
497+
}
498+
};
433499
}
434500

435501
@Test
@@ -544,6 +610,20 @@ public void testExportDatasetDraft() throws Exception {
544610
assertEquals(prettyPrint(expected), prettyPrint(outputStreamDraft.toString()));
545611
}
546612

613+
@Test
614+
public void testExportDatasetReview() throws Exception {
615+
exporter.exportDataset(dataProviderReview, outputStreamReview);
616+
String actual = outputStreamReview.toString();
617+
writeCroissantFile(actual, "review");
618+
String expected =
619+
Files.readString(
620+
Paths.get(
621+
"src/test/resources/croissant/review/expected/review-croissantSlim.json"),
622+
StandardCharsets.UTF_8);
623+
JSONAssert.assertEquals(expected, actual, true);
624+
assertEquals(prettyPrint(expected), prettyPrint(outputStreamReview.toString()));
625+
}
626+
547627
private void writeCroissantFile(String actual, String name) throws IOException {
548628
Path dir =
549629
Files.createDirectories(Paths.get("src/test/resources/croissant/" + name + "/out"));
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
{
2+
"@context": {
3+
"@language": "en",
4+
"@vocab": "http://schema.org/",
5+
"citeAs": "cr:citeAs",
6+
"column": "cr:column",
7+
"conformsTo": "dct:conformsTo",
8+
"cr": "http://mlcommons.org/croissant/",
9+
"rai": "http://mlcommons.org/croissant/RAI/",
10+
"data": {
11+
"@id": "cr:data",
12+
"@type": "@json"
13+
},
14+
"dataType": {
15+
"@id": "cr:dataType",
16+
"@type": "@vocab"
17+
},
18+
"dct": "http://purl.org/dc/terms/",
19+
"ddi-stats": "http://rdf-vocabulary.ddialliance.org/cv/SummaryStatisticType/2.1.2/",
20+
"examples": {
21+
"@id": "cr:examples",
22+
"@type": "@json"
23+
},
24+
"extract": "cr:extract",
25+
"field": "cr:field",
26+
"fileProperty": "cr:fileProperty",
27+
"fileObject": "cr:fileObject",
28+
"fileSet": "cr:fileSet",
29+
"format": "cr:format",
30+
"includes": "cr:includes",
31+
"isLiveDataset": "cr:isLiveDataset",
32+
"jsonPath": "cr:jsonPath",
33+
"key": "cr:key",
34+
"md5": "cr:md5",
35+
"parentField": "cr:parentField",
36+
"path": "cr:path",
37+
"recordSet": "cr:recordSet",
38+
"references": "cr:references",
39+
"regex": "cr:regex",
40+
"repeated": "cr:repeated",
41+
"replace": "cr:replace",
42+
"samplingRate": "cr:samplingRate",
43+
"sc": "http://schema.org/",
44+
"separator": "cr:separator",
45+
"source": "cr:source",
46+
"subField": "cr:subField",
47+
"transform": "cr:transform"
48+
},
49+
"@type": "sc:Dataset",
50+
"conformsTo": "http://mlcommons.org/croissant/1.1",
51+
"url": "https://datacommons.org/tools/statvar#sv=Percent_Person_Children_WithAsthma"
52+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<resource xmlns="http://datacite.org/schema/kernel-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.5/metadata.xsd">
3+
<identifier identifierType="DOI">10.5072/FK2/ZIKBUC</identifier>
4+
<creators>
5+
<creator>
6+
<creatorName nameType="Personal">Wazowski, Mike</creatorName>
7+
<givenName>Mike</givenName>
8+
<familyName>Wazowski</familyName>
9+
</creator>
10+
</creators>
11+
<titles>
12+
<title>Review of Percent of Children That Have Asthma</title>
13+
</titles>
14+
<publisher>Root</publisher>
15+
<publicationYear>2026</publicationYear>
16+
<subjects>
17+
<subject>Medicine, Health and Life Sciences</subject>
18+
</subjects>
19+
<dates>
20+
<date dateType="Available">2026-03-13</date>
21+
</dates>
22+
<resourceType resourceTypeGeneral="Other">Review</resourceType>
23+
<version>1.0</version>
24+
<rightsList>
25+
<rights rightsURI="info:eu-repo/semantics/openAccess"/>
26+
<rights rightsURI="http://creativecommons.org/publicdomain/zero/1.0" rightsIdentifier="CC0-1.0" rightsIdentifierScheme="SPDX" schemeURI="https://spdx.org/licenses/" xml:lang="en">Creative Commons CC0 1.0 Universal Public Domain Dedication.</rights>
27+
</rightsList>
28+
<descriptions>
29+
<description descriptionType="Abstract">This is a review of a dataset.</description>
30+
</descriptions>
31+
</resource>
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[]
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
{
2+
"id": 3,
3+
"identifier": "FK2/ZIKBUC",
4+
"persistentUrl": "https://doi.org/10.5072/FK2/ZIKBUC",
5+
"protocol": "doi",
6+
"authority": "10.5072",
7+
"separator": "/",
8+
"publisher": "Root",
9+
"publicationDate": "2026-03-13",
10+
"storageIdentifier": "local://10.5072/FK2/ZIKBUC",
11+
"datasetType": "review",
12+
"locks": [],
13+
"datasetVersion": {
14+
"id": 1,
15+
"datasetId": 3,
16+
"datasetPersistentId": "doi:10.5072/FK2/ZIKBUC",
17+
"datasetType": "review",
18+
"storageIdentifier": "local://10.5072/FK2/ZIKBUC",
19+
"versionNumber": 1,
20+
"internalVersionNumber": 4,
21+
"versionMinorNumber": 0,
22+
"versionState": "RELEASED",
23+
"latestVersionPublishingState": "RELEASED",
24+
"lastUpdateTime": "2026-03-13T20:30:58Z",
25+
"releaseTime": "2026-03-13T20:30:58Z",
26+
"createTime": "2026-03-13T20:30:56Z",
27+
"publicationDate": "2026-03-13",
28+
"citationDate": "2026-03-13",
29+
"license": {
30+
"name": "CC0 1.0",
31+
"uri": "http://creativecommons.org/publicdomain/zero/1.0",
32+
"iconUri": "https://licensebuttons.net/p/zero/1.0/88x31.png",
33+
"rightsIdentifier": "CC0-1.0",
34+
"rightsIdentifierScheme": "SPDX",
35+
"schemeUri": "https://spdx.org/licenses/",
36+
"languageCode": "en"
37+
},
38+
"fileAccessRequest": true,
39+
"metadataBlocks": {
40+
"citation": {
41+
"displayName": "Citation Metadata",
42+
"name": "citation",
43+
"fields": [
44+
{
45+
"typeName": "title",
46+
"multiple": false,
47+
"typeClass": "primitive",
48+
"value": "Review of Percent of Children That Have Asthma"
49+
},
50+
{
51+
"typeName": "author",
52+
"multiple": true,
53+
"typeClass": "compound",
54+
"value": [
55+
{
56+
"authorName": {
57+
"typeName": "authorName",
58+
"multiple": false,
59+
"typeClass": "primitive",
60+
"value": "Wazowski, Mike"
61+
}
62+
}
63+
]
64+
},
65+
{
66+
"typeName": "datasetContact",
67+
"multiple": true,
68+
"typeClass": "compound",
69+
"value": [
70+
{
71+
"datasetContactEmail": {
72+
"typeName": "datasetContactEmail",
73+
"multiple": false,
74+
"typeClass": "primitive",
75+
"value": "[email protected]"
76+
}
77+
}
78+
]
79+
},
80+
{
81+
"typeName": "dsDescription",
82+
"multiple": true,
83+
"typeClass": "compound",
84+
"value": [
85+
{
86+
"dsDescriptionValue": {
87+
"typeName": "dsDescriptionValue",
88+
"multiple": false,
89+
"typeClass": "primitive",
90+
"value": "This is a review of a dataset."
91+
}
92+
}
93+
]
94+
},
95+
{
96+
"typeName": "subject",
97+
"multiple": true,
98+
"typeClass": "controlledVocabulary",
99+
"value": [
100+
"Medicine, Health and Life Sciences"
101+
]
102+
}
103+
]
104+
},
105+
"review": {
106+
"displayName": "Review Metadata",
107+
"name": "review",
108+
"fields": [
109+
{
110+
"typeName": "itemReviewed",
111+
"multiple": false,
112+
"typeClass": "compound",
113+
"value": {
114+
"itemReviewedUrl": {
115+
"typeName": "itemReviewedUrl",
116+
"multiple": false,
117+
"typeClass": "primitive",
118+
"value": "https://datacommons.org/tools/statvar#sv=Percent_Person_Children_WithAsthma"
119+
},
120+
"itemReviewedType": {
121+
"typeName": "itemReviewedType",
122+
"multiple": false,
123+
"typeClass": "controlledVocabulary",
124+
"value": "Dataset"
125+
},
126+
"itemReviewedCitation": {
127+
"typeName": "itemReviewedCitation",
128+
"multiple": false,
129+
"typeClass": "primitive",
130+
"value": "\"Statistical Variable Explorer - Data Commons.\" Datacommons.org, 2026, datacommons.org/tools/statvar#sv=Percent_Person_Children_WithAsthma. Accessed 9 Mar. 2026."
131+
}
132+
}
133+
}
134+
]
135+
}
136+
},
137+
"files": [],
138+
"citation": "Wazowski, Mike, 2026, \"Review of Percent of Children That Have Asthma\", https://doi.org/10.5072/FK2/ZIKBUC, Root, V1"
139+
}
140+
}

0 commit comments

Comments
 (0)