From 0beeebefaa31ff6f725be2fa1e5aa5b703e5617e Mon Sep 17 00:00:00 2001 From: "Gantner, Florian Klaus" Date: Sat, 28 Dec 2024 13:00:34 +0100 Subject: [PATCH] datacite import plugin for Project entities import service for projects extending the existing datacite import plugin implementation and basic import mapping matching the current fields in the submission forms and the existing transformators/extractors for the metadata https://github.com/DSpace/DSpace/issues/9636 --- ...taCiteImportMetadataSourceServiceImpl.java | 16 +++ .../datacite/DataCiteProjectFieldMapping.java | 38 +++++++ ...rojectImportMetadataSourceServiceImpl.java | 24 ++++ .../spring-dspace-addon-import-services.xml | 10 ++ .../config/spring/api/external-services.xml | 11 ++ ...eProjectImportMetadataSourceServiceIT.java | 107 ++++++++++++++++++ .../dspace/app/rest/dataCiteProject-test.json | 1 + dspace/config/modules/external-providers.cfg | 6 +- .../spring/api/datacite-integration.xml | 66 +++++++++++ .../config/spring/api/external-services.xml | 11 ++ 10 files changed, 289 insertions(+), 1 deletion(-) create mode 100644 dspace-api/src/main/java/org/dspace/importer/external/datacite/DataCiteProjectFieldMapping.java create mode 100644 dspace-api/src/main/java/org/dspace/importer/external/datacite/DataCiteProjectImportMetadataSourceServiceImpl.java create mode 100644 dspace-server-webapp/src/test/java/org/dspace/app/rest/DataCiteProjectImportMetadataSourceServiceIT.java create mode 100644 dspace-server-webapp/src/test/resources/org/dspace/app/rest/dataCiteProject-test.json diff --git a/dspace-api/src/main/java/org/dspace/importer/external/datacite/DataCiteImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/datacite/DataCiteImportMetadataSourceServiceImpl.java index e00b2e2cea8f..a8caaba092b8 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/datacite/DataCiteImportMetadataSourceServiceImpl.java +++ 
b/dspace-api/src/main/java/org/dspace/importer/external/datacite/DataCiteImportMetadataSourceServiceImpl.java @@ -53,6 +53,16 @@ public class DataCiteImportMetadataSourceServiceImpl @Autowired private ConfigurationService configurationService; + private String entityFilterQuery; + + public String getEntityFilterQuery() { + return entityFilterQuery; + } + + public void setEntityFilterQuery(String entityFilterQuery) { + this.entityFilterQuery = entityFilterQuery; + } + @Override public String getImportSource() { return "datacite"; @@ -80,6 +90,9 @@ public int getRecordsCount(String query) throws MetadataSourceException { if (StringUtils.isBlank(id)) { id = query; } + if (StringUtils.isNotBlank(getEntityFilterQuery())) { + id = id + " " + getEntityFilterQuery(); + } uriParameters.put("query", id); uriParameters.put("page[size]", "1"); int timeoutMs = configurationService.getIntProperty("datacite.timeout", 180000); @@ -118,6 +131,9 @@ public Collection getRecords(String query, int start, int count) t if (StringUtils.isBlank(id)) { id = query; } + if (StringUtils.isNotBlank(getEntityFilterQuery())) { + id = id + " " + getEntityFilterQuery(); + } uriParameters.put("query", id); // start = current dspace page / datacite page number starting with 1 // dspace rounds up/down to the next configured pagination settings. 
diff --git a/dspace-api/src/main/java/org/dspace/importer/external/datacite/DataCiteProjectFieldMapping.java b/dspace-api/src/main/java/org/dspace/importer/external/datacite/DataCiteProjectFieldMapping.java new file mode 100644 index 000000000000..c0c0539a5ed2 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/datacite/DataCiteProjectFieldMapping.java @@ -0,0 +1,38 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.datacite; + +import java.util.Map; + +import jakarta.annotation.Resource; +import org.dspace.importer.external.metadatamapping.AbstractMetadataFieldMapping; + +/** + * An implementation of {@link AbstractMetadataFieldMapping} + * Responsible for defining the mapping of the datacite metadatum fields on the DSpace metadatum fields + * + * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it) + * @author Florian Gantner (florian.gantner@uni-bamberg.de) + */ +public class DataCiteProjectFieldMapping extends AbstractMetadataFieldMapping { + + /** + * Defines which metadatum is mapped on which metadatum. Note that while the key must be unique it + * only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over + * what metadatafield is generated. + * + * @param metadataFieldMap The map containing the link between retrieved metadata and metadata that will be set to + * the item. 
+ */ + @Override + @Resource(name = "dataciteProjectMetadataFieldMap") + public void setMetadataFieldMap(Map metadataFieldMap) { + super.setMetadataFieldMap(metadataFieldMap); + } + +} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/datacite/DataCiteProjectImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/datacite/DataCiteProjectImportMetadataSourceServiceImpl.java new file mode 100644 index 000000000000..b598f15683f0 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/datacite/DataCiteProjectImportMetadataSourceServiceImpl.java @@ -0,0 +1,24 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.datacite; + +/** + * Implements a data source for querying DataCite specifically for Project resourceTypes. 
+ * This inherits the methods of DataCiteImportMetadataSourceServiceImpl + * + * @author Florian Gantner (florian.gantner@uni-bamberg.de) + * + */ +public class DataCiteProjectImportMetadataSourceServiceImpl + extends DataCiteImportMetadataSourceServiceImpl { + + @Override + public String getImportSource() { + return "dataciteProject"; + } +} diff --git a/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml b/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml index f7943fb2320c..a4b7e2e45769 100644 --- a/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml +++ b/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml @@ -51,11 +51,21 @@ + + + + + + + + diff --git a/dspace-api/src/test/data/dspaceFolder/config/spring/api/external-services.xml b/dspace-api/src/test/data/dspaceFolder/config/spring/api/external-services.xml index 83d45b38cc76..5450ad73aa22 100644 --- a/dspace-api/src/test/data/dspaceFolder/config/spring/api/external-services.xml +++ b/dspace-api/src/test/data/dspaceFolder/config/spring/api/external-services.xml @@ -104,5 +104,16 @@ + + + + + + + + Project + + + diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DataCiteProjectImportMetadataSourceServiceIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DataCiteProjectImportMetadataSourceServiceIT.java new file mode 100644 index 000000000000..d7c4bdf68fda --- /dev/null +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DataCiteProjectImportMetadataSourceServiceIT.java @@ -0,0 +1,107 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.when; + +import java.io.InputStream; +import 
java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import org.apache.commons.io.IOUtils; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.impl.client.CloseableHttpClient; +import org.dspace.importer.external.datacite.DataCiteProjectImportMetadataSourceServiceImpl; +import org.dspace.importer.external.datamodel.ImportRecord; +import org.dspace.importer.external.liveimportclient.service.LiveImportClientImpl; +import org.dspace.importer.external.metadatamapping.MetadatumDTO; +import org.junit.Test; +import org.mockito.ArgumentMatchers; +import org.mockito.Mockito; +import org.springframework.beans.factory.annotation.Autowired; + + +/** + * Integration tests for {@link DataCiteProjectImportMetadataSourceServiceImpl} + * General tests for the datacite api are covered in the {@link DataCiteImportMetadataSourceServiceIT} + * + * @author Florian Gantner (florian.gantner@uni-bamberg.de) + */ +public class DataCiteProjectImportMetadataSourceServiceIT extends AbstractLiveImportIntegrationTest { + + @Autowired + private LiveImportClientImpl liveImportClientImpl; + + @Autowired + private DataCiteProjectImportMetadataSourceServiceImpl dataCiteProjectServiceImpl; + + @Test + public void dataCiteImportMetadataGetRecordsTest() throws Exception { + context.turnOffAuthorisationSystem(); + CloseableHttpClient originalHttpClient = liveImportClientImpl.getHttpClient(); + CloseableHttpClient httpClient = Mockito.mock(CloseableHttpClient.class); + try (InputStream dataCiteResp = getClass().getResourceAsStream("dataCiteProject-test.json")) { + String dataCiteRespXmlResp = IOUtils.toString(dataCiteResp, Charset.defaultCharset()); + + liveImportClientImpl.setHttpClient(httpClient); + CloseableHttpResponse response = mockResponse(dataCiteRespXmlResp, 200, "OK"); + when(httpClient.execute(ArgumentMatchers.any())).thenReturn(response); + + context.restoreAuthSystemState(); + ArrayList 
collection2match = getRecords(); + Collection recordsImported = dataCiteProjectServiceImpl.getRecords("10.60872/ror", + 0, -1); + assertEquals(1, recordsImported.size()); + matchRecords(new ArrayList<>(recordsImported), collection2match); + } finally { + liveImportClientImpl.setHttpClient(originalHttpClient); + } + } + + private ArrayList getRecords() { + ArrayList records = new ArrayList<>(); + //define first record + List metadatums = new ArrayList<>(); + MetadatumDTO title = createMetadatumDTO("dc", "title", null, + "Affiliations and Identifiers for Research Organizations (ROR)"); + MetadatumDTO doi = createMetadatumDTO("dc", "identifier", null, "10.60872/ror"); + MetadatumDTO contributor1 = createMetadatumDTO("project", "investigator", null, + "Haberman, Ted"); + MetadatumDTO description1 = createMetadatumDTO("dc", "description", null, + "The Research Organization Registry (ROR) is a community-led project launched in January 2019 to " + + "develop an open, sustainable, usable, and unique identifier for every research organization in the " + + "world. Metadata Game Changers worked with Dryad in the first large-scale adoption of RORs by a " + + "repository. We connected to papers related to Dryad datasets, found affiliations from Crossref and " + + "other sources, searched the early ROR for identifiers, and added them to the Dryad metadata. 
Since " + + " that time, we have been involved in re-curating repositories to add RORs and other kinds of " + + "identifiers."); + MetadatumDTO subject1 = createMetadatumDTO("dc", "subject", null, "ROR"); + MetadatumDTO subject2 = createMetadatumDTO("dc", "subject", null, + "Research Organizations"); + MetadatumDTO subject3 = createMetadatumDTO("dc", "subject", null, "Identifiers"); + MetadatumDTO subject4 = createMetadatumDTO("dc", "subject", null, "Affiliations"); + MetadatumDTO subject5 = createMetadatumDTO("dc", "subject", null, "Metadata"); + metadatums.add(title); + metadatums.add(doi); + metadatums.add(contributor1); + metadatums.add(description1); + metadatums.add(subject1); + metadatums.add(subject2); + metadatums.add(subject3); + metadatums.add(subject4); + metadatums.add(subject5); + + ImportRecord firstRecord = new ImportRecord(metadatums); + + records.add(firstRecord); + return records; + } +} diff --git a/dspace-server-webapp/src/test/resources/org/dspace/app/rest/dataCiteProject-test.json b/dspace-server-webapp/src/test/resources/org/dspace/app/rest/dataCiteProject-test.json new file mode 100644 index 000000000000..115f07e4b85c --- /dev/null +++ b/dspace-server-webapp/src/test/resources/org/dspace/app/rest/dataCiteProject-test.json @@ -0,0 +1 @@ +{"data":{"id":"10.60872/ror","type":"dois","attributes":{"doi":"10.60872/ror","prefix":"10.60872","suffix":"ror","identifiers":[],"alternateIdentifiers":[],"creators":[{"name":"Ted Habermann","nameType":"Personal","givenName":"Ted","familyName":"Habermann","affiliation":["Metadata Game Changers (United States)"],"nameIdentifiers":[{"nameIdentifier":"https://orcid.org/0000-0003-3585-6733","nameIdentifierScheme":"ORCID"}]}],"titles":[{"lang":"en","title":"Affiliations and Identifiers for Research Organizations (ROR)"},{"lang":"en","title":"Identifying Organizations","titleType":"AlternativeTitle"}],"publisher":"Metadata Game Changers (United 
States)","container":{},"publicationYear":2021,"subjects":[{"subject":"ROR"},{"subject":"Research Organizations"},{"subject":"Identifiers"},{"subject":"Affiliations"},{"subject":"Metadata"}],"contributors":[{"name":"Ted Habermann","nameType":"Personal","givenName":"Ted","familyName":"Habermann","affiliation":["Metadata Game Changers (United States)"],"contributorType":"ProjectLeader","nameIdentifiers":[{"nameIdentifier":"https://orcid.org/0000-0003-3585-6733","nameIdentifierScheme":"ORCID"}]},{"name":"Ted Habermann","nameType":"Personal","givenName":"Ted","familyName":"Habermann","affiliation":["Metadata Game Changers (United States)"],"contributorType":"ContactPerson","nameIdentifiers":[{"nameIdentifier":"https://orcid.org/0000-0003-3585-6733","nameIdentifierScheme":"ORCID"}]},{"name":"John Chodaki","nameType":"Personal","affiliation":["University of California Office of the President"],"contributorType":"ProjectLeader","nameIdentifiers":[{"nameIdentifier":"https://orcid.org/0000-0002-7378-2408","nameIdentifierScheme":"ORCID"}]},{"name":"Daniella Lowenberg","nameType":"Personal","affiliation":["University of California Office of the 
President"],"contributorType":"ProjectLeader","nameIdentifiers":[{"nameIdentifier":"https://orcid.org/0000-0003-2255-1869","nameIdentifierScheme":"ORCID"}]}],"dates":[],"language":null,"types":{"ris":"GEN","bibtex":"misc","citeproc":"article","schemaOrg":"CreativeWork","resourceType":"Project","resourceTypeGeneral":"Project"},"relatedIdentifiers":[{"relationType":"HasPart","relatedIdentifier":"https://doi.org/10.59350/w46m1-evz59","resourceTypeGeneral":"Text","relatedIdentifierType":"DOI"},{"relationType":"HasPart","relatedIdentifier":"https://doi.org/10.59350/4tbaw-m9382","resourceTypeGeneral":"Text","relatedIdentifierType":"DOI"},{"relationType":"HasPart","relatedIdentifier":"https://doi.org/10.59350/g96gh-x2361","resourceTypeGeneral":"Text","relatedIdentifierType":"DOI"},{"relationType":"HasPart","relatedIdentifier":"https://doi.org/10.59350/ec8fd-s5t94","resourceTypeGeneral":"Text","relatedIdentifierType":"DOI"},{"relationType":"HasPart","relatedIdentifier":"https://doi.org/10.59350/93v82-yr723","resourceTypeGeneral":"Text","relatedIdentifierType":"DOI"},{"relationType":"HasPart","relatedIdentifier":"https://doi.org/10.59350/129q1-ckn64","resourceTypeGeneral":"Text","relatedIdentifierType":"DOI"},{"relationType":"HasPart","relatedIdentifier":"https://doi.org/10.59350/4gxfz-4kb47","resourceTypeGeneral":"Text","relatedIdentifierType":"DOI"},{"relationType":"HasPart","relatedIdentifier":"https://doi.org/10.59350/7gb9z-x1767","resourceTypeGeneral":"Text","relatedIdentifierType":"DOI"},{"relationType":"HasPart","relatedIdentifier":"https://doi.org/10.59350/jmewf-dsf80","resourceTypeGeneral":"Text","relatedIdentifierType":"DOI"},{"relationType":"HasPart","relatedIdentifier":"https://doi.org/10.59350/n51d6-ks443","resourceTypeGeneral":"Text","relatedIdentifierType":"DOI"},{"relationType":"HasPart","relatedIdentifier":"https://doi.org/10.59350/t6dna-wbp85","resourceTypeGeneral":"Text","relatedIdentifierType":"DOI"}],"relatedItems":[],"sizes":[],"formats":[],"version":nu
ll,"rightsList":[{"rights":"Creative Commons Attribution 4.0 International","rightsUri":"https://creativecommons.org/licenses/by/4.0/legalcode","schemeUri":"https://spdx.org/licenses/","rightsIdentifier":"cc-by-4.0","rightsIdentifierScheme":"SPDX"}],"descriptions":[{"lang":"en","description":"The Research Organization Registry (ROR) is a community-led project launched in January 2019 to develop an open, sustainable, usable, and unique identifier for every research organization in the world. Metadata Game Changers worked with Dryad in the first large-scale adoption of RORs by a repository. We connected to papers related to Dryad datasets, found affiliations from Crossref and other sources, searched the early ROR for identifiers, and added them to the Dryad metadata. Since that time, we have been involved in re-curating repositories to add RORs and other kinds of identifiers.","descriptionType":"Abstract"}],"geoLocations":[],"fundingReferences":[{"awardTitle":"Affiliations and Identifiers for Research Organizations (ROR)","funderName":"Metadata Game Changers (United States)","funderIdentifier":"https://ror.org/05bp8ka05","funderIdentifierType":"ROR"},{"awardTitle":"Affiliations and Research Organization Identifiers (RORs) For Dryad Data Reository","funderName":"University of California Office of the 
President","funderIdentifier":"https://ror.org/00dmfq477","funderIdentifierType":"ROR"}],"xml":"","url":"https://commons.datacite.org/doi.org/10.60872/ror","contentUrl":null,"metadataVersion":4,"schemaVersion":"https://datacite.org/schema/kernel-4","source":"api","isActive":true,"state":"findable","reason":null,"viewCount":0,"viewsOverTime":[],"downloadCount":0,"downloadsOverTime":[],"referenceCount":0,"citationCount":11,"citationsOverTime":[{"year":"2024","total":11}],"partCount":11,"partOfCount":0,"versionCount":0,"versionOfCount":0,"created":"2024-06-19T22:46:29.000Z","registered":"2024-06-20T22:04:19.000Z","published":"2021","updated":"2024-12-19T20:10:23.000Z"},"relationships":{"client":{"data":{"id":"sjyq.oozvia","type":"clients"}},"provider":{"data":{"id":"sjyq","type":"providers"}},"media":{"data":{"id":"10.60872/ror","type":"media"}},"references":{"data":[]},"citations":{"data":[{"id":"10.59350/w46m1-evz59","type":"dois"},{"id":"10.59350/4tbaw-m9382","type":"dois"},{"id":"10.59350/g96gh-x2361","type":"dois"},{"id":"10.59350/ec8fd-s5t94","type":"dois"},{"id":"10.59350/93v82-yr723","type":"dois"},{"id":"10.59350/129q1-ckn64","type":"dois"},{"id":"10.59350/4gxfz-4kb47","type":"dois"},{"id":"10.59350/7gb9z-x1767","type":"dois"},{"id":"10.59350/jmewf-dsf80","type":"dois"},{"id":"10.59350/n51d6-ks443","type":"dois"},{"id":"10.59350/t6dna-wbp85","type":"dois"}]},"parts":{"data":[{"id":"10.59350/w46m1-evz59","type":"dois"},{"id":"10.59350/4tbaw-m9382","type":"dois"},{"id":"10.59350/g96gh-x2361","type":"dois"},{"id":"10.59350/ec8fd-s5t94","type":"dois"},{"id":"10.59350/93v82-yr723","type":"dois"},{"id":"10.59350/129q1-ckn64","type":"dois"},{"id":"10.59350/4gxfz-4kb47","type":"dois"},{"id":"10.59350/7gb9z-x1767","type":"dois"},{"id":"10.59350/jmewf-dsf80","type":"dois"},{"id":"10.59350/n51d6-ks443","type":"dois"},{"id":"10.59350/t6dna-wbp85","type":"dois"}]},"partOf":{"data":[]},"versions":{"data":[]},"versionOf":{"data":[]}}}} diff --git 
a/dspace/config/modules/external-providers.cfg b/dspace/config/modules/external-providers.cfg index f1ec32c91034..4ee14152431b 100644 --- a/dspace/config/modules/external-providers.cfg +++ b/dspace/config/modules/external-providers.cfg @@ -91,9 +91,13 @@ wos.url.search = https://wos-api.clarivate.com/api/wos/?databaseId=WOS&lang=en&u datacite.url = https://api.datacite.org/dois/ datacite.timeout = 180000 +# Additional filter appended to the search query to limit the result set to specific resourceTypes. +# During import a single space is inserted before the entityfilterquery value to avoid invalid queries. +datacite.publication.entityfilterquery = AND NOT ((types.resourceTypeGeneral:Project) OR (types.resourceTypeGeneral:Other AND types.resourceType:Project)) +datacite.project.entityfilterquery = AND ((types.resourceTypeGeneral:Project) OR (types.resourceTypeGeneral:Other AND types.resourceType:Project)) ################################################################# #--------------------------- ROR -------------------------------# #---------------------------------------------------------------# ror.orgunit-import.api-url = https://api.ror.org/organizations -################################################################# \ No newline at end of file +################################################################# diff --git a/dspace/config/spring/api/datacite-integration.xml b/dspace/config/spring/api/datacite-integration.xml index 236ec0a3bda9..62a25ca821d1 100644 --- a/dspace/config/spring/api/datacite-integration.xml +++ b/dspace/config/spring/api/datacite-integration.xml @@ -59,4 +59,70 @@ + + + Defines which metadatum is mapped on which metadatum. Note that while the key must be unique it + only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over + what metadatafield is generated. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dspace/config/spring/api/external-services.xml b/dspace/config/spring/api/external-services.xml index e3a842efa094..295ad9af0b70 100644 --- a/dspace/config/spring/api/external-services.xml +++ b/dspace/config/spring/api/external-services.xml @@ -271,4 +271,15 @@ + + + + + + + + Project + + +