Skip to content

Commit

Permalink
serializers: bibtex: year and month using publication date
Browse files Browse the repository at this point in the history
  • Loading branch information
ptamarit authored and tmorrell committed Jan 8, 2025
1 parent de2a0e4 commit 7b3f5e6
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 13 deletions.
44 changes: 33 additions & 11 deletions invenio_rdm_records/resources/serializers/bibtex/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,15 @@

"""BibTex based Schema for Invenio RDM Records."""

import datetime
import calendar
import textwrap

from babel_edtf import parse_edtf
from edtf.parser.grammar import ParseException
from edtf.parser.parser_classes import Date, Interval
from flask_resources.serializers import BaseSerializerSchema
from marshmallow import fields, post_dump
from pydash import py_
from slugify import slugify

from ..schemas import CommonFieldsMixin
Expand All @@ -24,7 +28,7 @@ class BibTexSchema(BaseSerializerSchema, CommonFieldsMixin):
id = fields.Str()
resource_id = fields.Str(attribute="metadata.resource_type.id")
version = fields.Str(attribute="metadata.version")
date_created = fields.Method("get_date_created")
date_published = fields.Method("get_date_published")
locations = fields.Method("get_locations")
titles = fields.Method("get_titles")
doi = fields.Method("get_doi")
Expand Down Expand Up @@ -71,13 +75,31 @@ def default_entry_type(self):
"""
return BibTexFormatter.misc

def get_date_created(self, obj):
    """Get the year and month of the record's creation timestamp.

    :param obj: record dict; ``obj["created"]`` is parsed with
        ``datetime.datetime.fromisoformat``.
    :returns: dict with ``month`` (lowercased ``%b`` abbreviation) and
        ``year`` (``%Y`` string).
    :raises KeyError: if ``obj`` has no ``created`` key.
    """
    date_obj = datetime.datetime.fromisoformat(obj["created"])

    month = date_obj.strftime("%b").lower()
    year = date_obj.strftime("%Y")
    return {"month": month, "year": year}

def get_date_published(self, obj):
    """Get publication year and month from edtf date.

    :param obj: record dict; the date is read from
        ``metadata.publication_date``.
    :returns: ``None`` when the date is missing, cannot be parsed, or does
        not resolve to a ``Date``; otherwise a dict with ``year`` and, when
        the parsed date carries a month, ``month`` as a lowercased
        three-letter abbreviation.
    """
    publication_date = py_.get(obj, "metadata.publication_date")
    if not publication_date:
        return None

    try:
        parsed_date = parse_edtf(publication_date)
    except ParseException:
        return None

    if isinstance(parsed_date, Interval):
        # if date is an interval, use the start date
        parsed_date = parsed_date.lower

    # Checked after unwrapping the interval so that a start bound which is
    # not a plain date never reaches the attribute reads below.
    # NOTE(review): presumably open/unknown interval starts are not ``Date``
    # instances -- confirm against the edtf parser classes.
    if not isinstance(parsed_date, Date):
        return None

    date_published = {"year": parsed_date.year}
    if parsed_date.month:
        month_three_letter_abbr = calendar.month_abbr[
            int(parsed_date.month)
        ].lower()
        date_published["month"] = month_three_letter_abbr

    return date_published

def get_creator(self, obj):
"""Get creator."""
Expand Down Expand Up @@ -232,9 +254,9 @@ def _fetch_fields_map(self, data):
"title": (lambda titles: None if titles is None else titles[0])(
data.get("titles", None)
),
"year": data.get("date_created", {}).get("year", None),
"year": data.get("date_published", {}).get("year", None),
"doi": data.get("doi", None),
"month": data.get("date_created", {}).get("month", None),
"month": data.get("date_published", {}).get("month", None),
"version": data.get("version", None),
"url": data.get("url", None),
"school": data.get("school", None),
Expand Down Expand Up @@ -287,7 +309,7 @@ def _get_citation_key(self, data, original_data):

creator = creators[0].get("person_or_org", {})
name = creator.get("family_name", creator["name"])
pubdate = data.get("date_created", {}).get("year", None)
pubdate = data.get("date_published", {}).get("year", None)
year = id
if pubdate is not None:
year = "{}_{}".format(pubdate, id)
Expand Down
6 changes: 4 additions & 2 deletions tests/resources/serializers/test_bibtex_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
def updated_minimal_record(minimal_record):
"""Update fields (done after record create) for BibTex serializer."""
minimal_record["access"]["status"] = "open"
minimal_record["created"] = "2023-03-09T00:00:00.000000+00:00"
minimal_record["metadata"]["publication_date"] = "2023-03-13"
minimal_record["created"] = "2024-12-17T00:00:00.000000+00:00"
minimal_record["id"] = "abcde-fghij"

for creator in minimal_record["metadata"]["creators"]:
Expand All @@ -31,7 +32,8 @@ def updated_minimal_record(minimal_record):
def updated_full_record(full_record_to_dict):
"""Update fields (done after record create) for BibTex serializer."""
full_record_to_dict["access"]["status"] = "embargoed"
full_record_to_dict["created"] = "2023-03-23T00:00:00.000000+00:00"
full_record_to_dict["metadata"]["publication_date"] = "2023-03/2024-02"
full_record_to_dict["created"] = "2024-12-17T00:00:00.000000+00:00"
full_record_to_dict["id"] = "abcde-fghij"
full_record_to_dict["metadata"]["resource_type"]["id"] = "other"

Expand Down

0 comments on commit 7b3f5e6

Please sign in to comment.