Skip to content

Commit

Permalink
Merge pull request #514 from padix-key/ncbi-key
Browse files Browse the repository at this point in the history
Support NCBI API keys
  • Loading branch information
padix-key authored Dec 9, 2023
2 parents 3b194c4 + bd813b9 commit 3d0bc8c
Show file tree
Hide file tree
Showing 11 changed files with 127 additions and 41 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/test_and_deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,8 @@ jobs:
defaults:
run:
shell: bash -l {0}
env:
NCBI_API_KEY: ${{ secrets.NCBI_API_KEY }}

steps:
- uses: actions/checkout@v3
Expand Down
4 changes: 4 additions & 0 deletions doc/apidoc.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@
"search",
"fetch",
"fetch_single_file"
],
"API keys" : [
"set_api_key",
"get_api_key"
]
},

Expand Down
7 changes: 5 additions & 2 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import tutorial
import scraper
import bibliography
import key


# Reset matplotlib params
Expand Down Expand Up @@ -134,7 +135,7 @@
"github_type" : "star",
"page_width" : "1200px",
"fixed_sidebar" : "true",

"sidebar_link_underscore" : "#FFFFFF",
"link" : "#006B99",
}
Expand All @@ -147,7 +148,7 @@
"filename_pattern" : "^((?!_noexec).)*$",
"ignore_pattern" : "(.*ignore\.py)|(.*pymol\.py)",
"backreferences_dir" : None,
"download_all_examples" : False,
"download_all_examples" : False,
# Never report run time
"min_reported_time" : sys.maxsize,
"default_thumb_file" : join(
Expand All @@ -161,6 +162,8 @@
"matplotlib_animations" : True,
"backreferences_dir" : "examples/backreferences",
"doc_module" : ("biotite",),
# Set the NCBI API key
"reset_modules" : (key.set_ncbi_api_key_from_env,),
}


Expand Down
13 changes: 9 additions & 4 deletions doc/contribute.rst
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ When using *Biotite* internal imports, always use relative imports. Otherwise

.. Type annotations
^^^^^^^^^^^^^^^^
*Biotite* obligatorily uses type annotations (:PEP:`484`) for its public API.
This enables static type checkers (e.g. *mypy*) to detect programming errors
at compile time.
Expand All @@ -163,7 +163,7 @@ folder.
*Biotite* uses *Sphinx* for building its documentation and therefore the
documentation is based on *reStructuredText* files.
The line length of these ``*.rst`` files is also limited to
79 characters, with the exceptions already mentioned above.
79 characters, with the exceptions already mentioned above.

Contributing examples
^^^^^^^^^^^^^^^^^^^^^
Expand Down Expand Up @@ -351,14 +351,19 @@ In order to omit building the tutorial and gallery, type
instead.

Building the tutorial and the gallery may raise a ``RequestError`` due to
a high number of requests to the NCBI Entrez database.
This can be fixed by exporting the ``NCBI_API_KEY`` environment variable,
containing an
`NCBI API key <https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/>`_.


Required packages
-----------------

The following packages are required for the complete build process
including the creation of the entire documentation:

- *numpy*
- *scipy*
- *networkx*
Expand Down Expand Up @@ -403,7 +408,7 @@ or offer objects that build up on these ones.

There can be good reasons why one could choose to publish code as extension
package instead of contributing it directly to the *Biotite* project:

- Independent development
- An incompatible license
- The code's use cases are too specialized
Expand Down
9 changes: 9 additions & 0 deletions doc/key.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import os


def set_ncbi_api_key_from_env(*args, **kwargs):
    """
    Set the NCBI API key used by ``biotite.database.entrez`` from the
    ``NCBI_API_KEY`` environment variable.

    If the variable is unset or empty, the API key is reset to
    ``None``, i.e. requests are sent without a key.

    Parameters
    ----------
    *args, **kwargs
        Ignored.
        They are accepted so that the function can be used as a
        callback that is invoked with arbitrary arguments.
    """
    # Import inside function as Biotite may not be known
    # at the time of function definition
    import biotite.database.entrez as entrez

    # An environment variable that is defined but empty (e.g. a CI
    # secret that is not available to the current run) must not be
    # forwarded as key: an empty string is not ``None`` and would
    # therefore be sent to the server as 'api_key' parameter
    entrez.set_api_key(os.environ.get("NCBI_API_KEY") or None)
13 changes: 7 additions & 6 deletions doc/tutorial.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import os.path
import os
import codeop
import logging
import copy
from importlib.util import module_from_spec, spec_from_loader
from sphinx.util.logging import getLogger
Expand All @@ -10,10 +9,12 @@
from sphinx_gallery.gen_gallery import DEFAULT_GALLERY_CONF
import sphinx_gallery.scrapers as scrapers
import sphinx_gallery.py_source_parser as parser
import biotite
from key import set_ncbi_api_key_from_env


def create_tutorial(src_dir, target_dir):
set_ncbi_api_key_from_env()

logger = getLogger('sphinx-gallery')
logger.info("generating tutorial...", color="white")
with open(os.path.join(src_dir, "scripts"), "r") as file:
Expand All @@ -24,7 +25,7 @@ def create_tutorial(src_dir, target_dir):
)
for script in iterator:
_create_tutorial_section(script, src_dir, target_dir)

# Create index
# String for enumeration of tutorial pages
include_string = "\n\n".join(
Expand Down Expand Up @@ -109,14 +110,14 @@ def _create_tutorial_section(fname, src_dir, target_dir):

else:
content_rst += block_content + "\n\n"

with open(os.path.join(target_dir, f"{base_image_name}.rst"), "w") as file:
file.write(content_rst)

# Write checksum of file to avoid unnecessary rerun
with open(md5_file, "w") as file:
file.write(genrst.get_md5sum(src_file))


def _md5sum_is_current(src_file, md5_file):
if not os.path.exists(md5_file):
Expand Down
3 changes: 2 additions & 1 deletion src/biotite/database/entrez/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@

from .dbnames import *
from .download import *
from .query import *
from .query import *
from .key import *
17 changes: 14 additions & 3 deletions src/biotite/database/entrez/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
__author__ = "Patrick Kunzmann, Maximilian Dombrowsky"
__all__ = ["check_for_errors"]

import json
from ..error import RequestError


Expand All @@ -29,17 +30,27 @@
def check_for_errors(message):
"""
Check for common error messages in NCBI Entrez database responses.
Parameters
----------
message : str
The message received from NCBI Entrez.
The message received from NCBI Entrez.
Raises
------
RequestError
If the message contains an error message.
"""
# Server can respond short JSON error messages
if len(message) < 500:
try:
message_json = json.loads(message)
if "error" in message_json:
raise RequestError(message_json["error"])
except json.decoder.JSONDecodeError:
# It is not a JSON message
pass

# Error always appear at the end of message
message_end = message[-200:]
# Seemingly arbitrary '+' characters are in NCBI error messages
Expand Down
33 changes: 20 additions & 13 deletions src/biotite/database/entrez/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import requests
from .check import check_for_errors
from .dbnames import sanitize_database_name
from .key import get_api_key
from ..error import RequestError


Expand All @@ -23,15 +24,15 @@ def fetch(uids, target_path, suffix, db_name, ret_type,
ret_mode="text", overwrite=False, verbose=False):
"""
Download files from the NCBI Entrez database in various formats.
The data for each UID will be fetched into a separate file.
A list of valid database, retrieval type and mode combinations can
be found under
`<https://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.T._valid_values_of__retmode_and/?report=objectonly>`_
This function requires an internet connection.
Parameters
----------
uids : str or iterable object of str
Expand All @@ -58,7 +59,7 @@ def fetch(uids, target_path, suffix, db_name, ret_type,
verbose: bool, optional
If true, the function will output the download progress.
(Default: False)
Returns
-------
files : str or StringIO or BytesIO or list of (str or StringIO or BytesIO)
Expand All @@ -68,22 +69,22 @@ def fetch(uids, target_path, suffix, db_name, ret_type,
object) was given, a list of strings is returned.
If `target_path` is ``None``, the file contents are stored in
either `StringIO` or `BytesIO` objects.
Warnings
--------
Even if you give valid input to this function, in rare cases the
database might return no or malformed data to you.
In these cases the request should be retried.
When the issue occurs repeatedly, the error is probably in your
input.
See also
--------
fetch_single_file
Examples
--------
>>> import os.path
>>> files = fetch(["1L2Y_A","3O5R_A"], path_to_directory, suffix="fa",
... db_name="protein", ret_type="fasta")
Expand Down Expand Up @@ -122,6 +123,9 @@ def fetch(uids, target_path, suffix, db_name, ret_type,
"tool" : "Biotite",
"mail" : "[email protected]"
}
api_key = get_api_key()
if api_key is not None:
param_dict["api_key"] = api_key
r = requests.get(_fetch_url, params=param_dict)
content = r.text
check_for_errors(content)
Expand All @@ -147,7 +151,7 @@ def fetch_single_file(uids, file_name, db_name, ret_type, ret_mode="text",
"""
Almost the same as :func:`fetch()`, but the data for the given UIDs
will be stored in a single file.
Parameters
----------
uids : iterable object of str
Expand All @@ -164,22 +168,22 @@ def fetch_single_file(uids, file_name, db_name, ret_type, ret_mode="text",
overwrite : bool, optional
If false, the file is only downloaded, if no file with the same
name already exists.
Returns
-------
file : str or StringIO or BytesIO
The file name of the downloaded file.
If `file_name` is ``None``, the file content is stored in
either a `StringIO` or a `BytesIO` object.
Warnings
--------
Even if you give valid input to this function, in rare cases the
database might return no or malformed data to you.
In these cases the request should be retried.
When the issue occurs repeatedly, the error is probably in your
input.
See also
--------
fetch
Expand All @@ -203,6 +207,9 @@ def fetch_single_file(uids, file_name, db_name, ret_type, ret_mode="text",
"tool" : "Biotite",
"mail" : "[email protected]"
}
api_key = get_api_key()
if api_key is not None:
param_dict["api_key"] = api_key
r = requests.get(_fetch_url, params=param_dict)
content = r.text
check_for_errors(content)
Expand Down
44 changes: 44 additions & 0 deletions src/biotite/database/entrez/key.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# This source code is part of the Biotite package and is distributed
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
# information.

__name__ = "biotite.database.entrez"
__author__ = "Patrick Kunzmann"
__all__ = ["set_api_key", "get_api_key"]


# Module-level storage of the NCBI API key:
# read by get_api_key() and overwritten by set_api_key().
# ``None`` means that no key is set.
_API_KEY = None


def get_api_key():
    """
    Return the currently stored
    `NCBI API key <https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/>`_.

    Returns
    -------
    api_key : str or None
        The value stored by the last call of :func:`set_api_key()`,
        or ``None`` if no key has been set.
    """
    # Only reading the module-level variable: no ``global`` required
    return _API_KEY


def set_api_key(key):
    """
    Set the
    `NCBI API key <https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/>`_.

    Using an API key increases the request limit on the NCBI servers
    and is automatically used by functions in
    :mod:`biotite.database.entrez`.
    This key is kept only in memory and hence removed at the end of the
    Python session.

    Parameters
    ----------
    key : str or None
        The API key.
        Passing ``None`` removes a previously set key.
    """
    # The key lives in a module-level variable, so rebinding it
    # requires the ``global`` declaration
    global _API_KEY
    _API_KEY = key
Loading

0 comments on commit 3d0bc8c

Please sign in to comment.