Source code for chemistry_tools.pubchem.pug_rest

#!/usr/bin/env python3
#
#  pug_rest.py
"""
Functions for interacting with PubChem PUG_REST API.
"""
#  Copyright (c) 2019-2021 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU Lesser General Public License as
#  published by the Free Software Foundation; either version 3 of the
#  License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#  GNU Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#  Based on PubChemPy https://github.com/mcs07/PubChemPy/blob/master/LICENSE
#  |  Copyright 2014 Matt Swain <m.swain@me.com>
#  |  Licensed under the MIT License
#  |
#  |  Permission is hereby granted, free of charge, to any person obtaining a copy
#  |  of this software and associated documentation files (the "Software"), to deal
#  |  in the Software without restriction, including without limitation the rights
#  |  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#  |  copies of the Software, and to permit persons to whom the Software is
#  |  furnished to do so, subject to the following conditions:
#
#  |  The above copyright notice and this permission notice shall be included in
#  |  all copies or substantial portions of the Software.
#
#  |  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#  |  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#  |  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#  |  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#  |  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#  |  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#  |  THE SOFTWARE.
#

# stdlib
import time
from typing import Dict, Iterable, Iterator, List, Optional, Sequence, Union
from urllib.parse import quote

# 3rd party
import requests

# this package
from chemistry_tools.pubchem import API_BASE
from chemistry_tools.pubchem.enums import PubChemFormats, PubChemNamespace
from chemistry_tools.pubchem.errors import HTTP_ERROR_CODES, PubChemHTTPError
from chemistry_tools.pubchem.utils import _force_sequence_or_csv, _make_base_url

__all__ = ["get_full_json", "async_get", "request", "do_rest_get"]


def do_rest_get(
    namespace: Union[PubChemNamespace, str],
    identifier: Union[str, int, Sequence[Union[str, int]]],
    format_: Union[PubChemFormats, str] = PubChemFormats.JSON,
    domain: Optional[str] = None,
    record_type: str = "2d",
    png_width: int = 300,
    png_height: int = 300,
) -> requests.Response:
    r"""
    Validate the arguments, perform the GET request and return the response.

    :param namespace: The type of identifier to look up.
        Valid values are in :class:`~.PubChemNamespace`.
    :param identifier: Identifiers (e.g. name, CID) for the compounds to look up.
        When using the CID namespace data for multiple compounds can be retrieved
        at once by supplying either a comma-separated string or a list.
    :param format\_: The file format to retrieve the data in.
        Valid values are in :class:`~.PubChemFormats`, plus ``'PNG'``.
    :param domain: Optional path component placed before the format, e.g.
        ``description``, ``synonyms``, or ``property`` followed by a
        comma-separated list of desired properties.
    :param record_type: Forwarded to :func:`do_cached_request`, which sends it as
        the ``record_type`` query parameter when no ``domain`` is given.
    :param png_width: Image width, used for the ``image_size`` query parameter
        when the PNG format is requested.
    :param png_height: Image height, used for the ``image_size`` query parameter
        when the PNG format is requested.

    :raises ValueError: If ``namespace`` or ``format_`` is not a valid value.
    :raises PubChemHTTPError: If the response status code is in ``HTTP_ERROR_CODES``.
    """

    if not PubChemNamespace.is_valid_value(namespace):
        raise ValueError(f"'{namespace}' is not a valid value for 'namespace'")
    if not PubChemFormats.is_valid_value(format_):
        raise ValueError(f"'{format_}' is not a valid value for 'format_'")

    # Only the CID namespace is expanded into several identifiers;
    # for every other namespace the identifier is used as one string.
    is_cid_lookup = namespace == PubChemNamespace.cid
    parsed_identifier: List[str] = (
        _force_sequence_or_csv(identifier, "identifier") if is_cid_lookup else [str(identifier)]
    )

    query_params: Dict[str, str] = {}
    if str(format_).upper() == str(PubChemFormats.PNG):
        query_params["image_size"] = f"{png_width}x{png_height}"

    call_args = (namespace, parsed_identifier, format_, domain, record_type, query_params)

    try:
        response = do_cached_request(*call_args)
    except requests.exceptions.ConnectionError:
        # Retry exactly once; a second connection error propagates to the caller.
        response = do_cached_request(*call_args)

    if response.status_code in HTTP_ERROR_CODES:
        raise PubChemHTTPError(response)

    return response
def do_cached_request(
    namespace: Union[PubChemNamespace, str],
    identifier: Union[Iterable[str], str],
    format_: Union[PubChemFormats, str],
    domain: Optional[str],
    record_type: str,
    query_params: Dict,
) -> requests.Response:
    r"""
    Build the request URL for the given lookup and perform the GET request.

    NOTE(review): despite the name, no caching is visible in this function.

    :param namespace: The type of identifier to look up.
        Valid values are in :class:`~.PubChemNamespace`.
    :param identifier: Identifiers (e.g. name, CID) for the compounds to look up.
        When using the CID namespace data for multiple compounds can be retrieved
        at once by supplying either a comma-separated string or a list.
    :param format\_: The file format to retrieve the data in.
        Valid values are in :class:`~.PubChemFormats`, plus "PNG"
    :param domain: Optional path component placed before the format.
        When given, ``record_type`` is not sent.
    :param record_type: Sent as the ``record_type`` query parameter
        when no ``domain`` is given.
    :param query_params: Additional query parameters for the request.
        This mapping is not modified.

    :returns: The response to the GET request. The status code is not checked here.
    """

    # Work on a copy so that adding ``record_type`` below
    # never leaks into the dictionary owned by the caller.
    params = dict(query_params or {})

    base_url = _make_base_url(namespace, identifier)

    if domain:
        endpoint = base_url / f"{domain}/{format_}"
    else:
        params["record_type"] = record_type
        endpoint = base_url / str(format_)

    return endpoint.get(params=params)
def get_full_json(cid: Union[str, int]) -> Dict:
    """
    Returns the full JSON record for the compound with the given ID.

    The record is fetched from ``_view/data/compound/<cid>/JSON`` beneath ``API_BASE``.
    The HTTP status code of the response is not inspected here.

    :param cid: The ID of the compound to retrieve the record for.

    :returns: The decoded JSON body of the response
        (the parsed document, not the raw JSON text).
    """

    json_file = (API_BASE / f"_view/data/compound/{cid}/JSON").get()
    return json_file.json()
def async_get(
    identifier,
    namespace: Union[PubChemNamespace, str] = "cid",
    operation=None,
    output="JSON",
    searchtype=None,
    **kwargs
) -> bytes:
    r"""
    Request wrapper that automatically handles asynchronous requests.

    Searches (any ``searchtype`` other than ``'xref'``) and ``formula`` lookups are
    first requested as JSON. If the reply contains a ``Waiting`` entry with a
    ``ListKey``, that key is polled every two seconds via the ``listkey`` namespace
    until the reply no longer reports ``Waiting``. All other requests are made directly.

    :param identifier: Identifiers (e.g. name, CID) for the compounds to look up.
        When using the CID namespace data for multiple compounds can be retrieved
        at once by supplying either a comma-separated string or a list.
    :param namespace: The type of identifier to look up.
        Valid values are in :class:`~.PubChemNamespace`.
    :param operation: Optional operation, passed through to :func:`request`.
    :param output: The output format to request.
    :param searchtype: Optional search type, passed through to :func:`request`.
    :param \*\*kwargs: Keyword parameters passed along with the GET request.

    :returns: The raw content of the final response.
    """

    def still_waiting(payload) -> bool:
        # True while the reply only carries a list key to poll for the result.
        return "Waiting" in payload and "ListKey" in payload["Waiting"]

    may_be_deferred = (searchtype and searchtype != "xref") or namespace == "formula"

    if not may_be_deferred:
        return request(identifier, namespace, operation, output, searchtype, **kwargs).content

    # The initial request is always made as JSON, without the operation,
    # so that a "Waiting" reply can be detected.
    reply = request(identifier, namespace, None, "JSON", searchtype, **kwargs)
    content = reply.content
    status = reply.json()

    if not still_waiting(status):
        return content

    # From here on the list key takes the place of the original identifier.
    identifier = status["Waiting"]["ListKey"]
    namespace = "listkey"

    while still_waiting(status):
        time.sleep(2)
        reply = request(identifier, namespace, operation, "JSON", **kwargs)
        content = reply.content
        status = reply.json()

    if output != "JSON":
        # Polling is done in JSON; fetch the result again in the requested format.
        content = request(identifier, namespace, operation, output, searchtype, **kwargs).content

    return content
def request(
    identifier,
    namespace: Union[PubChemNamespace, str] = "cid",
    operation=None,
    output: Union[PubChemFormats, str] = "JSON",
    searchtype=None,
    **kwargs,
) -> requests.Response:
    r"""
    Construct API request from parameters and return the response.

    Full specification at http://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html

    The URL path beneath ``API_BASE`` is assembled from ``compound``, ``searchtype``,
    ``namespace``, the URL-quoted identifier, ``operation`` and ``output``,
    in that order, skipping any component that is empty or :py:obj:`None`.

    :param identifier: Identifiers (e.g. name, CID) for the compounds to look up.
        When using the CID namespace data for multiple compounds can be retrieved
        at once by supplying either a comma-separated string or a list.
    :param namespace: The type of identifier to look up.
        Valid values are in :class:`~.PubChemNamespace`.
    :param operation: Optional path component placed after the identifier.
    :param output: The output format; the final component of the URL path.
    :param searchtype: Optional path component placed before the namespace.
    :param \*\*kwargs: Keyword parameters passed along with the GET request.
        Parameters whose value is :py:obj:`None` are omitted.

    :raises PubChemHTTPError: If the response status code is in ``HTTP_ERROR_CODES``.
    """

    # Collapse the identifier(s) into a single comma-separated string.
    if isinstance(identifier, int):
        identifier = str(identifier)

    id_values = _force_sequence_or_csv(identifier, "identifier")
    joined_ids = ','.join(map(str, id_values))

    # The identifier is placed in the URL path. Use this function when:
    #   namespace in ['listkey', 'formula']
    #   searchtype == 'xref'
    url_id = quote(joined_ids.encode("utf8"))

    path_parts: Iterator[str] = filter(
        None,
        ("compound", searchtype, namespace, url_id, operation, output),
    )
    api_url = API_BASE / '/'.join(path_parts)

    # Only keyword arguments with a real value are sent as query parameters.
    params = {key: val for key, val in kwargs.items() if val is not None}

    response = api_url.get(params=params)

    if response.status_code in HTTP_ERROR_CODES:
        raise PubChemHTTPError(response)

    return response