#!/usr/bin/env python3
#
# pug_rest.py
"""
Functions for interacting with PubChem PUG_REST API.
"""
# Copyright (c) 2019-2021 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation; either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
# Based on PubChemPy https://github.com/mcs07/PubChemPy/blob/master/LICENSE
# | Copyright 2014 Matt Swain <m.swain@me.com>
# | Licensed under the MIT License
# |
# | Permission is hereby granted, free of charge, to any person obtaining a copy
# | of this software and associated documentation files (the "Software"), to deal
# | in the Software without restriction, including without limitation the rights
# | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# | copies of the Software, and to permit persons to whom the Software is
# | furnished to do so, subject to the following conditions:
#
# | The above copyright notice and this permission notice shall be included in
# | all copies or substantial portions of the Software.
#
# | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# | THE SOFTWARE.
#
# stdlib
import time
from typing import Dict, Iterable, Iterator, List, Optional, Sequence, Union
from urllib.parse import quote
# 3rd party
import requests
# this package
from chemistry_tools.pubchem import API_BASE
from chemistry_tools.pubchem.enums import PubChemFormats, PubChemNamespace
from chemistry_tools.pubchem.errors import HTTP_ERROR_CODES, PubChemHTTPError
from chemistry_tools.pubchem.utils import _force_sequence_or_csv, _make_base_url
# Names exported by this module via ``from ... import *``.
__all__ = ["get_full_json", "async_get", "request", "do_rest_get"]
def do_rest_get(
    namespace: Union[PubChemNamespace, str],
    identifier: Union[str, int, Sequence[Union[str, int]]],
    format_: Union[PubChemFormats, str] = PubChemFormats.JSON,
    domain: Optional[str] = None,
    record_type: str = "2d",
    png_width: int = 300,
    png_height: int = 300,
) -> requests.Response:
    r"""
    Validate the arguments, perform the GET request and return the response.

    A request that fails with a connection error is retried exactly once;
    if the retry fails as well, that second exception propagates to the caller.

    :param namespace: The type of identifier to look up. Valid values are in :class:`~.PubChemNamespace`.
    :param identifier: Identifiers (e.g. name, CID) for the compounds to look up.
        When using the CID namespace data for multiple compounds can be retrieved at once by
        supplying either a comma-separated string or a list.
    :param format\_: The file format to retrieve the data in.
        Valid values are in :class:`~.PubChemFormats`, plus ``'PNG'``.
    :param domain: Optional sub-resource to request: ``description``, ``synonyms``, or
        ``property`` followed by a comma-separated list of the desired properties.
    :param record_type: Forwarded to :func:`do_cached_request`, which sends it as the
        ``record_type`` query parameter when no ``domain`` is given.
    :param png_width: Image width; only used when ``format_`` is PNG.
    :param png_height: Image height; only used when ``format_`` is PNG.

    :raises ValueError: If ``namespace`` or ``format_`` is not a valid value.
    :raises PubChemHTTPError: If the response status code is one of ``HTTP_ERROR_CODES``.
    """

    if not PubChemNamespace.is_valid_value(namespace):
        raise ValueError(f"'{namespace}' is not a valid value for 'namespace'")

    if not PubChemFormats.is_valid_value(format_):
        raise ValueError(f"'{format_}' is not a valid value for 'format_'")

    # Only the CID namespace accepts several identifiers at once;
    # anything else is treated as a single, opaque identifier.
    parsed_identifier: List[str] = (
        _force_sequence_or_csv(identifier, "identifier")
        if namespace == PubChemNamespace.cid
        else [str(identifier)]
    )

    # The image size is only sent along for PNG output.
    wants_png = str(format_).upper() == str(PubChemFormats.PNG)
    query_params = {"image_size": f"{png_width}x{png_height}"} if wants_png else {}

    request_args = (namespace, parsed_identifier, format_, domain, record_type, query_params)

    try:
        response = do_cached_request(*request_args)
    except requests.exceptions.ConnectionError:
        # One more attempt with the very same arguments before giving up.
        response = do_cached_request(*request_args)

    if response.status_code in HTTP_ERROR_CODES:
        raise PubChemHTTPError(response)

    return response
def do_cached_request(
    namespace: Union[PubChemNamespace, str],
    identifier: Union[Iterable[str], str],
    format_: Union[PubChemFormats, str],
    domain: Optional[str],
    record_type: str,
    query_params: Dict,
) -> requests.Response:
    r"""
    Send the GET request for the given lookup and return the response as-is.

    The URL is ``_make_base_url(namespace, identifier)`` followed by ``<domain>/<format_>``
    when ``domain`` is given, or by just ``<format_>`` otherwise. In the latter case
    ``record_type`` is added to the query string. The status code is not checked here.

    NOTE(review): no caching is visible in this function; presumably it is handled by
    the URL object returned from ``_make_base_url`` -- confirm.

    :param namespace: The type of identifier to look up. Valid values are in :class:`~.PubChemNamespace`.
    :param identifier: Identifiers (e.g. name, CID) for the compounds to look up.
        When using the CID namespace data for multiple compounds can be retrieved at once by
        supplying either a comma-separated string or a list.
    :param format\_: The file format to retrieve the data in. Valid values are in :class:`~.PubChemFormats`, plus "PNG"
    :param domain: Optional sub-resource placed in the URL before the format.
    :param record_type: Value of the ``record_type`` query parameter; only sent when ``domain`` is falsy.
    :param query_params: Additional query-string parameters.
        The mapping is copied, so the caller's dictionary is never modified.
    """

    # Work on a copy: adding ``record_type`` below must not leak into the caller's dict.
    params = dict(query_params)
    base_url = _make_base_url(namespace, identifier)

    if domain:
        return (base_url / f"{domain}/{format_}").get(params=params)

    params["record_type"] = record_type
    return (base_url / str(format_)).get(params=params)
def get_full_json(cid: Union[str, int]) -> str:
    """
    Fetch the full record for the compound with the given ID and return its decoded JSON.

    The status code of the response is not checked before decoding.

    NOTE(review): the value returned is whatever ``.json()`` yields for the response,
    which is presumably the parsed JSON document rather than a ``str`` as annotated -- confirm.

    :param cid: The ID of the compound, interpolated into the request path.
    """

    # NOTE(review): "_view/..." is joined onto API_BASE as a path segment;
    # verify that this produces the intended URL.
    record_url = API_BASE / f"_view/data/compound/{cid}/JSON"
    return record_url.get().json()
def async_get(
    identifier,
    namespace: Union[PubChemNamespace, str] = "cid",
    operation=None,
    output="JSON",
    searchtype=None,
    **kwargs
) -> bytes:
    r"""
    Request wrapper that automatically handles asynchronous requests.

    Searches (any ``searchtype`` other than ``"xref"``) and ``formula`` lookups are first
    submitted as JSON with no operation. If the reply contains a ``Waiting`` / ``ListKey``
    entry, the list key is polled every two seconds -- with no upper bound on the number
    of attempts -- until that entry disappears, and the result is then fetched in the
    requested ``output`` format. If the first reply is not a "waiting" reply, its JSON
    content is returned unchanged. Every other request is forwarded to :func:`request` directly.

    :param identifier: Identifiers (e.g. name, CID) for the compounds to look up.
        When using the CID namespace data for multiple compounds can be retrieved at once by
        supplying either a comma-separated string or a list.
    :param namespace: The type of identifier to look up. Valid values are in :class:`~.PubChemNamespace`.
    :param operation: Optional URL component placed after the identifier (see :func:`request`).
    :param output: The output format requested from the API.
    :param searchtype: Optional URL component placed before the namespace (see :func:`request`).
    :param \*\*kwargs: Keyword parameters passed along with the GET request.

    :returns: The raw content of the final response.
    """

    possibly_async = (searchtype and searchtype != "xref") or namespace in ["formula"]

    if not possibly_async:
        return request(identifier, namespace, operation, output, searchtype, **kwargs).content

    r = request(identifier, namespace, None, "JSON", searchtype, **kwargs)
    response = r.content
    status = r.json()

    if "Waiting" in status and "ListKey" in status["Waiting"]:
        # From here on the result is addressed through its list key.
        identifier = status["Waiting"]["ListKey"]
        namespace = "listkey"

        while "Waiting" in status and "ListKey" in status["Waiting"]:
            time.sleep(2)
            r = request(identifier, namespace, operation, "JSON", **kwargs)
            response = r.content
            status = r.json()

        if output != "JSON":
            # ``searchtype`` is deliberately omitted, exactly as in the polling request:
            # it belongs to the original search, not to a list-key lookup, and would
            # otherwise be inserted into the URL ahead of "listkey".
            response = request(identifier, namespace, operation, output, **kwargs).content

    return response
def request(
    identifier,
    namespace: Union[PubChemNamespace, str] = "cid",
    operation=None,
    output: Union[PubChemFormats, str] = "JSON",
    searchtype=None,
    **kwargs,
) -> requests.Response:
    r"""
    Construct API request from parameters and return the response.

    Full specification at http://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html

    Intended for use when ``namespace`` is ``'listkey'`` or ``'formula'``,
    or when ``searchtype`` is ``'xref'``.

    :param identifier: Identifiers (e.g. name, CID) for the compounds to look up.
        When using the CID namespace data for multiple compounds can be retrieved at once by
        supplying either a comma-separated string or a list.
    :param namespace: The type of identifier to look up. Valid values are in :class:`~.PubChemNamespace`.
    :param operation: Optional URL component placed after the identifier.
    :param output: The output format; the final URL component.
    :param searchtype: Optional URL component placed before the namespace.
    :param \*\*kwargs: Keyword parameters passed along with the GET request.
        Entries whose value is :py:obj:`None` are dropped.

    :raises PubChemHTTPError: If the response status code is one of ``HTTP_ERROR_CODES``.
    """

    if isinstance(identifier, int):
        identifier = str(identifier)

    # Collapse one or more identifiers into a single comma-separated string.
    identifier_csv = ','.join(map(str, _force_sequence_or_csv(identifier, "identifier")))

    # Falsy components (e.g. a missing searchtype or operation) are left out of the URL.
    url_parts = [
        part
        for part in (
            "compound",
            searchtype,
            namespace,
            quote(identifier_csv.encode("utf8")),
            operation,
            output,
        )
        if part
    ]
    api_url = API_BASE / '/'.join(url_parts)

    # Only keyword arguments that were actually given end up in the query string.
    params = {key: value for key, value in kwargs.items() if value is not None}

    response = api_url.get(params=params)

    if response.status_code in HTTP_ERROR_CODES:
        raise PubChemHTTPError(response)

    return response