Source code for chemistry_tools.pubchem.synonyms

#!/usr/bin/env python3
#
#  synonyms.py
"""
Functions for obtaining the synonyms of a compound from the PubChem database.
"""
#
#  Copyright (c) 2019-2020 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU Lesser General Public License as
#  published by the Free Software Foundation; either version 3 of the
#  License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#  GNU Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#

# stdlib
from typing import Dict, List, Sequence, Union

# this package
from chemistry_tools.pubchem.enums import PubChemNamespace
from chemistry_tools.pubchem.pug_rest import do_rest_get

__all__ = ["Synonyms", "get_synonyms", "rest_get_synonyms"]


[docs]class Synonyms(List[str]): """ Contains a list of synonyms for a compound. :param initlist: The content to initialise the list with. """ def __init__(self, initlist): # noqa: MAN001 super().__init__() for val in initlist: self.append(str(val))
[docs] def __contains__(self, synonym) -> bool: # noqa: MAN001 """ Return ``synonym in self``. The comparison treats hyphens and underscores as whitespace. :param synonym: """ for v in self: if self._prep_contains(v) == self._prep_contains(synonym): return True return False
[docs] def append(self, synonym: str) -> None: """ Append ``synonym`` to the end of the list. :param synonym: """ if synonym not in self: super().append(str(synonym))
@staticmethod def _prep_contains(val: str) -> str: val = val.casefold() for remove in ['-', '_', ' ']: val = val.replace(remove, ' ') return val
[docs]def get_synonyms( identifier: Union[str, int, Sequence[Union[str, int]]], namespace: Union[PubChemNamespace, str] = PubChemNamespace.name, ) -> List[Dict]: """ Returns a list of synonyms for the compound with the given identifier. As more than one compound may be identified the results are returned in a list. :param identifier: Identifiers (e.g. name, CID) for the compound to look up. When using the CID namespace data for multiple compounds can be retrieved at once by supplying either a comma-separated string or a list. :param namespace: The type of identifier to look up. Valid values are in :class:`~.PubChemNamespace`. :return: List of dictionaries containing the CID and a list of synonyms for the compounds. """ data = rest_get_synonyms(identifier, namespace) results = [] for compound in data["InformationList"]["Information"]: parsed_data = { "CID": compound["CID"], "synonyms": Synonyms(compound["Synonym"][:20]), } results.append(parsed_data) return results
[docs]def rest_get_synonyms( identifier: Union[str, int, Sequence[Union[str, int]]], namespace: Union[PubChemNamespace, str] = PubChemNamespace.name, **kwargs, ) -> Dict: """ Get the list of synonyms for the given compound. :param identifier: Identifiers (e.g. name, CID) for the compound to look up. When using the CID namespace data for multiple compounds can be retrieved at once by supplying either a comma-separated string or a list. :param namespace: The type of identifier to look up. Valid values are in :class:`~.PubChemNamespace` :param kwargs: Optional arguments that ``json.loads`` takes. :raises ValueError: If the response body does not contain valid JSON. :return: Parsed JSON data. """ return do_rest_get(namespace, identifier, domain="synonyms").json(**kwargs)