Source code for chemistry_tools.pubchem.description

#!/usr/bin/env python3
#
#  description.py
"""
Functions to access the name and description of compounds in the PubChem database.
"""
#
#  Copyright (c) 2020 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU Lesser General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#  GNU Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#

# stdlib
from typing import Any, Dict, List, Sequence, Union

# this package
from chemistry_tools.pubchem.enums import PubChemNamespace
from chemistry_tools.pubchem.properties import rest_get_properties_json
from chemistry_tools.pubchem.pug_rest import do_rest_get

# Public API of this module: the names exported by a star-import.
__all__ = [
		"get_iupac_name",
		"get_description",
		"get_common_name",
		"get_compound_id",
		"rest_get_description",
		"parse_description"
		]


def get_iupac_name(name: str) -> str:
	"""
	Look up a compound by name and return its systematic IUPAC name.

	:param name: The name of the compound to look up.

	:return: The ``IUPACName`` property of the first entry in the returned
		property table, converted to a string.
	"""

	response = rest_get_properties_json(name, PubChemNamespace.name, properties="IUPACName")

	# Only the first entry of the property table is consulted.
	first_match = response["PropertyTable"]["Properties"][0]
	return str(first_match["IUPACName"])
def get_description(name: str) -> str:
	"""
	Look up a compound by name and return its description.

	:param name: The name of the compound to look up.

	:return: The ``Description`` field of the first compound in the parsed response.
		:func:`parse_description` leaves this as :py:obj:`None` when the response
		holds no description for that compound.
	"""

	compounds = parse_description(rest_get_description(name, PubChemNamespace.name))

	# Only the first compound in the parsed response is consulted.
	first_compound = compounds[0]
	return first_compound["Description"]
def get_common_name(name: str) -> str:
	"""
	Look up a compound by name and return its common name.

	:param name: The name of the compound to look up.

	:return: The ``Title`` field of the first compound in the parsed response.
	"""

	raw_response = rest_get_description(name, PubChemNamespace.name)

	# Only the first compound in the parsed response is consulted.
	first_compound, *_ = parse_description(raw_response)[:1] or [parse_description(raw_response)[0]]
	return first_compound["Title"]
def get_compound_id(name: str) -> str:
	"""
	Look up a compound by name and return its compound ID (CID).

	:param name: The name of the compound to look up.

	:return: The ``CID`` field of the first compound in the parsed response,
		passed through exactly as it appears in the response (no conversion is applied).
	"""

	compounds = parse_description(rest_get_description(name, PubChemNamespace.name))

	# Only the first compound in the parsed response is consulted.
	first_compound = compounds[0]
	return first_compound["CID"]
def rest_get_description(
		identifier: Union[str, int, Sequence[Union[str, int]]],
		namespace: Union[PubChemNamespace, str] = PubChemNamespace.name,
		**kwargs,
		) -> Dict[str, Any]:
	"""
	Query the ``description`` domain of the PubChem REST API and decode the JSON response.

	:param identifier: Identifiers (e.g. name, CID) for the compound to look up.
		When using the CID namespace, data for multiple compounds can be retrieved at once
		by supplying either a comma-separated string or a list.
	:param namespace: The type of identifier to look up.
		Valid values are in :class:`~.PubChemNamespace`.
	:param kwargs: Optional arguments that ``json.loads`` takes;
		they are forwarded to the response's ``json()`` method.

	:raises ValueError: If the response body does not contain valid JSON.

	:return: Parsed JSON data
	"""

	response = do_rest_get(namespace, identifier, domain="description")
	return response.json(**kwargs)
def parse_description(description_data: Dict[str, Any]) -> List[Dict]:
	"""
	Collapse the raw output of the REST API's ``description`` endpoint into one record per compound.

	Every entry under ``InformationList`` → ``Information`` is merged into the record
	for its ``CID``. When several entries for the same CID carry the same field,
	the value from the last such entry is kept.

	:param description_data: The decoded JSON response from the ``description`` endpoint.

	:return: A list of dictionaries containing the CID, Title and Description for each compound,
		in order of each CID's first appearance. Fields absent from the response are :py:obj:`None`.
	"""

	wanted = {"Title", "Description"}
	by_cid = {}

	for record in description_data["InformationList"]["Information"]:
		cid = record["CID"]

		if cid not in by_cid:
			# First sighting of this compound: start with every wanted field unset.
			blank = dict.fromkeys(wanted)
			blank["CID"] = cid
			by_cid[cid] = blank

		# Copy across whichever wanted fields this entry actually provides.
		by_cid[cid].update((key, record[key]) for key in wanted if key in record)

	return list(by_cid.values())