Source code for chemistry_tools.pubchem.full_record

#!/usr/bin/env python3
#
#  full_record.py
"""
Functions for accessing the complete set of data held by PubChem for a compound.
"""
#
#  Copyright (c) 2020 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU Lesser General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#  GNU Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#

# stdlib
from typing import Dict, List, Sequence, Union

# this package
from chemistry_tools.pubchem.enums import PubChemNamespace
from chemistry_tools.pubchem.properties import _parse_record_property
from chemistry_tools.pubchem.pug_rest import do_rest_get

__all__ = ["parse_full_record", "rest_get_full_record"]


def parse_full_record(record: Dict) -> List[Dict]:
    """
    Parse the complete PubChem record for a compound.

    :param record: The decoded record. It must have a ``PC_Compounds`` key holding
        an iterable with one entry per compound.

    :return: A list with one dictionary per compound, each with the keys ``atoms``,
        ``bonds``, ``charge``, ``coords``, ``properties``, ``cid`` and ``counts``.
        ``bonds`` defaults to ``{}`` and ``charge`` to ``0`` when the compound entry
        does not contain them.

    :raises KeyError: If ``PC_Compounds`` is missing, or a compound entry lacks
        one of ``id``, ``count``, ``props``, ``atoms`` or ``coords``.
    """

    parsed_records = []

    for compound in record["PC_Compounds"]:
        cid = compound["id"]["id"]["cid"]
        counts = compound["count"]

        # Each raw property entry is converted by the package's own parser.
        properties = [_parse_record_property(prop) for prop in compound["props"]]

        parsed_records.append(
            dict(
                atoms=compound["atoms"],
                # "bonds" and "charge" are optional in a compound entry.
                bonds=compound.get("bonds", {}),
                charge=compound.get("charge", 0),
                coords=compound["coords"],
                properties=properties,
                cid=cid,
                counts=counts,
            )
        )

    return parsed_records
def rest_get_full_record(
        identifier: Union[str, int, Sequence[Union[str, int]]],
        namespace: Union[PubChemNamespace, str] = PubChemNamespace.name,
        record_type: str = "2d",
        **kwargs,
        ) -> Dict:
    """
    Obtains the full record for the given compound from the PubChem REST API.

    :param identifier: Identifiers (e.g. name, CID) for the compound to look up.
        When using the CID namespace data for multiple compounds can be retrieved at once by
        supplying either a comma-separated string or a list.
    :param namespace: The type of identifier to look up. Valid values are in :class:`~.PubChemNamespace`.
    :param record_type: Passed unchanged to ``do_rest_get`` as its ``record_type``
        argument. Presumably selects between the ``"2d"`` and ``"3d"`` records offered
        by PubChem -- confirm against the PUG REST documentation.
    :param kwargs: Optional arguments that ``json.loads`` takes.

    :raises ValueError: If the response body does not contain valid JSON.

    :return: Parsed JSON data
    """

    response = do_rest_get(namespace, identifier, record_type=record_type)

    # The keyword arguments are for the JSON decoder, not for the request itself.
    return response.json(**kwargs)