Source code for chemistry_tools.constants

#!/usr/bin/env python3
#
#  constants.py
#
#  Copyright (c) 2019-2020 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU Lesser General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#
#  Based on ChemPy (https://github.com/bjodah/chempy)
#  |  Copyright (c) 2015-2018, Björn Dahlgren
#  |  All rights reserved.
#  |
#  |  Redistribution and use in source and binary forms, with or without modification,
#  |  are permitted provided that the following conditions are met:
#  |
#  |    Redistributions of source code must retain the above copyright notice, this
#  |    list of conditions and the following disclaimer.
#  |
#  |    Redistributions in binary form must reproduce the above copyright notice, this
#  |    list of conditions and the following disclaimer in the documentation and/or
#  |    other materials provided with the distribution.
#  |
#  |  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#  |  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#  |  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#  |  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
#  |  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
#  |  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#  |  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
#  |  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#  |  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#  |  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

from collections import namedtuple
import quantities
import logging

# String-like types: text (str) and raw bytes.
text_types = (str, bytes)

# Root URL of the PubChem PUG REST endpoint.
API_BASE = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"

# Module logger. A NullHandler is attached so that nothing is emitted
# unless the host application configures logging itself.
log = logging.getLogger("pubchempy")
log.addHandler(logging.NullHandler())


# Maps an anion's formula (element symbols followed by the charge, e.g. 'SO4-2')
# to its name. The table is incomplete.
_anions = {
		'F-': 'fluoride',
		'Cl-': 'chloride',
		'Br-': 'bromide',
		'I-': 'iodide',
		'OH-': 'hydroxide',
		'CN-': 'cyanide',
		'SCN-': 'thiocyanate',
		'CO3-2': 'carbonate',
		'C2O4-2': 'oxalate',
		'HCO3-': 'hydrogencarbonate',
		'NO3-': 'nitrate',
		'NO2-': 'nitrite',
		'PO4-3': 'phosphate',
		'HPO4-2': 'hydrogenphosphate',
		'H2PO4-': 'dihydrogenphosphate',
		'P-3': 'phosphide',
		'SO4-2': 'sulphate',
		'HSO4-': 'hydrogensulphate',
		'SO3-2': 'sulphite',
		'HSO3-': 'hydrogensulphite',
		'S-2': 'sulfide',
		'ClO-': 'hypochlorite',
		'ClO2-': 'chlorite',
		'ClO3-': 'chlorate',
		'ClO4-': 'perchlorate',
		'CrO4-2': 'chromate(VI)',
		'Cr2O7-2': 'dichromate(VI)',
		'MnO4-2': 'manganate(VI)',
		'MnO4-': 'permanganate(VII)',
		'FeO4-2': 'ferrate(VI)',
		'OsO4-2': 'osmate(VI)',
		'BO3-3': 'borate',  # boron's symbol is "B"; the key was previously misspelt 'Bo3-3'
		'BiO3-': 'bismuthate(V)',
		}

# Maps a polyatomic cation's formula to its name.
# Incomplete: only hydronium is listed so far.
_cations = {"H3O+": "hydronium"}

# Oxidation states listed for each cation-forming element, keyed by element symbol.
# NOTE: the original author wrote this table from memory and flagged it as needing review.
_cation_oxidation_states = {
		# 3d transition metals
		"Cr": (2, 3),
		"Fe": (2, 3),
		"Mn": (2, ),
		"Co": (2, 3),
		"Ni": (2, 3),
		"Cu": (1, 2, 3),
		# heavier group 11
		"Ag": (1, 2),
		"Au": (3, ),
		# group 12
		"Zn": (2, ),
		"Cd": (2, ),
		"Hg": (1, 2),  # Tricky: Hg2+2
		# group 13
		"Al": (3, ),
		"Ga": (3, ),
		"In": (3, ),
		"Tl": (1, 3),
		# group 14
		"Sn": (2, 4),
		"Pb": (2, 4),
		# group 15
		"Bi": (3, ),
		"Sb": (3, ),
		}

# Lets properties be given in underscore_separated form (the style used for
# Compound attributes); each key maps to the equivalent CamelCase property name.
PROPERTY_MAP = {
		# Identifiers and scalar descriptors
		"molecular_formula": "MolecularFormula",
		"molecular_weight": "MolecularWeight",
		"canonical_smiles": "CanonicalSMILES",
		"isomeric_smiles": "IsomericSMILES",
		"inchi": "InChI",
		"inchikey": "InChIKey",
		"iupac_name": "IUPACName",
		"xlogp": "XLogP",
		"exact_mass": "ExactMass",
		"monoisotopic_mass": "MonoisotopicMass",
		"tpsa": "TPSA",
		"complexity": "Complexity",
		"charge": "Charge",
		# Counts
		"h_bond_donor_count": "HBondDonorCount",
		"h_bond_acceptor_count": "HBondAcceptorCount",
		"rotatable_bond_count": "RotatableBondCount",
		"heavy_atom_count": "HeavyAtomCount",
		"isotope_atom_count": "IsotopeAtomCount",
		"atom_stereo_count": "AtomStereoCount",
		"defined_atom_stereo_count": "DefinedAtomStereoCount",
		"undefined_atom_stereo_count": "UndefinedAtomStereoCount",
		"bond_stereo_count": "BondStereoCount",
		"defined_bond_stereo_count": "DefinedBondStereoCount",
		"undefined_bond_stereo_count": "UndefinedBondStereoCount",
		"covalent_unit_count": "CovalentUnitCount",
		# 3D properties
		"volume_3d": "Volume3D",
		# The next two keys are aliases for the same property.
		"conformer_rmsd_3d": "ConformerModelRMSD3D",
		"conformer_model_rmsd_3d": "ConformerModelRMSD3D",
		"x_steric_quadrupole_3d": "XStericQuadrupole3D",
		"y_steric_quadrupole_3d": "YStericQuadrupole3D",
		"z_steric_quadrupole_3d": "ZStericQuadrupole3D",
		"feature_count_3d": "FeatureCount3D",
		"feature_acceptor_count_3d": "FeatureAcceptorCount3D",
		"feature_donor_count_3d": "FeatureDonorCount3D",
		"feature_anion_count_3d": "FeatureAnionCount3D",
		"feature_cation_count_3d": "FeatureCationCount3D",
		"feature_ring_count_3d": "FeatureRingCount3D",
		"feature_hydrophobe_count_3d": "FeatureHydrophobeCount3D",
		"effective_rotor_count_3d": "EffectiveRotorCount3D",
		"conformer_count_3d": "ConformerCount3D",
		}


class CoordinateType:
	"""
	Integer codes identifying coordinate types.

	The members are plain class attributes (not an :class:`enum.Enum`),
	so each one compares and serialises as an ordinary :class:`int`.
	"""

	TWO_D, THREE_D = 1, 2
	SUBMITTED, EXPERIMENTAL, COMPUTED = 3, 4, 5
	STANDARDIZED, AUGMENTED, ALIGNED, COMPACT = 6, 7, 8, 9

	# Unit codes
	UNITS_ANGSTROMS, UNITS_NANOMETERS = 10, 11
	UNITS_PIXEL, UNITS_POINTS, UNITS_STDBONDS = 12, 13, 14
	UNITS_UNKNOWN = 255


class ProjectCategory:
	"""
	Integer codes identifying project categories.

	The members are plain class attributes (not an :class:`enum.Enum`),
	so each one compares and serialises as an ordinary :class:`int`.
	"""

	MLSCN, MPLCN = 1, 2
	MLSCN_AP, MPLCN_AP = 3, 4
	JOURNAL_ARTICLE, ASSAY_VENDOR = 5, 6
	LITERATURE_EXTRACTED, LITERATURE_AUTHOR, LITERATURE_PUBLISHER = 7, 8, 9
	RNAIGI = 10
	OTHER = 255


class Constant(namedtuple('__BaseConstant', 'name value unit symbol')):
	"""
	A named constant, stored as an immutable ``(name, value, unit, symbol)`` tuple.

	:param name: The name of the constant.
	:param value: The numerical value of the constant.
	:param unit: The unit ``value`` is expressed in. Optional; defaults to :py:obj:`None`.
	:param symbol: The symbol for the constant. Optional; defaults to :py:obj:`None`.

	``float(constant)`` and ``int(constant)`` convert the bare ``value``, ignoring the unit.
	"""

	# ``unit`` and ``symbol`` are given defaults here because a namedtuple's
	# generated constructor would otherwise require all four fields.
	# The ``unit`` annotation is a string so it is not evaluated when the class is defined.
	def __new__(
			cls,
			name: str,
			value: float,
			unit: "quantities.quantity.Quantity" = None,
			symbol: str = None,
			):
		return super().__new__(cls, name, value, unit, symbol)

	def as_quantity(self):
		"""
		Returns the constant as a :class:`quantities.quantity.Quantity` object.

		A constant created without a unit is returned as a dimensionless quantity.

		:rtype: :class:`quantities.quantity.Quantity`
		"""

		if self.unit is None:
			# ``value * None`` would raise an opaque TypeError.
			return self.value * quantities.dimensionless

		return self.value * self.unit

	def __float__(self):
		return float(self.value)

	def __int__(self):
		return int(self.value)
# The following from periodictable
# public domain data
# Author: Paul Kienzle
# The figure in brackets after each value is the uncertainty in its last digits.

avogadro_number = avogadro_constant = Constant(
		name="Avogadro constant",
		value=6.02214179e23,
		unit=1 / quantities.mol,
		symbol="N<sub>A</sub>",
		)  # (30)

# Planck's constant in electron-volt seconds.
# An earlier revision multiplied the value by 10 ** 34 and gave the unit as eV / s;
# h has dimensions of energy * time and is 4.13566733e-15 eV s.
plancks_constant = planck_constant = Constant(
		name="Planck's constant",
		value=4.13566733e-15,
		unit=quantities.electron_volt * quantities.second,
		symbol="h",
		)  # (10)

speed_of_light = Constant(
		name="Speed of Light",
		value=299792458,
		unit=quantities.m / quantities.second,
		symbol="c",
		)  # (exact)

electron_radius = Constant(
		name="Electron Radius",
		value=2.8179402894e-15,
		unit=quantities.m,
		symbol="rₑ",
		)  # (58)

# From NIST Reference on Constants, Units, and Uncertainty
# http://physics.nist.gov/cuu/index.html
# neutron mass = 1.008 664 915 97(43) u
# atomic mass constant m_u = 1.660 538 782(83) x 10-27 kg

# NOTE(review): the superscript in the symbol is the letter "o", not the digit zero -- confirm intent.
neutron_mass = Constant(
		name="Neutron mass",
		value=1.00866491597,
		unit=quantities.atomic_mass_unit,
		symbol="n<sup>o</sup>",
		)  # (43)

# A bare float (not a Constant): one atomic mass unit expressed in kilograms.
atomic_mass_constant = float(quantities.atomic_mass_unit.rescale(quantities.kg))

faraday_constant = Constant(
		name="Faraday constant",
		value=96485.3321233100184,
		unit=quantities.coulomb * (1 / quantities.mol),
		symbol="F",
		)

vacuum_permittivity = Constant(
		"Vacuum permittivity",
		value=8.8541878128e-12,
		unit=quantities.farad / quantities.metre,
		symbol="ε₀",
		)

boltzmann_constant = Constant(
		name="Boltzmann constant",
		value=1.380649e-23,
		unit=quantities.joule / quantities.kelvin,
		symbol="k<sub>B</sub>",
		)

molar_gas_constant = Constant(
		name="Molar gas constant",
		value=8.31446261815324,
		unit=quantities.joule / quantities.kelvin / quantities.mol,
		symbol="R",
		)

# IUPAC prefixes
# Maps a multiplicity to its multiplying prefix. Only the numbers listed are
# present; other numbers (e.g. 24 or 45) have no entry.
prefixes = {
		1: "mono",
		2: "di",
		3: "tri",
		4: "tetra",
		5: "penta",
		6: "hexa",
		7: "hepta",
		8: "octa",
		9: "nona",
		10: "deca",
		11: "undeca",
		12: "dodeca",
		13: "trideca",
		14: "tetradeca",
		15: "pentadeca",
		16: "hexadeca",
		17: "heptadeca",
		18: "octadeca",
		19: "nonadeca",
		20: "icosa",
		21: "henicosa",
		22: "docosa",
		23: "tricosa",
		30: "triaconta",
		31: "hentriaconta",
		32: "dotriaconta",
		40: "tetraconta",
		50: "pentaconta",
		60: "hexaconta",
		70: "heptaconta",
		80: "octaconta",
		90: "nonaconta",
		100: "hecta",
		200: "dicta",
		300: "tricta",
		400: "tetracta",
		500: "pentacta",
		600: "hexacta",
		700: "heptacta",
		800: "octacta",
		900: "nonacta",
		1000: "kilia",
		2000: "dilia",
		3000: "trilia",
		4000: "tetralia",
		5000: "pentalia",
		6000: "hexalia",
		7000: "heptalia",
		8000: "octalia",
		9000: "nonalia",
		}