Package rdkit :: Package Chem :: Package Pharm2D :: Module Generate
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Pharm2D.Generate

  1  # 
  2  # Copyright (C) 2002-2008 greg Landrum and Rational Discovery LLC 
  3  # 
  4  #   @@ All Rights Reserved @@ 
  5  #  This file is part of the RDKit. 
  6  #  The contents are covered by the terms of the BSD license 
  7  #  which is included in the file license.txt, found at the root 
  8  #  of the RDKit source tree. 
  9  # 
 10  """ generation of 2D pharmacophores 
 11   
 12  **Notes** 
 13   
 14    - The terminology for this gets a bit rocky, so here's a glossary of what 
 15      terms used here mean: 
 16   
 17        1) *N-point pharmacophore* a combination of N features along with 
 18           distances betwen them. 
 19   
 20        2) *N-point proto-pharmacophore*: a combination of N feature 
 21           definitions without distances.  Each N-point 
 22           proto-pharmacophore defines a manifold of potential N-point 
 23           pharmacophores. 
 24   
 25        3) *N-point scaffold*: a collection of the distances defining 
 26           an N-point pharmacophore without feature identities. 
 27   
 28    See Docs/Chem/Pharm2D.triangles.jpg for an illustration of the way 
 29    pharmacophores are broken into triangles and labelled. 
 30   
 31    See Docs/Chem/Pharm2D.signatures.jpg for an illustration of bit 
 32    numbering 
 33   
 34  """ 
 35  from __future__ import print_function 
 36   
 37  from rdkit.Chem.Pharm2D import Utils, SigFactory 
 38  from rdkit.RDLogger import logger 
 39   
 40  logger = logger() 
 41   
 42  _verbose = 0 
 43   
 44   
45 -def _ShortestPathsMatch(match, featureSet, sig, dMat, sigFactory):
46 """ Internal use only 47 48 """ 49 if _verbose: 50 print('match:', match) 51 nPts = len(match) 52 distsToCheck = Utils.nPointDistDict[nPts] 53 nDists = len(distsToCheck) 54 dist = [0] * nDists 55 bins = sigFactory.GetBins() 56 minD, maxD = bins[0][0], bins[-1][1] 57 58 for i in range(nDists): 59 pt0, pt1 = distsToCheck[i] 60 minSeen = maxD 61 for idx1 in match[pt0]: 62 for idx2 in match[pt1]: 63 minSeen = min(minSeen, dMat[idx1, idx2]) 64 if minSeen == 0 or minSeen < minD: 65 return 66 # FIX: this won't be an int if we're using the bond order. 67 d = int(minSeen) 68 # do a quick distance filter 69 if d == 0 or d < minD or d >= maxD: 70 return 71 dist[i] = d 72 73 idx = sigFactory.GetBitIdx(featureSet, dist, sortIndices=False) 74 if _verbose: 75 print('\t', dist, minD, maxD, idx) 76 77 if sigFactory.useCounts: 78 sig[idx] = sig[idx] + 1 79 else: 80 sig.SetBit(idx) 81 return idx
82 83
84 -def Gen2DFingerprint(mol, sigFactory, perms=None, dMat=None, bitInfo=None):
85 """ generates a 2D fingerprint for a molecule using the 86 parameters in _sig_ 87 88 **Arguments** 89 90 - mol: the molecule for which the signature should be generated 91 92 - sigFactory : the SigFactory object with signature parameters 93 NOTE: no preprocessing is carried out for _sigFactory_. 94 It *must* be pre-initialized. 95 96 - perms: (optional) a sequence of permutation indices limiting which 97 pharmacophore combinations are allowed 98 99 - dMat: (optional) the distance matrix to be used 100 101 - bitInfo: (optional) used to return the atoms involved in the bits 102 103 """ 104 if not isinstance(sigFactory, SigFactory.SigFactory): 105 raise ValueError('bad factory') 106 featFamilies = sigFactory.GetFeatFamilies() 107 if _verbose: 108 print('* feat famillies:', featFamilies) 109 nFeats = len(featFamilies) 110 minCount = sigFactory.minPointCount 111 maxCount = sigFactory.maxPointCount 112 if maxCount > 3: 113 logger.warning(' Pharmacophores with more than 3 points are not currently supported.\n' + 114 'Setting maxCount to 3.') 115 maxCount = 3 116 117 # generate the molecule's distance matrix, if required 118 if dMat is None: 119 from rdkit import Chem 120 useBO = sigFactory.includeBondOrder 121 dMat = Chem.GetDistanceMatrix(mol, useBO) 122 123 # generate the permutations, if required 124 if perms is None: 125 perms = [] 126 for count in range(minCount, maxCount + 1): 127 perms += Utils.GetIndexCombinations(nFeats, count) 128 129 # generate the matches: 130 featMatches = sigFactory.GetMolFeats(mol) 131 if _verbose: 132 print(' featMatches:', featMatches) 133 134 sig = sigFactory.GetSignature() 135 for perm in perms: 136 # the permutation is a combination of feature indices 137 # defining the feature set for a proto-pharmacophore 138 featClasses = [0] 139 for i in range(1, len(perm)): 140 if perm[i] == perm[i - 1]: 141 featClasses.append(featClasses[-1]) 142 else: 143 featClasses.append(featClasses[-1] + 1) 144 145 # Get a set of matches at each index of 146 # the proto-pharmacophore. 147 matchPerms = [featMatches[x] for x in perm] 148 if _verbose: 149 print('\n->Perm: %s' % (str(perm))) 150 print(' matchPerms: %s' % (str(matchPerms))) 151 152 # Get all unique combinations of those possible matches: 153 matchesToMap = Utils.GetUniqueCombinations(matchPerms, featClasses) 154 for i, entry in enumerate(matchesToMap): 155 entry = [x[1] for x in entry] 156 matchesToMap[i] = entry 157 if _verbose: 158 print(' mtM:', matchesToMap) 159 160 for match in matchesToMap: 161 if sigFactory.shortestPathsOnly: 162 idx = _ShortestPathsMatch(match, perm, sig, dMat, sigFactory) 163 if idx is not None and bitInfo is not None: 164 l = bitInfo.get(idx, []) 165 l.append(match) 166 bitInfo[idx] = l 167 return sig
168