Package rdkit :: Package ML :: Module CompositeRun
[hide private]
[frames | no frames]

Source Code for Module rdkit.ML.CompositeRun

  1  # $Id$ 
  2  # 
  3  #  Copyright (C) 2002-2006  greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ contains a class to store parameters for and results from 
 12  Composite building 
 13   
 14  """ 
 15  from rdkit import RDConfig 
 16  from rdkit.Dbase.DbConnection import DbConnect 
 17  from rdkit.Dbase import DbModule 
 18   
 19   
def SetDefaults(runDetails):
    """ resets every run parameter on a details object to its default value

      **Arguments**

        - runDetails: the object to initialize.  It is modified in place;
          the defaults are assigned as plain attributes.

      **Returns**

        the same _runDetails_ object that was passed in.

    """
    # The table is rebuilt on every call so that the list-valued defaults
    # (qBounds, activityBounds) are fresh objects and never shared between
    # two details objects.
    defaults = (
        ('nRuns', 1),
        ('nModels', 10),
        ('outName', ''),
        ('badName', ''),
        ('splitRun', 0),
        ('splitFrac', 0.7),
        ('lockRandom', 0),
        ('randomActivities', 0),
        ('shuffleActivities', 0),
        ('replacementSelection', 0),

        #
        # Tree Parameters
        #
        ('useTrees', 1),
        ('pruneIt', 0),
        ('lessGreedy', 0),
        ('limitDepth', -1),
        ('recycleVars', 0),
        ('randomDescriptors', 0),  # toggles growing of random forests

        #
        # KNN Parameters
        #
        ('useKNN', 0),
        ('knnDistFunc', ''),
        ('knnNeighs', 0),

        #
        # SigTree Parameters
        #
        ('useSigTrees', 0),
        ('useCMIM', 0),
        ('allowCollections', False),

        #
        # Naive Bayes Classifier Parameters
        #
        ('useNaiveBayes', 0),
        ('mEstimateVal', -1.0),
        ('useSigBayes', 0),

        # #
        # # SVM Parameters
        # #
        # runDetails.useSVM = 0
        # runDetails.svmKernel = SVM.radialKernel
        # runDetails.svmType = SVM.cSVCType
        # runDetails.svmGamma = None
        # runDetails.svmCost = None
        # runDetails.svmWeights = None
        # runDetails.svmDataType = 'float'
        # runDetails.svmDegree = 3
        # runDetails.svmCoeff = 0.0
        # runDetails.svmEps = 0.001
        # runDetails.svmNu = 0.5
        # runDetails.svmCache = 40
        # runDetails.svmShrink = 1
        # runDetails.svmDataType='float'

        ('bayesModel', 0),
        ('dbName', ''),
        ('dbUser', RDConfig.defaultDBUser),
        ('dbPassword', RDConfig.defaultDBPassword),
        ('dbWhat', '*'),
        ('dbWhere', ''),
        ('dbJoin', ''),
        ('qTableName', ''),
        ('qBounds', []),
        ('qBoundCount', ''),
        ('activityBounds', []),
        ('activityBoundsVals', ''),
        ('detailedRes', 0),
        ('noScreen', 0),
        ('threshold', 0.0),
        ('filterFrac', 0.0),
        ('filterVal', 0.0),
        ('modelFilterVal', 0.0),
        ('modelFilterFrac', 0.0),
        ('internalHoldoutFrac', 0.3),
        ('pickleDataFileName', ''),
        ('startAt', None),
        ('persistTblName', ''),
        ('randomSeed', (23, 42)),
        ('note', ''),
    )
    for attrName, attrValue in defaults:
        setattr(runDetails, attrName, attrValue)

    return runDetails
121 122
class CompositeRun:
    """ class to store parameters for and results from Composite building

    The class-level _fields_ tuple lists the (column name, SQL type) pairs
    used for the results table.  _Store()_ writes one row per call, taking
    the value for each column from the instance attribute of the same name;
    columns for which the instance has no attribute are left out of the
    insert.

    _fields_ is a tuple, so the default column set is immutable.  This
    is probably what you want.

    """
    # NOTE(review): the column below is spelled "bayesModels" while
    # SetDefaults() in this module assigns the attribute "bayesModel".
    # Store() looks attributes up by column name, so confirm which spelling
    # is intended before relying on that column being populated.
    fields = (("rundate", "varchar(32)"),
              ("dbName", "varchar(200)"),
              ("dbWhat", "varchar(200)"),
              ("dbWhere", "varchar(200)"),
              ("dbJoin", "varchar(200)"),
              ("tableName", "varchar(80)"),
              ("note", "varchar(120)"),
              ("shuffled", "smallint"),
              ("randomized", "smallint"),
              ("overall_error", "float"),
              ("holdout_error", "float"),
              ("overall_fraction_dropped", "float"),
              ("holdout_fraction_dropped", "float"),
              ("overall_correct_conf", "float"),
              ("overall_incorrect_conf", "float"),
              ("holdout_correct_conf", "float"),
              ("holdout_incorrect_conf", "float"),
              ("overall_result_matrix", "varchar(256)"),
              ("holdout_result_matrix", "varchar(256)"),
              ("threshold", "float"),
              ("splitFrac", "float"),
              ("filterFrac", "float"),
              ("filterVal", "float"),
              ("modelFilterVal", "float"),
              ("modelFilterFrac", "float"),
              ("nModels", "int"),
              ("limitDepth", "int"),
              ("bayesModels", "int"),
              ("qBoundCount", "varchar(3000)"),
              ("activityBoundsVals", "varchar(200)"),
              ("cmd", "varchar(500)"),
              ("model", DbModule.binaryTypeName), )

    def _CreateTable(self, cn, tblName):
        """ *Internal Use only*

        Makes sure the table _tblName_ exists and has a column for every
        entry in _fields_: the table is created if it is missing, otherwise
        any missing columns are added to it.

        **Arguments**

          - cn: a database connection object providing _GetTableNames()_,
            _GetColumnNames()_, _GetCursor()_ and _Commit()_ (_Store()_
            passes in a _DbConnect_)

          - tblName: name of the table to create or extend

        **Notes**

          - table and column names are interpolated directly into the SQL
            text, so _tblName_ must come from a trusted source.

        """
        # A real list (not a lazy map object) so the membership test behaves
        # the same regardless of Python version; table names are compared
        # case-insensitively.
        names = [x.strip().upper() for x in cn.GetTableNames()]
        if tblName.upper() not in names:
            curs = cn.GetCursor()
            fmtStr = ','.join('%s %s' % (name, value) for name, value in self.fields)
            curs.execute('create table %s (%s)' % (tblName, fmtStr))
            cn.Commit()
        else:
            # NOTE(review): GetColumnNames() is called without a table name,
            # so it presumably reports on the connection's own table rather
            # than on tblName -- confirm they are always the same.
            heads = [x.upper() for x in cn.GetColumnNames()]
            curs = cn.GetCursor()
            for name, value in self.fields:
                if name.upper() not in heads:
                    curs.execute('alter table %s add %s %s' % (tblName, name, value))
            cn.Commit()

    def Store(self, db='models.gdb', table='results', user='sysdba', password='masterkey'):
        """ adds the result to a database

        **Arguments**

          - db: name of the database to use

          - table: name of the table to use

          - user&password: connection information

        **Notes**

          - the table is created, or extended with any missing columns,
            before the row is inserted.

          - only those entries of _fields_ for which this object has an
            attribute are written; values are passed to the database as
            bound parameters.

        """
        cn = DbConnect(db, table, user, password)
        curs = cn.GetCursor()
        self._CreateTable(cn, table)

        cols = []
        vals = []
        for name, _ in self.fields:
            try:
                v = getattr(self, name)
            except AttributeError:
                # no value recorded for this column: leave it out of the insert
                continue
            cols.append('%s' % name)
            vals.append(v)

        qs = ','.join([DbModule.placeHolder] * len(vals))
        cmd = 'insert into %s (%s) values (%s)' % (table, ','.join(cols), qs)
        curs.execute(cmd, tuple(vals))
        cn.Commit()

    def GetDataSet(self, **kwargs):
        """ Returns a MLDataSet pulled from a database using our stored
        values.

        **Arguments**

          - kwargs: passed through unchanged to _DataUtils.DBToData()_

        **Notes**

          - reads _dbName_, _tableName_, _dbUser_, _dbPassword_, _dbWhat_,
            _dbWhere_ and _dbJoin_ from this object.

        """
        # imported here rather than at module level, as in the original code
        from rdkit.ML.Data import DataUtils
        data = DataUtils.DBToData(self.dbName, self.tableName, user=self.dbUser,
                                  password=self.dbPassword, what=self.dbWhat,
                                  where=self.dbWhere, join=self.dbJoin, **kwargs)

        return data

    def GetDataSetInfo(self, **kwargs):
        """ Returns the column names and types for the data described by our
        stored values.

        **Arguments**

          - kwargs: accepted for symmetry with _GetDataSet()_; currently
            unused.

        **Returns**

          the result of _DbConnect.GetColumnNamesAndTypes()_ for our stored
          _dbJoin_, _dbWhat_ and _dbWhere_ values.

        **Notes**

          - when both _dbUser_ and _dbPassword_ are set on this object they
            are used for the connection, so this method logs in the same way
            _GetDataSet()_ does.  If either is missing, the connection falls
            back to the _DbConnect_ default credentials.

        """
        try:
            user, password = self.dbUser, self.dbPassword
        except AttributeError:
            conn = DbConnect(self.dbName, self.tableName)
        else:
            # positional order (db, table, user, password) matches Store()
            conn = DbConnect(self.dbName, self.tableName, user, password)
        res = conn.GetColumnNamesAndTypes(join=self.dbJoin, what=self.dbWhat,
                                          where=self.dbWhere)
        return res
242