Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/local/bin/python 

2# encoding: utf-8 

3""" 

4*Convert a python dictionary into rows of a mysql table* 

5 

6:Author: 

7 David Young 

8 

9:Date Created: 

10 June 21, 2016 

11""" 

12################# GLOBAL IMPORTS #################### 

13from builtins import zip 

14from builtins import str 

15from builtins import range 

16import sys 

17import os 

18os.environ['TERM'] = 'vt100' 

19import re 

20import yaml 

21import time 

22import datetime 

23import collections as c 

24import pymysql as mdb 

25from fundamentals import tools, times 

26from fundamentals.mysql import writequery, table_exists, readquery 

27import six 

28 

29 

def convert_dictionary_to_mysql_table(
        log,
        dictionary,
        dbTableName,
        uniqueKeyList=None,
        dbConn=False,
        createHelperTables=False,
        dateModified=False,
        returnInsertOnly=False,
        replace=False,
        batchInserts=True,
        reDatetime=False,
        skipChecks=False,
        dateCreated=True):
    """convert dictionary to mysql table

    Each key of ``dictionary`` becomes a column (spaces and hyphens are replaced
    with underscores, ``dec``/``DEC`` are renamed ``decl``/``DECL``) and each value
    becomes the content of that column in a single new row. A value may also be a
    two item list, ``[value, comment]``; entries whose list value is ``None`` are
    left out of the row. The ``dictionary`` and ``uniqueKeyList`` passed in are
    never modified.

    **Key Arguments:**
        - ``log`` -- logger
        - ``dictionary`` -- python dictionary
        - ``dbConn`` -- the db connection (only required if the statements are to be executed)
        - ``dbTableName`` -- name of the table you wish to add the data to (or create if it does not exist)
        - ``uniqueKeyList`` -- a list of column names that are combined to create a unique index on the table
        - ``createHelperTables`` -- must be a boolean; it is type-checked but otherwise unused by this function
        - ``returnInsertOnly`` -- returns only the insert command (does not execute it, and does not touch the database at all)
        - ``dateModified`` -- add a modification date and updated flag to the row
        - ``replace`` -- add an ``ON DUPLICATE KEY UPDATE`` clause to the insert statement (useful when updates are required)
        - ``batchInserts`` -- if returning insert statements, return a parameterised insert command together with a tuple of values (``True``) or one fully rendered insert statement (``False``)
        - ``reDatetime`` -- compiled regular expression matching datetime (passing this in cuts down on execution time as it doesn't have to be recompiled everytime during multiple iterations of ``convert_dictionary_to_mysql_table``)
        - ``skipChecks`` -- skip the table-exists check (``CREATE TABLE IF NOT EXISTS`` is then always issued). Less robust but a little faster.
        - ``dateCreated`` -- add a ``dateCreated`` column, populated with ``NOW()``, to the insert statement?

    **Return:**
        - ``(insertCommand, valueTuple)`` if ``returnInsertOnly`` and ``batchInserts`` are both ``True``
        - the rendered insert statement (a string) if ``returnInsertOnly`` is ``True`` and ``batchInserts`` is ``False``
        - ``(None, None)`` once the row has been written to the database

    **Raises:**
        - ``TypeError`` -- ``dbConn``, ``dictionary``, ``uniqueKeyList`` or ``createHelperTables`` has the wrong type (only checked when the statements are executed)
        - ``ValueError`` -- a unique key is missing from ``dictionary`` or a value cannot be stored (only checked when the statements are executed)

    **Usage:**

        To add a python dictionary to a database table, creating the table and/or columns if they don't yet exist:

        .. code-block:: python

            from fundamentals.mysql import convert_dictionary_to_mysql_table
            dictionary = {"a newKey": "cool", "and another": "super cool",
                          "uniquekey1": "cheese", "uniqueKey2": "burgers"}

            convert_dictionary_to_mysql_table(
                dbConn=dbConn,
                log=log,
                dictionary=dictionary,
                dbTableName="testing_table",
                uniqueKeyList=["uniquekey1", "uniqueKey2"],
                dateModified=False,
                returnInsertOnly=False,
                replace=True
            )

        Or just return the insert statement with a tuple of values, i.e. do not execute the command on the database:

        .. code-block:: python

            insertCommand, valueTuple = convert_dictionary_to_mysql_table(
                log=log,
                dictionary={"a newKey": "cool", "uniquekey1": "cheese"},
                dbTableName="testing_table",
                returnInsertOnly=True,
                replace=False,
                batchInserts=True
            )

            print(insertCommand, valueTuple)

            # OUT: INSERT INTO `testing_table` (`a_newKey`,`uniquekey1`, dateCreated)
            # VALUES (%s, %s, NOW()) ('cool', 'cheese')

        You can also return a single rendered insert statement using ``batchInserts = False``. Using ``replace = True`` will also add instructions about how to replace duplicate entries in the database table if found:

        .. code-block:: python

            insert = convert_dictionary_to_mysql_table(
                log=log,
                dictionary={"a": "cool", "b": 5},
                dbTableName="testing_table",
                returnInsertOnly=True,
                replace=True,
                batchInserts=False
            )

            print(insert)

            # OUT: INSERT IGNORE INTO `testing_table` (`a`,`b`, dateCreated)
            # VALUES ("cool" ,"5", NOW())  ON DUPLICATE KEY UPDATE  `a`="cool", `b`="5"
    """
    log.debug('starting the ``convert_dictionary_to_mysql_table`` function')

    if uniqueKeyList is None:
        uniqueKeyList = []

    if not reDatetime:
        reDatetime = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')

    # NOTE(review): plain INSERT is used when ``replace`` is false and INSERT
    # IGNORE when it is true. This mapping is kept exactly as it was; confirm
    # it is the intended way round.
    insertVerb = "INSERT IGNORE" if replace else "INSERT"

    # THE ORIGINAL EQUALITY TESTS ARE KEPT (RATHER THAN TRUTHINESS) SO THAT
    # NON-BOOLEAN FLAG VALUES SELECT THE SAME CODE PATH AS BEFORE
    executeQueries = (returnInsertOnly == False)
    returnBatch = (returnInsertOnly == True and batchInserts == True)

    # NOTE(review): every statement below is assembled by string concatenation
    # with only backslash/double-quote escaping of plain string values. Table
    # and column names are not escaped - do not feed untrusted input in here.

    if executeQueries:
        # TEST THE ARGUMENTS
        _validate_conversion_arguments(
            log=log,
            dictionary=dictionary,
            dbConn=dbConn,
            uniqueKeyList=uniqueKeyList,
            createHelperTables=createHelperTables
        )

        # TEST IF TABLE EXISTS
        if not skipChecks:
            tableExists = table_exists.table_exists(
                dbConn=dbConn,
                log=log,
                dbTableName=dbTableName
            )
        else:
            tableExists = False

        # CREATE THE TABLE IF IT DOES NOT EXIST
        if tableExists is False:
            sqlQuery = """
                CREATE TABLE IF NOT EXISTS `%(dbTableName)s`
                (`primaryId` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'An internal counter',
                `dateCreated` DATETIME NULL DEFAULT CURRENT_TIMESTAMP,
                `dateLastModified` DATETIME NULL DEFAULT CURRENT_TIMESTAMP,
                `updated` tinyint(4) DEFAULT '0',
                PRIMARY KEY (`primaryId`))
                ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=latin1;
            """ % {"dbTableName": dbTableName}
            writequery(
                log=log,
                sqlQuery=sqlQuery,
                dbConn=dbConn,
            )

    # WORK ON A SHALLOW COPY SO THE CALLER'S DICTIONARY IS LEFT UNTOUCHED
    rowData = dict(dictionary)

    # ADD EXTRA COLUMNS TO THE DICTIONARY
    if dateModified:
        rowData['dateLastModified'] = [
            str(times.get_now_sql_datetime()), "date row was modified"]
        if replace == False:
            rowData['updated'] = [0, "this row has been updated"]
        else:
            rowData['updated'] = [1, "this row has been updated"]

    formattedKeyList = []
    myValues = []

    # ITERATE THROUGH THE DICTIONARY (SORTED BY KEY) AND GENERATE THE TABLE
    # COLUMN WITH THE NAME OF THE KEY, IF IT DOES NOT EXIST
    for key, value in sorted(rowData.items()):

        if isinstance(value, list):
            # [None, comment] ENTRIES ARE LEFT OUT OF THE ROW ALTOGETHER
            if value[0] is None:
                continue
            # COPY SO THE YAML CONVERSION BELOW DOES NOT ALTER THE CALLER'S LIST
            value = list(value)

        if len(key) == 0:
            # AN EMPTY KEY CANNOT NAME A COLUMN; SKIP KEY *AND* VALUE SO THE
            # COLUMN AND VALUE LISTS STAY ALIGNED
            log.warning('skipping a dictionary entry with an empty key')
            continue

        formattedKey = _format_mysql_column_name(key)
        formattedKeyList.append(formattedKey)

        # CONVERT LIST AND FEEDPARSER VALUES TO YAML (SO I CAN PASS IT AS A
        # STRING TO MYSQL)
        if isinstance(value, list) and isinstance(value[0], list):
            value[0] = str(yaml.dump(value[0]))

        # REMOVE CHARACTERS THAT COLLIDE WITH MYSQL
        if isinstance(value, str):
            value = value.replace('\\', '\\\\')
            value = value.replace('"', '\\"')
            try:
                # ONLY BYTE-STRINGS CAN BE DECODED; A TEXT STRING RAISES
                # AttributeError HERE AND IS KEPT AS IT IS
                value = value.decode(
                    "utf-8", "ignore").encode("ascii", "ignore")
            except (AttributeError, UnicodeError):
                pass

        # JOIN THE VALUES TOGETHER IN A LIST - EASIER TO GENERATE THE MYSQL
        # COMMAND LATER
        if isinstance(value, list) and isinstance(value[0], str):
            myValues.append('%s' % (value[0].strip(), ))
        elif isinstance(value, list):
            myValues.append('%s' % (value[0], ))
        else:
            myValues.append('%s' % (value, ))

        if not executeQueries:
            continue

        # CHECK IF COLUMN EXISTS YET
        colExists = \
            "SELECT * FROM information_schema.COLUMNS WHERE TABLE_SCHEMA=DATABASE() AND COLUMN_NAME='" + \
            formattedKey + "'AND TABLE_NAME='" + dbTableName + """'"""
        try:
            rows = readquery(
                log=log,
                sqlQuery=colExists,
                dbConn=dbConn,
            )
        except Exception as e:
            log.error('something went wrong' + str(e) + '\n')
            # WITHOUT AN ANSWER WE CANNOT TELL WHETHER THE COLUMN IS MISSING,
            # SO DO NOT ATTEMPT TO CREATE IT
            continue

        if len(rows) != 0:
            continue

        # IF COLUMN DOESN'T EXIST - GENERATE IT
        sample = value[0] if isinstance(value, list) else value
        columnType = _guess_mysql_column_type(
            formattedKey, sample, reDatetime)
        if columnType is None:
            # DO NOT KNOW WHAT FORMAT TO ADD THIS KEY IN MYSQL - REMOVING IT
            # FROM THE ROW
            formattedKeyList.pop()
            myValues.pop()
            continue

        qCreateColumn = "ALTER TABLE `%s` ADD `%s` %s DEFAULT NULL" % (
            dbTableName, formattedKey, columnType)
        # ADD COMMENT TO GIVE THE ORIGINAL KEYWORD IF FORMATTED FOR MYSQL
        if key != formattedKey:
            qCreateColumn += " COMMENT 'original keyword: " + key + "'"

        try:
            log.info('creating the ' +
                     formattedKey + ' column in the ' + dbTableName + ' table')
            writequery(
                log=log,
                sqlQuery=qCreateColumn,
                dbConn=dbConn
            )
        except Exception as e:
            log.error('could not create the ' + formattedKey + ' column in the ' + dbTableName
                      + ' table -- ' + str(e) + '\n')

    # GENERATE THE INDEX NAME - THEN CREATE INDEX IF IT DOES NOT YET EXIST
    if executeQueries and len(uniqueKeyList):
        # A NEW LIST IS BUILT SO THE CALLER'S uniqueKeyList IS NOT REWRITTEN
        uniqueColumns = [_format_mysql_column_name(u) for u in uniqueKeyList]
        indexName = "_".join(uniqueColumns).lower()

        sqlQuery = u"""SELECT COUNT(*) FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = '""" + \
            dbTableName + """' AND INDEX_NAME = '""" + indexName + """'"""
        rows = readquery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn,
            quiet=False
        )

        if rows[0]['COUNT(*)'] == 0:
            addUniqueKey = 'ALTER TABLE `' + dbTableName + \
                '` ADD unique ' + indexName + \
                """ (""" + ','.join(uniqueColumns) + ')'
            writequery(
                log=log,
                sqlQuery=addUniqueKey,
                dbConn=dbConn
            )

    myKeys = '`,`'.join(formattedKeyList)

    if returnBatch:
        # PARAMETERISED INSERT COMMAND + TUPLE OF VALUES
        valueString = ", ".join(["%s"] * len(myValues))
        insertCommand = insertVerb + """ INTO `""" + dbTableName + \
            """` (`""" + myKeys + """`, dateCreated) VALUES (""" + \
            valueString + """, NOW())"""
        valueTuple = tuple(
            [None if m == u"None" else m for m in myValues])

        if replace:
            # NOTE(review): the clause keeps its trailing comma exactly as
            # before; presumably the caller appends further assignments or
            # strips it - confirm before relying on it as complete SQL.
            insertCommand += " ON DUPLICATE KEY UPDATE " + "".join(
                [" %s=values(%s)," % (k, k) for k in formattedKeyList])

        insertCommand = _tidy_insert_statement(insertCommand)

        if not dateCreated:
            insertCommand = insertCommand.replace(
                ", dateCreated)", ")").replace(", NOW())", ")")

        return insertCommand, valueTuple

    # GENERATE THE FULLY RENDERED INSERT COMMAND
    myValues = '" ,"'.join(myValues)
    # REMOVE SOME CONVERSION NOISE (ORDER MATTERS - LONGER PATTERNS FIRST)
    for noise, substitute in (
            ('time.struct_time', ''),
            ('- !!python/object/new:feedparser.FeedParserDict', ''),
            ('!!python/object/new:feedparser.FeedParserDict', ''),
            ('dictitems:', ''),
            ('dictitems', ''),
            ('!!python/unicode:', ''),
            ('!!python/unicode', ''),
            ('"None"', 'null'),
            ('"null"', 'null')):
        myValues = myValues.replace(noise, substitute)

    # CLOSE THE FINAL QUOTED VALUE, UNLESS IT WAS TURNED INTO A BARE null
    if myValues[-4:] != 'null':
        myValues += '"'

    dup = ""
    if replace:
        dupValues = ('"' + myValues).split(" ,")
        pairs = list(zip(formattedKeyList, dupValues))
        dup = " ON DUPLICATE KEY UPDATE " + "".join(
            [" `%s`=%s," % (k, v) for k, v in pairs])

        if dateModified:
            # THE ROW ONLY COUNTS AS UPDATED IF AT LEAST ONE COLUMN DIFFERS
            # FROM THE INCOMING VALUES
            unchanged = " AND ".join(
                [" `%s` is %s" % (k, v) if v == "null"
                 else " `%s`=%s" % (k, v) for k, v in pairs])
            dup = dup + " updated=IF(" + unchanged + \
                ", 0, 1), dateLastModified=IF(" + unchanged + \
                ", dateLastModified, NOW())"
        else:
            # DROP THE TRAILING COMMA
            dup = dup[:-1]

    addValue = insertVerb + """ INTO `""" + dbTableName + \
        """` (`""" + myKeys + """`, dateCreated) VALUES (\"""" + \
        myValues + ", NOW()) " + dup + " "

    if not dateCreated:
        # ONLY THE FIRST NOW() BELONGS TO dateCreated; ANY LATER ONE IS PART
        # OF THE ON DUPLICATE KEY CLAUSE
        addValue = addValue.replace(
            ", dateCreated)", ")").replace(", NOW())", ")", 1)

    addValue = _tidy_insert_statement(addValue)

    if returnInsertOnly == True:
        return addValue

    try:
        writequery(
            log=log,
            sqlQuery=addValue,
            dbConn=dbConn
        )
    except Exception as e:
        log.error("could not add new data added to the table '" +
                  dbTableName + "' : " + str(e) + '\n')

    log.debug('completed the ``convert_dictionary_to_mysql_table`` function')
    return None, None


def _validate_conversion_arguments(
        log,
        dictionary,
        dbConn,
        uniqueKeyList,
        createHelperTables):
    """check the arguments of ``convert_dictionary_to_mysql_table``, logging and raising on the first problem found"""
    if str(type(dbConn).__name__) != "Connection":
        message = 'Please use a valid MySQL DB connection.'
        log.critical(message)
        raise TypeError(message)

    if not isinstance(dictionary, dict):
        message = 'Please make sure "dictionary" argument is a dict type.'
        log.critical(message)
        raise TypeError(message)

    if not isinstance(uniqueKeyList, list):
        message = 'Please make sure "uniqueKeyList" is a list'
        log.critical(message)
        raise TypeError(message)

    for uniqueKey in uniqueKeyList:
        if uniqueKey not in dictionary:
            message = 'Please make sure values in "uniqueKeyList" are present in the "dictionary" you are tring to convert'
            log.critical(message)
            raise ValueError(message)

    storableTypes = tuple(six.string_types) + \
        (int, bool, float, datetime.date)

    for k, v in dictionary.items():
        message = None
        if isinstance(v, list):
            # LIST VALUES MUST BE [value, comment] PAIRS
            if len(v) != 2:
                message = 'Please make sure the list values in "dictionary" 2 items in length'
            elif not (isinstance(v[0], storableTypes) or v[0] is None):
                message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool'
        # THE "int" NAME TEST LETS THROUGH INTEGER-LIKE TYPES THAT DO NOT
        # SUBCLASS int
        elif not (isinstance(v, storableTypes) or v is None or "int" in str(type(v))):
            message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool : %(k)s is a %(this)s' % {
                "k": k, "this": type(v)}
        if message:
            log.critical("%s: in %s we have a %s (%s)" %
                         (message, k, v, type(v)))
            raise ValueError(message)

    if not isinstance(createHelperTables, bool):
        message = 'Please make sure "createHelperTables" is a True or False'
        log.critical(message)
        raise TypeError(message)


def _format_mysql_column_name(key):
    """return ``key`` as used for a column name: spaces/hyphens become underscores and the keyword ``dec`` is renamed"""
    formattedKey = key.replace(" ", "_").replace("-", "_")
    # DEC A KEYWORD IN MYSQL - NEED TO CHANGE BEFORE INGEST
    if formattedKey == u"dec":
        return u"decl"
    if formattedKey == u"DEC":
        return u"DECL"
    return formattedKey


def _guess_mysql_column_type(formattedKey, sample, reDatetime):
    """return the mysql column type to create for ``sample``, or ``None`` if the value type is not supported"""
    textTypes = ("".__class__, u"".__class__)
    if reDatetime.search(str(sample)):
        return 'datetime'
    if formattedKey in ('updated_parsed', 'published_parsed', 'feedName', 'title'):
        return 'varchar(100)'
    if isinstance(sample, textTypes):
        if len(sample) < 80:
            return 'varchar(100)'
        # LONG STRINGS GET GENEROUS HEAD-ROOM FOR LATER, LONGER VALUES
        return 'varchar(' + str(450 + len(sample) * 2) + ')'
    # bool IS A SUBCLASS OF int, SO BOOLEANS ARE COVERED HERE (AS tinyint)
    if isinstance(sample, int):
        return 'tinyint' if abs(sample) <= 9 else 'int'
    if isinstance(sample, float):
        return 'double'
    if isinstance(sample, list):
        return 'varchar(1024)'
    return None


def _tidy_insert_statement(statement):
    """strip yaml conversion noise from an insert statement and turn empty/None quoted values into ``null``"""
    for noise, substitute in (
            ('\\""', '\\" "'),
            ('""', "null"),
            ('!!python/unicode:', ''),
            ('!!python/unicode', ''),
            ('"None"', 'null'),
            ('"null"', 'null')):
        statement = statement.replace(noise, substitute)
    return statement