Coverage for /Users/Dave/git_repos/_packages_/python/fundamentals/fundamentals/mysql/insert_list_of_dictionaries_into_database_tables.py : 17%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/local/bin/python
2# encoding: utf-8
3"""
4*Given a list of dictionaries this function will insert each dictionary as a row into the given database table*
6:Author:
7 David Young
9:Date Created:
10 June 21, 2016
11"""
12from __future__ import print_function
13from __future__ import division
14################# GLOBAL IMPORTS ####################
15from builtins import str
16from builtins import range
17from past.utils import old_div
18import sys
19import os
20os.environ['TERM'] = 'vt100'
21from fundamentals import tools
22from fundamentals.mysql import convert_dictionary_to_mysql_table, writequery
23from fundamentals.fmultiprocess import fmultiprocess
24import time
25import re
26from fundamentals.mysql.database import database
27import pandas as pd
28from datetime import datetime
29import numpy as np
# MODULE-LEVEL STATE SHARED BETWEEN THE ENTRY-POINT FUNCTION AND THE BATCH WORKERS
# NOTE(review): the workers are run through `fmultiprocess`; presumably they rely on
# the worker processes inheriting this module state -- confirm

# row counter; no function visible in this file assigns to it at module scope
count = 0
# total number of rows being inserted; set by
# `insert_list_of_dictionaries_into_database_tables`
totalCount = 0
# either the `dbSettings` dictionary or the live `dbConn`; set by
# `insert_list_of_dictionaries_into_database_tables`
globalDbConn = False
# list of `(batch of dictionaries, running end index)` tuples; each worker is handed
# an index into this list
sharedList = []
def insert_list_of_dictionaries_into_database_tables(
        dbConn,
        log,
        dictList,
        dbTableName,
        uniqueKeyList=None,
        dateModified=False,
        dateCreated=True,
        batchSize=2500,
        replace=False,
        dbSettings=False):
    """*insert a list of dictionaries into a database table, one row per dictionary*

    The first dictionary is passed on its own to ``convert_dictionary_to_mysql_table``; the remaining dictionaries are split into batches of at most ``batchSize`` rows, stored in the module-level ``sharedList`` and processed via ``fmultiprocess``.

    **Key Arguments:**
        - ``dbConn`` -- mysql database connection
        - ``log`` -- logger
        - ``dictList`` -- list of python dictionaries to add to the database table
        - ``dbTableName`` -- name of the database table
        - ``uniqueKeyList`` -- a list of column names to append as a unique constraint on the database. Default *None* (treated as an empty list)
        - ``dateModified`` -- add the modification date as a column in the database
        - ``dateCreated`` -- add the created date as a column in the database
        - ``batchSize`` -- batch the insert commands into *batchSize* batches
        - ``replace`` -- replace row if a duplicate is found
        - ``dbSettings`` -- pass in the database settings so multiprocessing can establish one connection per process (might not be faster). If given, the batches are loaded with ``_add_dictlist_to_database_via_load_in_file`` instead of ``_insert_single_batch_into_database``

    **Return:**
        - None

    **Usage:**

        .. code-block:: python

            from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables
            insert_list_of_dictionaries_into_database_tables(
                dbConn=dbConn,
                log=log,
                dictList=dictList,
                dbTableName="test_insert_many",
                uniqueKeyList=["col1", "col3"],
                dateModified=False,
                batchSize=2500
            )
    """
    log.debug(
        'starting the ````insert_list_of_dictionaries_into_database_tables`` function')

    global totalCount
    global globalDbConn
    global sharedList

    # A FRESH LIST PER CALL - AVOIDS SHARING ONE MUTABLE DEFAULT BETWEEN CALLS
    if uniqueKeyList is None:
        uniqueKeyList = []

    reDate = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')

    # THE WORKERS READ THE CONNECTION (OR THE SETTINGS) FROM THIS MODULE-LEVEL NAME
    globalDbConn = dbSettings if dbSettings else dbConn

    if len(dictList) == 0:
        log.warning(
            'the dictionary to be added to the database is empty')
        return None

    # TO DEBUG A FAILING INSERT, LOOP OVER `dictList` AND CALL
    # `convert_dictionary_to_mysql_table` ON EVERY DICTIONARY INDIVIDUALLY

    # THE FIRST ROW IS INSERTED ON ITS OWN WITH THE FULL SET OF TABLE OPTIONS
    # NOTE(review): presumably this creates the table/columns before the bulk
    # insert -- confirm against `convert_dictionary_to_mysql_table`
    convert_dictionary_to_mysql_table(
        dbConn=dbConn,
        log=log,
        dictionary=dictList[0],
        dbTableName=dbTableName,
        uniqueKeyList=uniqueKeyList,
        dateModified=dateModified,
        reDatetime=reDate,
        replace=replace,
        dateCreated=dateCreated)
    dictList = dictList[1:]

    dbConn.autocommit(False)

    if len(dictList):

        total = len(dictList)

        # STEP THROUGH THE REMAINING ROWS IN `batchSize` STRIDES; UNLIKE
        # `range(batches + 1)` THIS NEVER YIELDS AN EMPTY TRAILING BATCH WHEN
        # `total` IS AN EXACT MULTIPLE OF `batchSize`
        sharedList = [
            (dictList[start:start + batchSize], start + batchSize)
            for start in range(0, total, batchSize)
        ]

        # +1 ACCOUNTS FOR THE FIRST ROW INSERTED ABOVE
        totalCount = total + 1
        ltotalCount = totalCount

        print("Starting to insert %(ltotalCount)s rows into %(dbTableName)s" % locals())

        # `not dbSettings` (RATHER THAN `== False`) SO THAT `None` OR AN EMPTY
        # DICT ALSO FALL BACK TO THE SHARED-CONNECTION INSERT
        if not dbSettings:
            fmultiprocess(
                log=log,
                function=_insert_single_batch_into_database,
                inputArray=list(range(len(sharedList))),
                dbTableName=dbTableName,
                uniqueKeyList=uniqueKeyList,
                dateModified=dateModified,
                replace=replace,
                batchSize=batchSize,
                reDatetime=reDate,
                dateCreated=dateCreated
            )
        else:
            fmultiprocess(
                log=log,
                function=_add_dictlist_to_database_via_load_in_file,
                inputArray=list(range(len(sharedList))),
                dbTablename=dbTableName,
                dbSettings=dbSettings,
                dateModified=dateModified
            )

        # CURSOR UP ONE LINE AND CLEAR LINE BEFORE PRINTING THE FINAL TALLY
        sys.stdout.write("\x1b[1A\x1b[2K")
        print("%(ltotalCount)s / %(ltotalCount)s rows inserted into %(dbTableName)s" % locals())

    log.debug(
        'completed the ``insert_list_of_dictionaries_into_database_tables`` function')
    return None
def _insert_single_batch_into_database(
        batchIndex,
        log,
        dbTableName,
        uniqueKeyList,
        dateModified,
        replace,
        batchSize,
        reDatetime,
        dateCreated):
    """*insert one batch of dictionaries from the module-level ``sharedList`` into a database table*

    The batch is written with a single many-value ``INSERT IGNORE``. If that write raises, the rows are re-written using the insert command and value tuples returned by ``convert_dictionary_to_mysql_table``. If ``writequery`` reports ``"unknown column"``, every row is passed through ``convert_dictionary_to_mysql_table`` and the insert is retried.

    **Key Arguments:**
        - ``batchIndex`` -- the index of the batch (within ``sharedList``) to insert
        - ``log`` -- logger
        - ``dbTableName`` -- name of the database table
        - ``uniqueKeyList`` -- a list of column names forming the unique constraint (passed on to ``convert_dictionary_to_mysql_table``)
        - ``dateModified`` -- passed on to ``convert_dictionary_to_mysql_table``
        - ``replace`` -- update the existing row if a duplicate key is found
        - ``batchSize`` -- unused here; accepted so the keyword arguments sent by the caller match
        - ``reDatetime`` -- compiled regex passed on to ``convert_dictionary_to_mysql_table``
        - ``dateCreated`` -- add ``dateCreated = NOW()`` to each inserted row

    **Return:**
        - ``"None"`` (the string)

    Every dictionary in the batch must contain every key found in the batch, otherwise a ``KeyError`` is raised.
    """
    log.debug('starting the ``_insert_single_batch_into_database`` function')

    global globalDbConn
    global sharedList

    rows = sharedList[batchIndex][0]

    # NOTHING TO INSERT - AVOIDS INDEXING INTO AN EMPTY VALUE LIST
    if not rows:
        log.debug('completed the ``_insert_single_batch_into_database`` function')
        return "None"

    # A SETTINGS DICTIONARY MEANS THIS CALL MUST OPEN (AND LATER CLOSE) ITS OWN
    # CONNECTION; OTHERWISE REUSE THE SHARED ONE
    ownsConnection = isinstance(globalDbConn, dict)
    if ownsConnection:
        dbConn = database(
            log=log,
            dbSettings=globalDbConn,
            autocommit=False
        ).connect()
    else:
        dbConn = globalDbConn

    try:
        # THE ORIGINAL KEYS ARE USED TO READ VALUES OUT OF THE DICTIONARIES, THE
        # SANITISED NAMES ARE USED AS COLUMN NAMES (LOOKING VALUES UP WITH THE
        # SANITISED NAME FAILS FOR KEYS CONTAINING SPACES OR HYPHENS)
        originalKeys = list(set().union(*(list(d.keys()) for d in rows)))
        columnNames = [
            k.replace(" ", "_").replace("-", "_") for k in originalKeys]

        vals = [
            tuple([None if d[k] in ["None", None] else d[k]
                   for k in originalKeys])
            for d in rows
        ]

        columnClause = "`" + '`,`'.join(columnNames) + "`"
        valueClause = ", ".join(["%s"] * len(originalKeys))
        if dateCreated:
            columnClause += ", dateCreated"
            valueClause += ", NOW()"

        batchInsertCommand = "INSERT IGNORE INTO `" + dbTableName + \
            "` (" + columnClause + ") VALUES (" + valueClause + ")"

        if replace:
            updates = ["%s=values(%s)" % (c, c) for c in columnNames]
            updates.append("updated=1, dateLastModified=NOW()")
            batchInsertCommand += " ON DUPLICATE KEY UPDATE " + \
                ", ".join(updates)

        batchInsertCommand = batchInsertCommand.replace('\\""', '\\" "')
        batchInsertCommand = batchInsertCommand.replace('""', "null")
        batchInsertCommand = batchInsertCommand.replace('"None"', 'null')

        inserted = False
        while not inserted:
            try:
                message = writequery(
                    log=log,
                    sqlQuery=batchInsertCommand,
                    dbConn=dbConn,
                    Force=True,
                    manyValueList=vals
                )
            except Exception:
                # FALL BACK TO THE INSERT COMMAND AND VALUE TUPLES GENERATED
                # ROW-BY-ROW BY `convert_dictionary_to_mysql_table`
                theseInserts = []
                for aDict in rows:
                    insertCommand, valueTuple = convert_dictionary_to_mysql_table(
                        dbConn=dbConn,
                        log=log,
                        dictionary=aDict,
                        dbTableName=dbTableName,
                        uniqueKeyList=uniqueKeyList,
                        dateModified=dateModified,
                        returnInsertOnly=True,
                        replace=replace,
                        reDatetime=reDatetime,
                        skipChecks=True,
                        dateCreated=dateCreated
                    )
                    theseInserts.append(valueTuple)

                message = writequery(
                    log=log,
                    sqlQuery=insertCommand,
                    dbConn=dbConn,
                    Force=True,
                    manyValueList=theseInserts
                )

            if message == "unknown column":
                # ITERATE THE ROWS (`batch[0]`), NOT THE `(rows, end)` TUPLE
                # NOTE(review): presumably this adds the missing columns to the
                # table before the retry -- confirm
                for aDict in rows:
                    convert_dictionary_to_mysql_table(
                        dbConn=dbConn,
                        log=log,
                        dictionary=aDict,
                        dbTableName=dbTableName,
                        uniqueKeyList=uniqueKeyList,
                        dateModified=dateModified,
                        reDatetime=reDatetime,
                        replace=replace,
                        dateCreated=dateCreated
                    )
            else:
                inserted = True

            dbConn.commit()
    finally:
        # ONLY CLOSE A CONNECTION THIS CALL OPENED ITSELF
        if ownsConnection:
            dbConn.close()

    log.debug('completed the ``_insert_single_batch_into_database`` function')
    return "None"
def _add_dictlist_to_database_via_load_in_file(
        masterListIndex,
        dbTablename,
        dbSettings,
        dateModified=False):
    """*load a list of dictionaries into a database table with load data infile*

    The dictionaries are written to a pipe-delimited CSV file under ``/tmp``, loaded into a temporary table cloned from ``dbTablename`` and then copied into ``dbTablename`` with ``INSERT IGNORE ... ON DUPLICATE KEY UPDATE``. The function opens its own database connection from ``dbSettings`` and logs to an ``emptyLogger``.

    **Key Arguments:**

        - ``masterListIndex`` -- the index of the sharedList of dictionary lists to process
        - ``dbTablename`` -- the name of the database table to add the list to
        - ``dbSettings`` -- the dictionary of database settings
        - ``dateModified`` -- add a dateModified stamp with an updated flag to rows?

    **Return:**
        - None
    """
    from fundamentals.logs import emptyLogger
    log = emptyLogger()
    log.debug('starting the ``_add_dictlist_to_database_via_load_in_file`` function')

    global sharedList

    dictList = sharedList[masterListIndex][0]

    # SETUP ALL DATABASE CONNECTIONS
    dbConn = database(
        log=log,
        dbSettings=dbSettings
    ).connect()

    # THE PROCESS ID IS APPENDED SO TWO WORKER PROCESSES STARTING IN THE SAME
    # MICROSECOND CANNOT WRITE TO THE SAME CSV FILE
    tmpTable = "%s_%d" % (
        datetime.now().strftime("tmp_%Y%m%dt%H%M%S%f"), os.getpid())
    csvPath = '/tmp/%(tmpTable)s' % locals()

    try:
        # CREATE A TEMPORARY TABLE TO ADD DATA TO
        sqlQuery = """CREATE TEMPORARY TABLE %(tmpTable)s SELECT * FROM %(dbTablename)s WHERE 1=0;""" % locals()
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )

        csvColumns = list(set([k for d in dictList for k in list(d.keys())]))
        csvColumnsString = (', ').join(csvColumns)
        # NOTE(review): renames a `dec` column to `decl` in the LOAD DATA column
        # list, but only when it is neither first nor last in the list, and not
        # in the update statement below -- confirm intent
        csvColumnsString = csvColumnsString.replace(u" dec,", u" decl,")

        df = pd.DataFrame(dictList)
        # `\N` IS WRITTEN IN PLACE OF MISSING VALUES
        df.replace(['nan', 'None', '', 'NaN', np.nan], '\\N', inplace=True)
        df.to_csv(csvPath, sep="|",
                  index=False, escapechar="\\", quotechar='"', columns=csvColumns, encoding='utf-8')

        sqlQuery = """LOAD DATA LOCAL INFILE '/tmp/%(tmpTable)s'
INTO TABLE %(tmpTable)s
FIELDS TERMINATED BY '|' OPTIONALLY ENCLOSED BY '"'
IGNORE 1 LINES
(%(csvColumnsString)s);""" % locals()

        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )

        updateClauses = ["`%(i)s` = VALUES(`%(i)s`)" % locals()
                         for i in csvColumns]
        if dateModified:
            updateClauses.append("dateLastModified = NOW(), updated = 1")
        updateStatement = ", ".join(updateClauses)

        sqlQuery = """
INSERT IGNORE INTO %(dbTablename)s
SELECT * FROM %(tmpTable)s
ON DUPLICATE KEY UPDATE %(updateStatement)s;""" % locals()
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )

        sqlQuery = """DROP TEMPORARY TABLE %(tmpTable)s;""" % locals()
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )
    finally:
        # ALWAYS REMOVE THE CSV FILE AND RELEASE THE CONNECTION, EVEN IF A QUERY
        # FAILED; THE FILE MAY NOT EXIST IF THE FAILURE HAPPENED BEFORE IT WAS WRITTEN
        try:
            os.remove(csvPath)
        except OSError:
            pass
        dbConn.close()

    log.debug(
        'completed the ``_add_dictlist_to_database_via_load_in_file`` function')
    return None
456# use the tab-trigger below for new function
457# xt-def-function