Coverage for /Users/Dave/git_repos/_packages_/python/fundamentals/fundamentals/files/fileChunker.py : 44%

#!/usr/local/bin/python
# encoding: utf-8
"""
*Iterate through large line-based files in batches of lines*

:Author:
    David Young

:Date Created:
    December 4, 2017
"""
################# GLOBAL IMPORTS ####################
from builtins import range
from builtins import object
import sys
import os
os.environ['TERM'] = 'vt100'
from fundamentals import tools
import codecs


class fileChunker(object):
    """
    *The fileChunker iterator - iterate over large line-based files to reduce memory footprint*

    **Key Arguments:**
        - ``filepath`` -- path to the large file to iterate over
        - ``batchSize`` -- size of the chunks to return, in lines

    **Usage:**

    To setup your logger, settings and database connections, please use the ``fundamentals`` package (`see tutorial here <http://fundamentals.readthedocs.io/en/latest/#tutorial>`_).

    To initiate a fileChunker iterator and then process the file in batches of 100000 lines, use the following:

    .. code-block:: python

        from fundamentals.files import fileChunker
        fc = fileChunker(
            filepath="/path/to/large/file.csv",
            batchSize=100000
        )
        for i in fc:
            print(len(i))
    """
    def __init__(self, filepath, batchSize):
        self.filepath = filepath
        self.batchSize = batchSize

        try:
            self.readFile = codecs.open(
                self.filepath, encoding='utf-8', mode='r')
        except IOError:
            message = 'could not open the file %s' % (self.filepath,)
            raise IOError(message)
    def __iter__(self):
        return self

    def __next__(self):
        # collect up to `batchSize` lines; an empty string from
        # readline() signals end-of-file, so stop reading early
        batch = []
        for _ in range(self.batchSize):
            line = self.readFile.readline()
            if not line:
                break
            batch.append(line)
        if len(batch) == 0:
            raise StopIteration
        return batch
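
A minimal usage sketch, assuming the ``fundamentals`` package is installed; the file path and the ``totalLines`` counter are placeholders for illustration. The chunker yields lists of at most ``batchSize`` raw lines (the final batch may be shorter), so a whole file can be processed while memory use stays bounded:

.. code-block:: python

    from fundamentals.files import fileChunker

    # hypothetical input path -- substitute a real, large, line-based file
    fc = fileChunker(
        filepath="/path/to/large/file.csv",
        batchSize=100000
    )

    totalLines = 0
    for batch in fc:
        # `batch` is a list of up to 100000 lines (newlines included)
        totalLines += len(batch)

    print("processed %d lines" % (totalLines,))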