Coverage for src/writer.py: 100%

30 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-22 17:59 +0530

1"""Write Data to file.""" 

2from __future__ import annotations 

3 

4import csv 

5import json 

6from pathlib import Path 

7 

8from tqdm import tqdm 

9from typing_extensions import NotRequired, TypedDict, Unpack 

10 

11 

class WriterParams(TypedDict):
    """Optional keyword arguments accepted by ``Writer.write``.

    Both keys are declared ``NotRequired``, so callers may pass either,
    both, or neither of them.
    """

    # Name of the output format; ``Writer.write`` falls back to "csv" when absent.
    output_format: NotRequired[str]
    # Field separator for CSV output; ``Writer.write`` falls back to "," when absent.
    delimiter: NotRequired[str]

17 

18 

class Writer:
    """Write data to file.

    Groups the static helpers that turn a temporary file of JSON lines into
    the final output file.
    """

    @staticmethod
    def write(
        total_records: int,
        out_file: str,
        headers: list[str],
        **kwargs: Unpack[WriterParams],
    ) -> None:
        """Write data to output file.

        Args:
            total_records: Expected number of records; used as the progress
                bar total.
            out_file: Path of the file to create. The records are read from
                ``f"{out_file}.tmp"``.
            headers: Column names written as the CSV header row.
            **kwargs: Optional ``output_format`` (defaults to ``"csv"``) and
                ``delimiter`` (defaults to ``","``).

        Raises:
            NotImplementedError: If ``output_format`` is anything but "csv".
        """
        output_format = kwargs.get("output_format", "csv")
        if output_format != "csv":
            msg = f"Format {output_format} is not supported"
            raise NotImplementedError(msg)

        delimiter = str(kwargs.get("delimiter", ","))
        Writer._write_to_csv(total_records, out_file, headers, delimiter)

    @staticmethod
    def _write_to_csv(total_records: int, out_file: str, headers: list[str], delimiter: str) -> None:
        """Write content to CSV file.

        Reads ``f"{out_file}.tmp"`` line by line, parses every line as JSON
        and writes it as one CSV row to ``out_file``. The temporary file is
        removed once all rows have been written.

        Args:
            total_records: Total shown on the progress bar.
            out_file: Destination CSV path; also the progress bar label.
            headers: Field names for the ``csv.DictWriter``.
            delimiter: Field separator passed to the ``csv.DictWriter``.
        """
        temp_file = Path(f"{out_file}.tmp")
        # newline="" is required by the csv module: without it, platforms that
        # translate "\n" on write turn the writer's "\r\n" into "\r\r\n".
        with Path(out_file).open(mode="w", encoding="utf-8", newline="") as output_file:
            csv_writer = csv.DictWriter(output_file, fieldnames=headers, delimiter=delimiter)
            csv_writer.writeheader()
            # Using the bar as a context manager closes it even when reading,
            # parsing or writing a record raises.
            with tqdm(
                desc=out_file,
                total=total_records,
                unit="docs",
                colour="green",
            ) as bar:
                with temp_file.open(encoding="utf-8") as file:
                    for line in file:
                        bar.update(1)
                        csv_writer.writerow(json.loads(line))
        temp_file.unlink(missing_ok=True)