Coverage for src/writer.py: 100%
30 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-22 17:59 +0530
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-22 17:59 +0530
1"""Write Data to file."""
2from __future__ import annotations
4import csv
5import json
6from pathlib import Path
8from tqdm import tqdm
9from typing_extensions import NotRequired, TypedDict, Unpack
class WriterParams(TypedDict):
    """Optional keyword arguments accepted by ``Writer.write``.

    Both keys may be omitted; ``Writer.write`` supplies a default for each.
    """

    # Name of the output format. ``Writer.write`` defaults this to "csv" and
    # raises NotImplementedError for any other value.
    output_format: NotRequired[str]
    # Field delimiter handed to ``csv.DictWriter``; ``Writer.write`` defaults
    # this to ",".
    delimiter: NotRequired[str]
class Writer:
    """Convert an intermediate ``<out_file>.tmp`` dump into the final output file.

    The temp file is read as one JSON document per line; each document is
    written as one row of the output file.
    """

    @staticmethod
    def write(
        total_records: int,
        out_file: str,
        headers: list[str],
        **kwargs: Unpack[WriterParams],
    ) -> None:
        """Write the records stored in ``<out_file>.tmp`` to ``out_file``.

        Args:
            total_records: Expected number of records; used only as the
                progress-bar total.
            out_file: Path of the file to create (overwritten if it exists).
            headers: Column names, in output order.
            **kwargs: Optional ``output_format`` (default ``"csv"``) and
                ``delimiter`` (default ``","``).

        Raises:
            NotImplementedError: If ``output_format`` is anything but ``"csv"``.
        """
        output_format = kwargs.get("output_format", "csv")
        # Guard clause: reject unknown formats before touching the filesystem.
        if output_format != "csv":
            msg = f"Format {output_format} is not supported"
            raise NotImplementedError(msg)
        Writer._write_to_csv(total_records, out_file, headers, str(kwargs.get("delimiter", ",")))

    @staticmethod
    def _write_to_csv(total_records: int, out_file: str, headers: list[str], delimiter: str) -> None:
        """Stream ``<out_file>.tmp`` (JSON lines) into ``out_file`` as CSV.

        Args:
            total_records: Progress-bar total.
            out_file: Destination CSV path; the source is ``f"{out_file}.tmp"``.
            headers: CSV field names; a header row is always written.
            delimiter: Field separator passed to ``csv.DictWriter``.

        The temp file is deleted only after every line has been converted, so
        a failure part-way through leaves the source data in place.
        """
        temp_path = Path(f"{out_file}.tmp")
        # newline="" is required by the csv module: without it the writer's
        # own "\r\n" terminators are translated again on Windows ("\r\r\n")
        # and newlines embedded in quoted fields are not preserved correctly.
        with Path(out_file).open(mode="w", encoding="utf-8", newline="") as output_file:
            csv_writer = csv.DictWriter(output_file, fieldnames=headers, delimiter=delimiter)
            csv_writer.writeheader()
            # Using the bar as a context manager guarantees it is closed even
            # when a line fails to parse or a row is rejected by the writer.
            with tqdm(
                desc=out_file,
                total=total_records,
                unit="docs",
                colour="green",
            ) as progress:
                with temp_path.open(encoding="utf-8") as source:
                    for line in source:
                        progress.update(1)
                        csv_writer.writerow(json.loads(line))
        # Reached only on success; both files are closed by now.
        temp_path.unlink(missing_ok=True)