Source code for components.handlers

import os
import logging
import pandas as pd

from queue import Queue
from pathlib import Path
from datetime import datetime
from io import StringIO
from threading import Thread, current_thread, active_count
from typing import NoReturn, Protocol, runtime_checkable

from .containers import ReportProtocol


# Module-level logger named after this module; used by the Worker methods below.
logger_main = logging.getLogger(__name__)


@runtime_checkable
class WorkerFactoryProtocol(Protocol):
    """Structural interface for objects that deploy workers on threads.

    Being ``runtime_checkable``, the protocol can be used with
    ``isinstance`` to verify that an object exposes the members below.

    :param queue: Shared, thread-safe queue.
    :type queue: Queue
    :param threads: Number of threads, which equals the number of workers
        to be deployed.
    :type threads: int
    """

    queue: Queue
    threads: int

    def create_workers(self) -> None:
        """Create the workers, each one on an independent thread."""
        ...

    @staticmethod
    def active_workers() -> int:
        """Count the workers that are active at the time of the call.

        :return: Number of active workers.
        :rtype: int
        """
        ...
@runtime_checkable
class WorkerProtocol(Protocol):
    """Structural interface for worker objects that process reports.

    Implementations are expected to be built around a shared, thread-safe
    queue; the queue itself is not declared as a protocol member.

    :param queue: Shared, thread-safe queue.
    :type queue: Queue
    """

    def _read_stream(self, report: ReportProtocol) -> None:
        """Read the data stream held by the report via the Pandas reader.

        The response content is removed from the report object afterwards.

        :param report: Instance of the ReportProtocol object.
        :type report: ReportProtocol
        """
        ...

    def _save_to_csv(self, report: ReportProtocol) -> None:
        """Write the previously read data to a CSV file via Pandas.

        :param report: Instance of the ReportProtocol object.
        :type report: ReportProtocol
        """
        ...

    def _erase_report(self, report: ReportProtocol) -> None:
        """Erase the data kept in the report.

        :param report: Instance of the ReportProtocol object.
        :type report: ReportProtocol
        """
        ...

    def report_processing(self, report: ReportProtocol) -> None:
        """Orchestrate the processing of a single report.

        :param report: Instance of the ReportProtocol object.
        :type report: ReportProtocol
        """
        ...

    def run(self) -> NoReturn:
        """Start the listener on a separate thread and await queue items.

        :return: Method never returns.
        :rtype: NoReturn
        """
        ...
class WorkerFactory:
    """Concrete factory that deploys ``Worker`` threads bound to one queue."""

    def __init__(self, queue: Queue, *, threads: int = 1):
        """Store the configuration and deploy the workers right away.

        :param queue: Shared, thread-safe queue handed to every worker.
        :type queue: Queue
        :param threads: Number of threads, equal to the number of workers
            to be deployed. Defaults to 1.
        :type threads: int
        """
        self.queue: Queue = queue
        self.threads: int = threads
        # Workers are started as a side effect of construction.
        self.create_workers()

    def create_workers(self) -> None:
        """Start ``self.threads`` daemon workers named ``Slave-<index>``."""
        for num in range(self.threads):
            worker = Worker(self.queue)
            worker.name, worker.daemon = f'Slave-{num}', True
            worker.start()

    @staticmethod
    def active_workers() -> int:
        """Return the number of currently active workers.

        The value is computed as ``threading.active_count() - 1``, i.e. every
        live thread except one is counted, whether or not it is a worker.

        :return: Number of workers.
        :rtype: int
        """
        alive_threads = active_count()
        return alive_threads - 1
class Worker(Thread):
    """Thread that takes reports from a shared queue and stores them as CSV.

    For every valid report the worker parses ``report.response`` with pandas,
    drops the 5-row footer, writes the result to ``<report.path>/<name>.csv``
    and records the outcome on the report object.
    """

    def __init__(self, queue: Queue):
        """Constructor method for Worker.

        :param queue: Shared, thread-safe queue.
        :type queue: Queue
        """
        super().__init__()
        self.queue = queue

    def _load_content(self, report: ReportProtocol) -> bool:
        """Parse the report's response into ``report.content``.

        The raw response is always cleared afterwards. When pandas cannot
        parse it, a warning is logged, ``report.downloaded`` is set to
        ``False`` and ``report.content`` is left untouched.

        :param report: Instance of the ReportProtocol object.
        :type report: ReportProtocol
        :return: ``True`` if the content was parsed, ``False`` otherwise.
        :rtype: bool
        """
        logger_main.debug('Reading content of %s', report.name)

        try:
            frame = pd.read_csv(StringIO(report.response),
                                dtype='string',
                                low_memory=False)
        except pd.errors.EmptyDataError:
            logger_main.warning('%s timeouted, attmpts: %s',
                                report.name, report.attempt_count)
            report.downloaded = False
            return False
        except pd.errors.ParserError:
            # Raised by read_csv (not by to_csv) when the stream is cut short.
            logger_main.warning('%s unexpected end of stream: %s',
                                report.name, report.attempt_count)
            report.downloaded = False
            return False
        finally:
            # The raw payload is dropped whether or not parsing succeeded.
            report.response = ''

        logger_main.debug('Removing last 5 lines, footer of %s', report.name)
        # A negative slice yields an empty frame when fewer than 5 rows exist,
        # whereas head(rows - 5) would keep some of them.
        report.content = frame.iloc[:-5]
        return True

    def _read_stream(self, report: ReportProtocol) -> None:
        """Read the report's response and save it as the ``content`` attribute.

        Erases the saved response. Parsing failures are logged and flagged on
        the report (``downloaded = False``) instead of being raised.

        :param report: Instance of the ReportProtocol object.
        :type report: ReportProtocol
        """
        self._load_content(report)
        return None

    def _parse_save_path(self, report: ReportProtocol) -> os.PathLike:
        """Build the path of the CSV file the report is saved to.

        :param report: Instance of the ReportProtocol object.
        :type report: ReportProtocol
        :return: Path to save location, ``<report.path>/<report.name>.csv``.
        :rtype: os.PathLike
        """
        return Path(str(report.path)) / f'{report.name}.csv'

    def _save_to_csv(self, report: ReportProtocol) -> None:
        """Save the report content to a CSV file and set the report's flags.

        The success flags (``downloaded``, ``pull_date``, ``size`` in MiB and
        ``processing_time``) are set only after the file has been written; any
        error raised while writing propagates to the caller.

        :param report: Instance of the ReportProtocol object.
        :type report: ReportProtocol
        """
        file_path = self._parse_save_path(report)
        worker_name = current_thread().name

        logger_main.debug('Parsing path for %s -> %s', report.name, file_path)
        logger_main.debug('%s is saving file for %s -> %s',
                          worker_name, report.name, file_path)

        report.content.to_csv(file_path, index=False)

        logger_main.debug('%s saved %s -> %s',
                          worker_name, report.name, file_path)

        report.downloaded = True
        report.pull_date = datetime.now()
        report.size = round(os.stat(file_path).st_size / (1024 * 1024), 1)
        report.processing_time = report.pull_date - report.created_date

        logger_main.debug('%s succesfully saved by %s at %s, operation took: %s, file size: %s',
                          report.name, worker_name, report.pull_date,
                          report.processing_time, report.size)
        return None

    def _erase_report(self, report: ReportProtocol) -> None:
        """Replace the report content with an empty DataFrame.

        :param report: Instance of the ReportProtocol object.
        :type report: ReportProtocol
        """
        logger_main.debug('Deleting response and content for %s', report.name)
        report.content = pd.DataFrame()
        return None

    def process_report(self, report: ReportProtocol) -> None:
        """Orchestrate reading, saving and erasing of a single report.

        Invalid reports are only marked as downloaded. A report whose
        response cannot be parsed is neither saved nor erased, so its
        ``downloaded = False`` flag is preserved.

        :param report: Instance of the ReportProtocol object.
        :type report: ReportProtocol
        """
        if not report.valid:
            report.downloaded = True
            return None

        if self._load_content(report):
            self._save_to_csv(report)
            self._erase_report(report)
        return None

    def report_processing(self, report: ReportProtocol) -> None:
        """Alias of :meth:`process_report` under the WorkerProtocol name.

        :param report: Instance of the ReportProtocol object.
        :type report: ReportProtocol
        """
        return self.process_report(report)

    def run(self) -> NoReturn:
        """Listen to the queue and process every item taken from it.

        ``task_done`` is signalled for every item taken from the queue,
        including falsy ones that are skipped. Processing failures are logged
        with their traceback and do not stop the worker.

        :return: Function never returns.
        :rtype: NoReturn
        """
        worker_name = current_thread().name
        logger_main.debug('%s starting', worker_name)

        while True:
            report = self.queue.get()
            try:
                if report:
                    logger_main.debug('%s processing %s',
                                      worker_name, report.name)
                    try:
                        self.process_report(report)
                    except Exception as e:
                        # Thread boundary: log loudly and keep the worker alive.
                        logger_main.exception(
                            '%s failed while processing %s -> %s',
                            worker_name, report.name, e)
                    finally:
                        logger_main.debug('%s finishing %s',
                                          worker_name, report.name)
            finally:
                # Every get() needs a matching task_done(), else join() hangs.
                self.queue.task_done()
# Running this module directly as a script is a no-op.
if __name__ == '__main__': pass