Module yaprak.yaprak
Expand source code
#!/usr/bin/env python
# SPDX-FileCopyrightText: © 2023 N. Sertac Artan <artans.github@gmail.com>
# SPDX-License-Identifier: MIT
import json
import os
from abc import ABC, abstractmethod
class Yaprak(ABC):
"""Base class for the yaprak package."""
def __init__(self, config = None):
'''
The yaprak class initializes with empty local variables. If a
configuration file name is given as an argument, this will trigger
the loading of a configuration file via the self.readConfig()
method.
Args:
config (str): (Optional) configuration filename. If given, the
self.readConfig() method will be called.
'''
self.config = {}
self.current_iteration = {}
self.globals = {}
self.__IDs = []
self.__processes = []
self.__inFileList = []
self.__outFileList = []
self.__outPath = None
if config:
self.readConfig(config)
self.globals = {'outPath': self.__outPath}
def run(self):
'''
The main method to call to go through all the input files, execute
all the processes, populate the output files, and report anything
pertinent to each input file.
This method runs for all instances (defined as an ID, an input file,
and an output file). If any of the parameters don't exist, the
operation stops.
For each process a process_spec is given (probably read from the
configuration file. This process_spec will be passed to the method,
and executed. The process_spec can include basic parameters such as
numbers, booleans, and strings, which the json file supports.
The process will be skipped if the process_spec does not have the
apply property. This way, processes can be disabled quickly by just
removing the apply property.
Args:
None
Returns:
None
'''
for instances in zip(self.__IDs, self.__inFileList, self.__outFileList):
self.current_iteration = {'ID': instances[0], 'inFile': instances[1],
'outFile': instances[2]}
self.load(instances[1])
for process_spec in self.__processes:
if process_spec['apply']:
function = getattr(self, process_spec['process'])
function(process_spec)
self.save(instances[2])
self.report()
@abstractmethod
def load(self, file):
'''
An abstract method as a placeholder for the file load function. The
child class must implement this function.
Args:
file (str): Name of the file to load
Returns:
None
'''
raise NotImplementedError("The load method in Yaprak is abstract\
and should be implemented in the child class")
@abstractmethod
def save(self, file):
'''
An abstract method as a placeholder for the file save function. The
child class must implement this function.
Args:
file (str): Name of the file to save
Returns:
None
'''
raise NotImplementedError("The save method in Yaprak is abstract\
and should be implemented in the child class")
@abstractmethod
def report(self):
'''
An abstract method as a placeholder for the report function. The
child class must implement this function.
Args:
None:
Returns:
None
'''
raise NotImplementedError("The report method in Yaprak is abstract\
and should be implemented in the child class")
# Config
def readConfig(self, file):
'''
Reads the json configuration file. The IDs, input file list, and
output file lists will be populated with full paths to the files, if
they exist in the configuration file. If output path is defined and
non-existing, the output path will be created. Processes will also
be populated if they are listed in the configuration file.
This function will be called automatically in __init__() if the
configuration file is given to __init__().
Args:
file (str): Name of the configuration file to load
Returns:
None
'''
self.config = load_json_file(file)
if "IDs" in self.config:
self.__IDs = self.config['IDs']
if "inFileList" in self.config:
self.__inFileList = fullPathFileList(self.config, 'in')
if "outFileList" in self.config:
self.__outFileList = fullPathFileList(self.config, 'out')
if "outPath" in self.config:
self.__outPath = self.config['outPath']
mkdir_p(self.__outPath)
if "processes" in self.config:
self.__processes = [x for x in self.config['processes']]
def setConfig(self, config):
self.config = config
def getConfig(self):
return self.config
def generateConfig(self):
pass
# IDs
def setIDs(self, IDs):
self.__IDs = IDs
def getIDs(self):
return self.__IDs
def generateIDs(self):
pass
# File Lists
def setInFileList(self, fileList):
self.__inFileList = fileList
def getInFileList(self):
return self.__inFileList
def setOutFileList(self, fileList):
self.__outFileList = fileList
def getOutFileList(self):
return self.__outFileList
# Processes
def setProcesses(self, processes):
self.__processes = processes
def getProcesses(self):
return self.__processes
def generateProcesses(self):
pass
class Summary(Yaprak):
"""Summary class for the yaprak package."""
def __init__(self, config = None):
'''
The summary class is similar to the base yaprak class. The primary
difference is that the Summary class saves the output to a single
output file, and individual files. This class is not intended to be
used as a way to aggregate many output files into a single file.
Instead, this class is aimed for the use cases, where some basic
information is collected from each file (e.g. statistics, or
feature vectors), and summarized in the single output file.
Args:
config (str): (Optional) configuration filename. If given, the
self.readConfig() method will be called.
'''
Yaprak.__init__(self, config)
self.outSummaryFile = None
self.readAdditionalConfig(config)
# Config
def readAdditionalConfig(self, config):
'''
Reads the global information from the configuration file, that is
the name of the output summary file, if it exists.
Args:
config (str): Name of the configuration file to load
Returns:
None
'''
if "outSummaryFile" in self.config:
self.outSummaryFile = fullPathFile(self.config, 'outSummary')
def run(self):
'''
This method is similar to the base class' run method in that it
loads all the input files, run the processes, and report the
pertinent information. The main difference is that in this class
only one output file is generated, which consolidates outputs from
all the input files.
Args:
None:
Returns:
None
'''
IDs = self.getIDs()
inFileList = self.getInFileList()
processes = self.getProcesses()
for instances in zip(IDs, inFileList):
self.current_iteration = {'ID': instances[0],
'inFile': instances[1]}
self.load(instances[1])
for process_spec in processes:
if process_spec['apply']:
function = getattr(self, process_spec['process'])
function(process_spec)
self.summarize()
self.report()
@abstractmethod
def summarize(self):
'''
An abstract method as a placeholder for the summarize function. The
child class must implement this function.
Args:
None:
Returns:
None
'''
raise NotImplementedError("The summarize method in Yaprak is abstract\
and should be implemented in the child class")
def fullPathFile(config, type):
pathName = type + "Path"
fileName = type + "File"
fullPathFileOutput = None
# Path and filename separate
if pathName in config and fileName in config:
path = config[pathName]
fileName = config[fileName]
fullPathFileOutput = path + fileName
# Full path
elif fileName in config:
fullPathFileOutput = config[fileName]
return fullPathFileOutput
def fullPathFileList(config, type):
pathName = type + "Path"
fileListName = type + "FileList"
# Grab file info from config.
fullPathFileListOutput = []
# Path and filename separate
if pathName in config and fileListName in config:
path = config[pathName]
fileList = config[fileListName]
fullPathFileListOutput = [path + file for file in fileList]
# Full path
elif fileListName in config:
fullPathFileListOutput = config[fileListName]
return fullPathFileListOutput
def load_json_file(fileName):
"""
Loads a json file as config
Args:
fileName (str): Name of input json file.
Returns:
dict: Json data as key-value pairs dictionary
"""
with open(fileName) as json_file:
json_data = json.load(json_file)
return json_data
def mkdir_p(path):
"""
Creates the directory path if it doesn't exist
Args:
path (str): Full path of the directory to be created
Returns:
int: 0 if directory exists, 1 if directory does not exist. If the
latter, this function creates the new library.
"""
if os.path.exists(path):
print("Output directory exists, overwrites are possible.")
return 0
print("Creating new directory " + path + ".")
os.makedirs(path)
return 1
Functions
def fullPathFile(config, type)
-
Expand source code
def fullPathFile(config, type): pathName = type + "Path" fileName = type + "File" fullPathFileOutput = None # Path and filename separate if pathName in config and fileName in config: path = config[pathName] fileName = config[fileName] fullPathFileOutput = path + fileName # Full path elif fileName in config: fullPathFileOutput = config[fileName] return fullPathFileOutput
def fullPathFileList(config, type)
-
Expand source code
def fullPathFileList(config, type): pathName = type + "Path" fileListName = type + "FileList" # Grab file info from config. fullPathFileListOutput = [] # Path and filename separate if pathName in config and fileListName in config: path = config[pathName] fileList = config[fileListName] fullPathFileListOutput = [path + file for file in fileList] # Full path elif fileListName in config: fullPathFileListOutput = config[fileListName] return fullPathFileListOutput
def load_json_file(fileName)
-
Loads a json file as config
Args
fileName
:str
- Name of input json file.
Returns
dict
- Json data as key-value pairs dictionary
Expand source code
def load_json_file(fileName): """ Loads a json file as config Args: fileName (str): Name of input json file. Returns: dict: Json data as key-value pairs dictionary """ with open(fileName) as json_file: json_data = json.load(json_file) return json_data
def mkdir_p(path)
-
Creates the directory path if it doesn't exist
Args
path
:str
- Full path of the directory to be created
Returns
int
- 0 if directory exists, 1 if directory does not exist. If the
latter, this function creates the new library.
Expand source code
def mkdir_p(path): """ Creates the directory path if it doesn't exist Args: path (str): Full path of the directory to be created Returns: int: 0 if directory exists, 1 if directory does not exist. If the latter, this function creates the new library. """ if os.path.exists(path): print("Output directory exists, overwrites are possible.") return 0 print("Creating new directory " + path + ".") os.makedirs(path) return 1
Classes
class Summary (config=None)
-
Summary class for the yaprak package.
The summary class is similar to the base yaprak class. The primary difference is that the Summary class saves the output to a single output file, and individual files. This class is not intended to be used as a way to aggregate many output files into a single file. Instead, this class is aimed for the use cases, where some basic information is collected from each file (e.g. statistics, or feature vectors), and summarized in the single output file.
Args
config
:str
- (Optional) configuration filename. If given, the self.readConfig() method will be called.
Expand source code
class Summary(Yaprak): """Summary class for the yaprak package.""" def __init__(self, config = None): ''' The summary class is similar to the base yaprak class. The primary difference is that the Summary class saves the output to a single output file, and individual files. This class is not intended to be used as a way to aggregate many output files into a single file. Instead, this class is aimed for the use cases, where some basic information is collected from each file (e.g. statistics, or feature vectors), and summarized in the single output file. Args: config (str): (Optional) configuration filename. If given, the self.readConfig() method will be called. ''' Yaprak.__init__(self, config) self.outSummaryFile = None self.readAdditionalConfig(config) # Config def readAdditionalConfig(self, config): ''' Reads the global information from the configuration file, that is the name of the output summary file, if it exists. Args: config (str): Name of the configuration file to load Returns: None ''' if "outSummaryFile" in self.config: self.outSummaryFile = fullPathFile(self.config, 'outSummary') def run(self): ''' This method is similar to the base class' run method in that it loads all the input files, run the processes, and report the pertinent information. The main difference is that in this class only one output file is generated, which consolidates outputs from all the input files. Args: None: Returns: None ''' IDs = self.getIDs() inFileList = self.getInFileList() processes = self.getProcesses() for instances in zip(IDs, inFileList): self.current_iteration = {'ID': instances[0], 'inFile': instances[1]} self.load(instances[1]) for process_spec in processes: if process_spec['apply']: function = getattr(self, process_spec['process']) function(process_spec) self.summarize() self.report() @abstractmethod def summarize(self): ''' An abstract method as a placeholder for the summarize function. The child class must implement this function. Args: None: Returns: None ''' raise NotImplementedError("The summarize method in Yaprak is abstract\ and should be implemented in the child class")
Ancestors
- Yaprak
- abc.ABC
Methods
def readAdditionalConfig(self, config)
-
Reads the global information from the configuration file, that is the name of the output summary file, if it exists.
Args
config
:str
- Name of the configuration file to load
Returns
None
Expand source code
def readAdditionalConfig(self, config): ''' Reads the global information from the configuration file, that is the name of the output summary file, if it exists. Args: config (str): Name of the configuration file to load Returns: None ''' if "outSummaryFile" in self.config: self.outSummaryFile = fullPathFile(self.config, 'outSummary')
def run(self)
-
This method is similar to the base class' run method in that it loads all the input files, run the processes, and report the pertinent information. The main difference is that in this class only one output file is generated, which consolidates outputs from all the input files.
Args
None
Returns
None
Expand source code
def run(self): ''' This method is similar to the base class' run method in that it loads all the input files, run the processes, and report the pertinent information. The main difference is that in this class only one output file is generated, which consolidates outputs from all the input files. Args: None: Returns: None ''' IDs = self.getIDs() inFileList = self.getInFileList() processes = self.getProcesses() for instances in zip(IDs, inFileList): self.current_iteration = {'ID': instances[0], 'inFile': instances[1]} self.load(instances[1]) for process_spec in processes: if process_spec['apply']: function = getattr(self, process_spec['process']) function(process_spec) self.summarize() self.report()
def summarize(self)
-
An abstract method as a placeholder for the summarize function. The child class must implement this function.
Args
None
Returns
None
Expand source code
@abstractmethod def summarize(self): ''' An abstract method as a placeholder for the summarize function. The child class must implement this function. Args: None: Returns: None ''' raise NotImplementedError("The summarize method in Yaprak is abstract\ and should be implemented in the child class")
Inherited members
class Yaprak (config=None)
-
Base class for the yaprak package.
The yaprak class initializes with empty local variables. If a configuration file name is given as an argument, this will trigger the loading of a configuration file via the self.readConfig() method.
Args
config
:str
- (Optional) configuration filename. If given, the self.readConfig() method will be called.
Expand source code
class Yaprak(ABC): """Base class for the yaprak package.""" def __init__(self, config = None): ''' The yaprak class initializes with empty local variables. If a configuration file name is given as an argument, this will trigger the loading of a configuration file via the self.readConfig() method. Args: config (str): (Optional) configuration filename. If given, the self.readConfig() method will be called. ''' self.config = {} self.current_iteration = {} self.globals = {} self.__IDs = [] self.__processes = [] self.__inFileList = [] self.__outFileList = [] self.__outPath = None if config: self.readConfig(config) self.globals = {'outPath': self.__outPath} def run(self): ''' The main method to call to go through all the input files, execute all the processes, populate the output files, and report anything pertinent to each input file. This method runs for all instances (defined as an ID, an input file, and an output file). If any of the parameters don't exist, the operation stops. For each process a process_spec is given (probably read from the configuration file. This process_spec will be passed to the method, and executed. The process_spec can include basic parameters such as numbers, booleans, and strings, which the json file supports. The process will be skipped if the process_spec does not have the apply property. This way, processes can be disabled quickly by just removing the apply property. Args: None Returns: None ''' for instances in zip(self.__IDs, self.__inFileList, self.__outFileList): self.current_iteration = {'ID': instances[0], 'inFile': instances[1], 'outFile': instances[2]} self.load(instances[1]) for process_spec in self.__processes: if process_spec['apply']: function = getattr(self, process_spec['process']) function(process_spec) self.save(instances[2]) self.report() @abstractmethod def load(self, file): ''' An abstract method as a placeholder for the file load function. The child class must implement this function. Args: file (str): Name of the file to load Returns: None ''' raise NotImplementedError("The load method in Yaprak is abstract\ and should be implemented in the child class") @abstractmethod def save(self, file): ''' An abstract method as a placeholder for the file save function. The child class must implement this function. Args: file (str): Name of the file to save Returns: None ''' raise NotImplementedError("The save method in Yaprak is abstract\ and should be implemented in the child class") @abstractmethod def report(self): ''' An abstract method as a placeholder for the report function. The child class must implement this function. Args: None: Returns: None ''' raise NotImplementedError("The report method in Yaprak is abstract\ and should be implemented in the child class") # Config def readConfig(self, file): ''' Reads the json configuration file. The IDs, input file list, and output file lists will be populated with full paths to the files, if they exist in the configuration file. If output path is defined and non-existing, the output path will be created. Processes will also be populated if they are listed in the configuration file. This function will be called automatically in __init__() if the configuration file is given to __init__(). Args: file (str): Name of the configuration file to load Returns: None ''' self.config = load_json_file(file) if "IDs" in self.config: self.__IDs = self.config['IDs'] if "inFileList" in self.config: self.__inFileList = fullPathFileList(self.config, 'in') if "outFileList" in self.config: self.__outFileList = fullPathFileList(self.config, 'out') if "outPath" in self.config: self.__outPath = self.config['outPath'] mkdir_p(self.__outPath) if "processes" in self.config: self.__processes = [x for x in self.config['processes']] def setConfig(self, config): self.config = config def getConfig(self): return self.config def generateConfig(self): pass # IDs def setIDs(self, IDs): self.__IDs = IDs def getIDs(self): return self.__IDs def generateIDs(self): pass # File Lists def setInFileList(self, fileList): self.__inFileList = fileList def getInFileList(self): return self.__inFileList def setOutFileList(self, fileList): self.__outFileList = fileList def getOutFileList(self): return self.__outFileList # Processes def setProcesses(self, processes): self.__processes = processes def getProcesses(self): return self.__processes def generateProcesses(self): pass
Ancestors
- abc.ABC
Subclasses
Methods
def generateConfig(self)
-
Expand source code
def generateConfig(self): pass
def generateIDs(self)
-
Expand source code
def generateIDs(self): pass
def generateProcesses(self)
-
Expand source code
def generateProcesses(self): pass
def getConfig(self)
-
Expand source code
def getConfig(self): return self.config
def getIDs(self)
-
Expand source code
def getIDs(self): return self.__IDs
def getInFileList(self)
-
Expand source code
def getInFileList(self): return self.__inFileList
def getOutFileList(self)
-
Expand source code
def getOutFileList(self): return self.__outFileList
def getProcesses(self)
-
Expand source code
def getProcesses(self): return self.__processes
def load(self, file)
-
An abstract method as a placeholder for the file load function. The child class must implement this function.
Args
file
:str
- Name of the file to load
Returns
None
Expand source code
@abstractmethod def load(self, file): ''' An abstract method as a placeholder for the file load function. The child class must implement this function. Args: file (str): Name of the file to load Returns: None ''' raise NotImplementedError("The load method in Yaprak is abstract\ and should be implemented in the child class")
def readConfig(self, file)
-
Reads the json configuration file. The IDs, input file list, and output file lists will be populated with full paths to the files, if they exist in the configuration file. If output path is defined and non-existing, the output path will be created. Processes will also be populated if they are listed in the configuration file.
This function will be called automatically in init() if the configuration file is given to init().
Args
file
:str
- Name of the configuration file to load
Returns
None
Expand source code
def readConfig(self, file): ''' Reads the json configuration file. The IDs, input file list, and output file lists will be populated with full paths to the files, if they exist in the configuration file. If output path is defined and non-existing, the output path will be created. Processes will also be populated if they are listed in the configuration file. This function will be called automatically in __init__() if the configuration file is given to __init__(). Args: file (str): Name of the configuration file to load Returns: None ''' self.config = load_json_file(file) if "IDs" in self.config: self.__IDs = self.config['IDs'] if "inFileList" in self.config: self.__inFileList = fullPathFileList(self.config, 'in') if "outFileList" in self.config: self.__outFileList = fullPathFileList(self.config, 'out') if "outPath" in self.config: self.__outPath = self.config['outPath'] mkdir_p(self.__outPath) if "processes" in self.config: self.__processes = [x for x in self.config['processes']]
def report(self)
-
An abstract method as a placeholder for the report function. The child class must implement this function.
Args
None
Returns
None
Expand source code
@abstractmethod def report(self): ''' An abstract method as a placeholder for the report function. The child class must implement this function. Args: None: Returns: None ''' raise NotImplementedError("The report method in Yaprak is abstract\ and should be implemented in the child class")
def run(self)
-
The main method to call to go through all the input files, execute all the processes, populate the output files, and report anything pertinent to each input file.
This method runs for all instances (defined as an ID, an input file, and an output file). If any of the parameters don't exist, the operation stops.
For each process a process_spec is given (probably read from the configuration file. This process_spec will be passed to the method, and executed. The process_spec can include basic parameters such as numbers, booleans, and strings, which the json file supports.
The process will be skipped if the process_spec does not have the apply property. This way, processes can be disabled quickly by just removing the apply property.
Args
None
Returns
None
Expand source code
def run(self): ''' The main method to call to go through all the input files, execute all the processes, populate the output files, and report anything pertinent to each input file. This method runs for all instances (defined as an ID, an input file, and an output file). If any of the parameters don't exist, the operation stops. For each process a process_spec is given (probably read from the configuration file. This process_spec will be passed to the method, and executed. The process_spec can include basic parameters such as numbers, booleans, and strings, which the json file supports. The process will be skipped if the process_spec does not have the apply property. This way, processes can be disabled quickly by just removing the apply property. Args: None Returns: None ''' for instances in zip(self.__IDs, self.__inFileList, self.__outFileList): self.current_iteration = {'ID': instances[0], 'inFile': instances[1], 'outFile': instances[2]} self.load(instances[1]) for process_spec in self.__processes: if process_spec['apply']: function = getattr(self, process_spec['process']) function(process_spec) self.save(instances[2]) self.report()
def save(self, file)
-
An abstract method as a placeholder for the file save function. The child class must implement this function.
Args
file
:str
- Name of the file to save
Returns
None
Expand source code
@abstractmethod def save(self, file): ''' An abstract method as a placeholder for the file save function. The child class must implement this function. Args: file (str): Name of the file to save Returns: None ''' raise NotImplementedError("The save method in Yaprak is abstract\ and should be implemented in the child class")
def setConfig(self, config)
-
Expand source code
def setConfig(self, config): self.config = config
def setIDs(self, IDs)
-
Expand source code
def setIDs(self, IDs): self.__IDs = IDs
def setInFileList(self, fileList)
-
Expand source code
def setInFileList(self, fileList): self.__inFileList = fileList
def setOutFileList(self, fileList)
-
Expand source code
def setOutFileList(self, fileList): self.__outFileList = fileList
def setProcesses(self, processes)
-
Expand source code
def setProcesses(self, processes): self.__processes = processes