# -*- coding: utf-8 -*-
# Apache Software License 2.0
#
# Copyright (c) 2018, Christophe Duong
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Handles configuration files for the application
"""
import logging
import os
import uuid
from datetime import datetime
from logging import config
from platform import uname
from tempfile import TemporaryDirectory
from urllib.error import HTTPError
from urllib.error import URLError
import pyhocon
from pytz import timezone
from yaml import safe_load
from aiscalator import __version__
from aiscalator.core.utils import copy_replace
from aiscalator.core.utils import data_file
def setup_logging():
    """
    Configure logging for the application.

    The packaged ``logging.yaml`` is handed to ``load_logging_conf``;
    when that yields nothing, a basic INFO-level configuration is
    installed instead. If the ``AISCALATOR_LOG_LEVEL`` environment
    variable is set, it overrides the level of the root logger.
    A startup banner is finally emitted at DEBUG level.
    """
    requested_level = os.getenv('AISCALATOR_LOG_LEVEL', None)
    with open(data_file("../config/logging.yaml"), 'rt') as stream:
        loaded = load_logging_conf(stream)
    if not loaded:
        # Nothing was loaded: fall back to a minimal configuration
        logging.basicConfig(level=logging.INFO)
    if requested_level:
        logging.root.setLevel(requested_level)
    platform_tag = "_".join(uname()).replace(" ", "_")
    banner = ("Starting " + os.path.basename(__name__) +
              " version " + __version__ +
              " on " + platform_tag)
    logging.debug(banner)
def load_logging_conf(file):
    """
    Read a YAML logging configuration and apply it.

    Parameters
    ----------
    file
        readable file-like object holding the YAML logging
        configuration; a falsy value means nothing to load

    Returns
    -------
    the ``file`` argument once its configuration has been applied,
    or None when ``file`` is falsy
    """
    if not file:
        return None
    # Created before the configuration is applied
    # (presumably the log handlers write there - confirm in logging.yaml)
    os.makedirs('/tmp/aiscalator/log', exist_ok=True)
    config.dictConfig(safe_load(file.read()))
    return file
def generate_global_config() -> str:
    """
    Create the user's global configuration file from the packaged
    template.

    The template ``aiscalator/config/template/aiscalator.conf`` is
    copied to ``~/.aiscalator/config/aiscalator.conf`` in two passes:
    the first one substitutes the ``testUserID`` placeholder with the
    generated user id (into a temporary folder), the second one
    substitutes ``generation_date`` with the current UTC time and
    writes the final file.

    Returns
    -------
    str
        path to the generated configuration file
    """
    home = os.path.expanduser("~")
    dst = os.path.join(home, ".aiscalator/config/aiscalator.conf")
    utc_now = datetime.utcnow().replace(tzinfo=timezone("UTC"))
    # The date is wrapped in double quotes for the configuration file
    quoted_now = '"' + str(utc_now) + '"'
    with TemporaryDirectory() as tmp:
        staged = os.path.join(tmp, "aiscalator.conf")
        copy_replace(data_file("../config/template/aiscalator.conf"),
                     staged,
                     pattern="testUserID",
                     replace_value=generate_user_id())
        copy_replace(staged,
                     dst,
                     pattern="generation_date",
                     replace_value=quoted_now)
    return dst
def generate_user_id() -> str:
    """
    Build the identifier of the user running the setup.

    Returns
    -------
    str
        the letter ``u`` followed by the value of ``uuid.getnode()``
    """
    node = uuid.getnode()
    return "u{}".format(node)
class AiscalatorConfig:
    """
    A configuration object for the Aiscalator application.

    This object stores:
        - global configuration for the whole application
        - configuration for a particular context specified in a step
          configuration file.
        - In this case, we might even focus on a particular step.

    Attributes
    ----------
    _user_config_override : str
        path to the specified user config
    _app_conf
        global configuration object for the application
    _step_config : str
        path to the step configuration (or plain configuration as string)
    _focused_steps : list
        list of selected steps, as tuples of (step_name, step)
    _step_it : int
        index of the next step to process
    _step
        (step_name, step) tuple of the currently processed step,
        or None when there is none
    """

    def __init__(self,
                 user_config_override=None,
                 step_config=None,
                 steps_selection=None):
        """
        Parameters
        ----------
        user_config_override : str
            path of the user configuration folder to override
            the default one
        step_config : str
            path to the step configuration file (or plain configuration
            string)
        steps_selection : List
            list of names of steps from the configuration file to focus on
        """
        self._user_config_override = user_config_override
        self._app_conf = self.setup_app_config()
        setup_logging()
        self._step_config = step_config
        all_steps = parse_step_config(step_config)
        self._focused_steps = select_steps(all_steps, steps_selection)
        self._step_it = 0
        self._step = self.next_step()

    def setup_app_config(self):
        """
        Setup global application configuration.

        If not found in the default location, this method will generate
        a brand new one.

        Returns
        -------
        the parsed global configuration object
        """
        try:
            file = self.find_user_config_file("config/aiscalator.conf")
            conf = pyhocon.ConfigFactory.parse_file(file)
        except FileNotFoundError:
            conf = pyhocon.ConfigFactory.parse_file(generate_global_config())
        return conf

    def find_user_config_file(self, filename) -> str:
        """
        Looks for configuration files in the user configuration folder

        Parameters
        ----------
        filename : str
            file to search for

        Returns
        -------
        str
            path to the filename in the user configuration folder
            (the override folder when one was specified, otherwise
            ``~/.aiscalator``)
        """
        # TODO check user_config_folder override in environment
        if self._user_config_override:
            return os.path.join(self._user_config_override, filename)
        # TODO url user config?
        return os.path.join(os.path.expanduser("~"), '.aiscalator', filename)

    def next_step(self):
        """
        Iterates to the next configuration step from the list
        of selected steps

        Returns
        -------
        the next (step_name, step) tuple, or None once every
        selected step has been visited
        """
        result = None
        i = self._step_it
        if i < len(self._focused_steps):
            self._step_it += 1
            result = self._focused_steps[i]
        # The focused step is updated even when the list is exhausted
        self._step = result
        return result

    def user_env_file(self) -> list:
        """
        Find a list of env files to pass to docker containers

        Returns
        -------
        List
            env files
        """
        # TODO look if env file has been defined in the focused step
        # TODO look in user config if env file has been redefined
        return [self.find_user_config_file("config/.env")]

    def notebook_output_path(self, notebook) -> str:
        """
        Generates the name of the output notebook

        Parameters
        ----------
        notebook : str
            path of the notebook that is run

        Returns
        -------
        str
            path under ``/home/jovyan/work/notebook_run/`` made of the
            notebook base name, the current timestamp and the user id
        """
        return ("/home/jovyan/work/notebook_run/" +
                os.path.basename(notebook).replace(".ipynb", "") + "_" +
                self.timestamp_now() +
                self.user_id() +
                ".ipynb")

    def timestamp_now(self) -> str:
        """
        Depending on how the timezone is configured, returns the
        timestamp for this instant.

        Returns
        -------
        str
            current time formatted as ``%Y%m%d%H%M%S`` in the timezone
            of the application configuration, or in 'Europe/Paris' when
            the application section is empty
        """
        date_now = datetime.utcnow().replace(tzinfo=timezone("UTC"))
        if self._app_conf["aiscalator"]:
            target_tz = timezone(self.app_config().timezone)
        else:
            target_tz = timezone('Europe/Paris')
        return date_now.astimezone(target_tz).strftime("%Y%m%d%H%M%S")

    def app_config(self):
        """
        Returns
        -------
        the "aiscalator" section of the global configuration object
        """
        return self._app_conf["aiscalator"]

    def user_id(self) -> str:
        """
        Returns
        -------
        str
            the user id stored when the application was first setup
        """
        return self.app_config()["user.id"]

    def app_config_has(self, field) -> bool:
        """
        Tests if the application config has a configuration
        value for the field.
        """
        if not self.app_config():
            return False
        return field in self.app_config()

    def step_field(self, field):
        """
        Returns the value associated with the field for the currently
        focused step, or None when the step does not define it.
        """
        if self.has_step_field(field):
            return self._step[1][field]
        return None

    def has_step_field(self, field) -> bool:
        """
        Tests if the currently focused step has a configuration
        value for the field.
        """
        if not self._step:
            return False
        return field in self._step[1]

    def step_config_path(self):
        """
        Returns
        -------
        str
            Returns the path to the step configuration file.
            If it was an URL, it will return the path to the temporary
            downloaded version of it.
            If it was a plain string, or if no step configuration was
            provided at all, then returns None
        """
        # step_config is optional: os.path.exists(None) would raise
        # a TypeError instead of reporting a missing file.
        if not self._step_config:
            return None
        if os.path.exists(self._step_config):
            # Only report the path if the file parses to a non-empty config
            if pyhocon.ConfigFactory.parse_file(self._step_config):
                return os.path.abspath(self._step_config)
        # TODO if string is url/git repo, download file locally first
        return None

    def step_name(self):
        """
        Returns the name of the currently focused step, or None when
        no step is focused.
        """
        if not self._step:
            return None
        return self._step[0]

    def root_dir(self):
        """
        Returns
        -------
        str
            Returns the path to the folder containing the step
            configuration file (always ending with "/"), or None
            when there is no step configuration file
        """
        path = self.step_config_path()
        if path:
            root_dir = os.path.dirname(path)
            if not root_dir.endswith("/"):
                root_dir += "/"
            return root_dir
        return None

    def file_path(self, string):
        """
        Returns absolute path of a file from a field of the currently
        focused step.

        Relative values are resolved against the folder of the step
        configuration file when there is one, otherwise against the
        current working directory. Returns None when the focused step
        does not define the field.
        """
        if not self.has_step_field(string):
            return None
        # TODO handle url
        root_dir = self.root_dir()
        if root_dir:
            return os.path.abspath(os.path.join(root_dir,
                                                self.step_field(string)))
        return os.path.abspath(self.step_field(string))

    def container_name(self) -> str:
        """Return the docker container name to execute this step"""
        return (
            self.step_field("task.type") +
            "_" +
            self.step_name()
        )

    def validate_config(self):
        """
        Check if all the fields in the reference config are
        defined in focused steps too. Otherwise
        raise an Exception (either pyhocon.ConfigMissingException
        or pyhocon.ConfigWrongTypeException)

        The minimum templates are checked strictly (missing fields
        raise); the full templates only log a warning for missing
        fields. Type mismatches raise in every case.
        """
        # Global configuration: mandatory fields
        reference = data_file("../config/template/minimum_aiscalator.conf")
        ref = pyhocon.ConfigFactory.parse_file(reference)
        msg = "In Global Application Configuration file "
        validate_configs(self._app_conf, ref, msg,
                         missing_exception=True,
                         type_mismatch_exception=True)
        # Global configuration: optional fields
        reference = data_file("../config/template/aiscalator.conf")
        ref = pyhocon.ConfigFactory.parse_file(reference)
        msg = "In Global Application Configuration file "
        validate_configs(self._app_conf, ref, msg,
                         missing_exception=False,
                         type_mismatch_exception=True)
        # Focused steps: mandatory fields
        reference = data_file("../config/template/minimum_step.conf")
        ref = pyhocon.ConfigFactory.parse_file(reference)
        for step_name, step in self._focused_steps:
            msg = "in step named " + step_name
            validate_configs(step,
                             ref["steps"]["Untitled"],
                             msg,
                             missing_exception=True,
                             type_mismatch_exception=True)
        # Focused steps: optional fields
        reference = data_file("../config/template/step.conf")
        ref = pyhocon.ConfigFactory.parse_file(reference)
        for step_name, step in self._focused_steps:
            msg = "in step named " + step_name
            validate_configs(step,
                             ref["steps"]["Untitled"],
                             msg,
                             missing_exception=False,
                             type_mismatch_exception=True)
def _report_type_mismatch(path, key, found, expected, raise_exception):
    """Raise, or log as a warning, a type mismatch found for ``key``."""
    msg = (path + ": Type mismatch of " + key + " found type " +
           str(type(found)) + " instead of " +
           str(type(expected)))
    if raise_exception:
        raise pyhocon.ConfigWrongTypeException(
            message="Exception " + msg
        )
    logging.warning("Warning %s", msg)


def validate_configs(test, reference, path,
                     missing_exception=True,
                     type_mismatch_exception=True):
    """
    Recursively check two configs if they match

    Every key of ``reference`` must be present in ``test`` with a value
    of a compatible type. Nested configuration trees are checked
    recursively; for lists, every tree element of ``test`` is checked
    against every tree element of ``reference``. Lists of plain values
    (strings, numbers...) have no inner structure and are not inspected
    any further.

    Parameters
    ----------
    test
        configuration object to test
    reference
        reference configuration object
    path : str
        this accumulates the recursive path for details in Exceptions
    missing_exception : bool
        when a missing field is found, raise exception?
        (otherwise a warning is logged)
    type_mismatch_exception : bool
        when a field has type mismatch, raise exception?
        (otherwise a warning is logged)

    Raises
    ------
    pyhocon.ConfigMissingException
        if a field is missing and ``missing_exception`` is set
    pyhocon.ConfigWrongTypeException
        if a field has the wrong type and ``type_mismatch_exception``
        is set
    """
    # TODO instead of exceptions, build a log of "compilation" errors...
    for key in reference.keys():
        if key not in test.keys():
            msg = (path + ": Missing definition of " + key)
            if missing_exception:
                raise pyhocon.ConfigMissingException(
                    message="Exception " + msg
                )
            logging.warning("Warning %s", msg)
            continue
        expected = reference[key]
        actual = test[key]
        child_path = ".".join([path, key])
        if not isinstance(actual, type(expected)):
            _report_type_mismatch(path, key, actual, expected,
                                  type_mismatch_exception)
        elif isinstance(expected, dict):
            # pyhocon's ConfigTree is an OrderedDict subclass, so this
            # covers configuration trees: test recursively
            validate_configs(actual, expected, child_path,
                             missing_exception,
                             type_mismatch_exception)
        elif isinstance(expected, list):
            # iterate through both collections
            for item in actual:
                for ref_item in expected:
                    if not isinstance(ref_item, dict):
                        # plain values have no keys to compare
                        continue
                    if not isinstance(item, dict):
                        _report_type_mismatch(path, key, item, ref_item,
                                              type_mismatch_exception)
                        continue
                    validate_configs(item, ref_item, child_path,
                                     missing_exception,
                                     type_mismatch_exception)
def parse_step_config(step_config):
    """
    Interpret the step_config to produce a step configuration
    object. It could be provided as:
        - a path to a local file
        - a url to a remote file
        - the plain configuration stored as string

    Parameters
    ----------
    step_config : str
        path, url or plain configuration string

    Returns
    -------
    Step configuration object, or None when ``step_config`` is empty
    """
    if not step_config:
        return None
    if os.path.exists(step_config):
        return pyhocon.ConfigFactory.parse_file(step_config)
    try:
        return pyhocon.ConfigFactory.parse_URL(step_config)
    except (HTTPError, URLError, ValueError):
        # urlopen raises ValueError ("unknown url type") for a string
        # that is not a URL at all: this is how a plain configuration
        # string ends up here.
        return pyhocon.ConfigFactory.parse_string(step_config)
def select_steps(step_conf, steps_selection: list) -> list:
    """
    Extract the list of step objects corresponding to
    the list of names provided.

    When no selection is provided, the first task found in the
    configuration (if any) is selected.

    Parameters
    ----------
    step_conf
        step configuration object
    steps_selection : list
        list of names of step to extract

    Returns
    -------
    list
        list of tuples of (step_name, step) of selected
        configuration objects

    Raises
    ------
    pyhocon.ConfigMissingException
        if a selection was requested but none of its names matches
        a task of the configuration
    """
    tasks = find_tasks(step_conf["steps"]) if step_conf else []
    if steps_selection:
        # Results follow the order of the requested names
        result = [(step_name, step)
                  for target_step in steps_selection
                  for step_name, step in tasks
                  if step_name == target_step]
    else:
        # Slicing instead of tasks[0]: a configuration without any
        # task yields an empty selection rather than an IndexError.
        result = tasks[:1]
    if steps_selection and not result:
        msg = (" ".join(steps_selection) +
               " was not found in step configurations.\n ")
        if tasks:
            msg += ("Available tasks are: " +
                    " ".join([task_name for task_name, _ in tasks]))
        raise pyhocon.ConfigMissingException(msg)
    return result
def find_tasks(tree: pyhocon.ConfigTree, path=""):
    """
    Walk a configuration object and collect every sub-tree that
    defines a 'task' section.

    Parameters
    ----------
    tree : pyhocon.ConfigTree
        Configuration object to explore
    path : str
        dotted path that was traversed to get to this tree

    Returns
    -------
    list
        list of tuples (path, tree) where ``tree`` is a configuration
        object having a 'task' key and ``path`` is the dotted path
        leading to it
    """
    prefix = path + "." if path else ""
    found = []
    for key in tree.keys():
        if key == 'task':
            # This tree itself is a task definition
            found.append((path, tree))
            continue
        child = tree[key]
        if isinstance(child, pyhocon.config_tree.ConfigTree):
            found.extend(find_tasks(child, path=prefix + key))
    return found