Source code for metropolis_hastings

import pandas as pd
import numpy as np
import joblib

import matplotlib.pyplot as plt
import seaborn as sns
import itertools
from copy import deepcopy

from operators import Operator, OperatorFactory
from helper_func import activity_colors
from activity import Schedule
from typing import Dict, List, Optional, Union, Tuple, Generator


# -------------------------- RANDOM WALK -----------------------------------#

[docs]def random_walk(init_sched: Schedule, n_iter: int = 1000, operators: Optional[List] = None, p_operators: Optional[List] = None, uniform: bool = False, param_file:Optional[str] = None, rnd_term:float = None, **kwargs)->Generator[Tuple[Schedule, Operator, float], None, None]: """ Random walk generator Parameters ------------- - init_sched: Initial Schedule object - n_iter: number of iterations - p_operators: operators probabilities - uniform: if True, defines a constant target distribution - param_file: location of file containing the parameters - rnd_term: random term to add to the utility function - kwargs: additional keyword arguments Returns ------------- Generator with current state, operator, and acceptance probability. """ #Initialize schedule and operators accepted = 0 rejected = 0 init_sched.streamline() if not operators: operators = ['Block', 'Assign', 'AddAnchor', 'Swap', 'InflateDeflate', 'MetaOperator'] if not p_operators: p_operators = len(operators)*[1/len(operators)] current_state = init_sched current_discret = current_state.discretization op_factory = OperatorFactory() #Compute initial weights current_weight = target_weight(init_sched, uniform=uniform, param_file=param_file, rnd_term=rnd_term) #Start walk for n in range(n_iter): new_state = deepcopy(current_state) op = op_factory.draw(operators, p_operators) try: new_state = op.apply_change(new_state) new_state.streamline() except: continue #Compute change probabilities f_proba = forward_probability(current_state, new_state, op) b_proba = backward_probability(current_state, new_state, op) #Compute new weights and log-ratio (acceptance proba) new_weight = target_weight(new_state, uniform=uniform, param_file=param_file, rnd_term=rnd_term) ratio = min(np.log(abs(new_weight)) - np.log(abs(current_weight)) + np.log(b_proba) - np.log(f_proba), 0) #Accept proposal u = np.log(np.random.rand()) if u <= ratio: current_state = new_state current_weight = new_weight current_state.streamline() del new_state, new_weight yield current_state, op, ratio
[docs]def forward_probability(current_state: Schedule, proposal_state: Schedule, operator: Operator) -> float: """Returns forward probability to go from state i (current state) to j (proposed state) within a given iteration. Parameters: ------------------- - current_state: current schedule - proposal_state: proposal schedule - operator: object of class Operator Returns: ------------------ - Forward probability """ if not isinstance(operator, Operator): raise TypeError("Can't compute forward probability - not a valid operator.") proba = operator.proba*operator.compute_change_proba(current_state, proposal_state) return proba
[docs]def backward_probability(current_state: Schedule, proposal_state: Schedule, operator: Operator) -> float: """Returns backward probability to go from state j (proposed state) to i (current state) within a given iteration. Parameters: ------------------- - current_state: current schedule - proposal_state: proposal schedule - operator: object of class Operator Returns: ------------------ - Backward probability """ if not isinstance(operator, Operator): raise TypeError("Can't compute backward probability - not a valid operator.") proba = operator.proba*operator.compute_change_proba(proposal_state, current_state) return proba
[docs]def target_weight(sched: Schedule, uniform: bool =False, param_file: Optional[str] = None, rnd_term:Optional[float] = None) -> float: """ Computes target weight for MH --> utility of schedule from initial model estimated with simple random sampling Parameters ----------------- -sched: Schedule object -uniform: if True, returns constant weight -param_file: file where to find parameters -rnd_term: random term for the utility function """ if uniform or (param_file is None): weight = 1 else: parameters = joblib.load(param_file) parameters['business_trip:constant'] = 0 weight = sched.compute_utility(parameters, rnd_term) return weight
# ----------------------------- STATISTICS ------------------------#
[docs]def between_var(sequences: List) -> float: """Computes variance of variable between sequences sequences: M x N list containing the N values of the variable of interest for the M sequences (N is 1/4 of the length of the sequence at the beginning of the MH process)""" M = len(sequences) N = len(sequences[0]) mean_sequences = np.mean([np.mean(x) for x in sequences]) var = (N / (M - 1)) * sum([(np.mean(s) - mean_sequences) ** 2 for s in sequences]) return var
[docs]def within_var(sequences: List) -> float: """Computes variance of variable within sequences sequences: M x N list containing the N values of the variable of interest for the M sequences (N is 1/4 of the length of the sequence at the beginning of the MH process)""" M = len(sequences) N = len(sequences[0]) var = (1 / (M * (N-1))) * sum([sum([(n - np.mean(m)) ** 2 for n in m]) for m in sequences]) return var
[docs]def scale_reduction(sequences: List) -> float: """Computes scale reduction of given sequences""" N = len(sequences[0]) B = between_var(sequences) W = within_var(sequences) return np.sqrt(((N - 1) / N) + B / (N * W))
# ---------------------------- VISUALISATION -------------------------------#
[docs]def collect_distributions(accepted_sched : List, activities: Optional[List]=None, exclude_home: bool =True, plot: bool=True, save_fig: Optional[str] = None, return_dict: bool=True): """Collects and plots the distributions of the resulting choice set for different metrics (start time, duration, activity participation). Parameters: ------------------- - accepted sched: list of accepted schedules from the MH algorithm (final choice set) - activities: list of activities for the collection of metrics. If none are passed, the metrics are computed for all activities - exclude_home: if True only collects info on out of home activities - plot: If True, returns plots - save_fig: export format (png/pdf/svg) as string. if None, the figure is not saved. - return_dict: if True, returns dictionary with statistics """ #Initialize dictionaries start_times = {} daily_duration = {} # total duration for each activity split_duration = {} # average duration per split frequency = {} # number of splits during the day participation = {} # activity participation if activities is None: list_act = ["home", "work","education","shopping", "errands_services","business_trip","leisure","escort"] if exclude_home: list_act.remove("home") else: list_act = activities for act in list_act: st = [] d_dur = [] sp_dur = [] frq = [] part = [] for sched in accepted_sched: act_labels = [a.label for a in sched.list_act] if act not in act_labels: #No participation in activity part.append(0) continue part.append(1) act_freq = len([k for k, g in itertools.groupby(act_labels) if k == act]) frq.append(act_freq) st.extend([a.start_time for a in sched.list_act if a.label == act]) sp_dur.extend([a.duration for a in sched.list_act if a.label == act]) d_dur.append(sum([a.duration for a in sched.list_act if a.label == act])) if sum(part) > 0: # activity was scheduled in at least one schedule start_times[act] = st daily_duration[act] = d_dur split_duration[act] = sp_dur frequency[act] = frq participation[act] = part if plot: #Create dataframes for every statistic st_df = pd.DataFrame.from_dict(start_times, orient="index").transpose().melt(var_name="activity", value_name="start_time") ddur_df = pd.DataFrame.from_dict(daily_duration, orient="index").transpose().melt(var_name="activity", value_name="duration") sdur_df = pd.DataFrame.from_dict(split_duration, orient="index").transpose().melt(var_name="activity", value_name="duration_split") fr_df = pd.DataFrame.from_dict(frequency, orient="index").transpose().melt(var_name="activity", value_name="frequency") part_df = pd.DataFrame.from_dict(participation, orient="index").transpose().melt(var_name="activity", value_name="participation") part_df = part_df.groupby("activity")["participation"].sum().reset_index() part_df["percentage"] = part_df["participation"].apply(lambda x: 100 * x / len(list(participation.values())[0])) sns.set_style("ticks") colors = activity_colors() fig, axs = plt.subplots(5, 1, figsize=[10, 20]) sns.histplot(data=st_df,x="start_time",hue="activity",kde = True,stat="frequency",palette=colors,ax=axs[0]) sns.histplot(data=ddur_df,x="duration",hue="activity",kde = True,palette=colors,stat="frequency",ax=axs[1]) sns.histplot(data=sdur_df,x="duration_split",hue="activity",kde = True,palette=colors,stat="frequency",ax=axs[2]) sns.histplot(data=fr_df,x="frequency",hue="activity",kde = True,palette=colors,stat="frequency",ax=axs[3]) sns.barplot(data=part_df, x="percentage", y="activity", palette=colors, ax=axs[4]) axs[0].set_title(f"Distribution of start times", fontweight="bold") axs[1].set_title(f"Distribution of total daily duration, per activity", fontweight="bold") axs[2].set_title(f"Distribution of duration, per activity split", fontweight="bold") axs[3].set_title(f"Distribution of activity frequency, per schedule", fontweight="bold") axs[4].set_title(f"Activity participation", fontweight="bold") axs[0].set_xlim([0, 24]) plt.tight_layout() if save_fig: filename = f'statistics.{save_fig}' plt.savefig(filename,format = save_fig) print(f'Figure saved at {filename}.') if return_dict: return [start_times, daily_duration, split_duration, frequency, participation] return None