Source code for Plotting

import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import seaborn as sns

def plot_length_distribution(filename, savename=None):
    """Plot the time-averaged abundance of molecules grouped by length.

    Reads the CSV at ``filename`` (it must provide ``molecule`` and
    ``abundance`` columns), pools every abundance value by
    ``len(molecule)`` and draws a bar chart of the per-length mean, with
    the population standard deviation (``np.std``) as error bars.

    Args:
        filename: Path of the CSV file to read.
        savename: If given, the figure is written to this path; otherwise
            the figure is shown with ``plt.show()``.
    """
    static_df = pd.read_csv(filename).filter(items=['molecule', 'abundance'])

    # Pool abundances by molecule length. A dict (rather than a fixed-size
    # list) means molecules longer than six characters no longer raise
    # IndexError.
    abundances_by_length = {}
    for molecule, abundance in zip(static_df['molecule'],
                                   static_df['abundance']):
        abundances_by_length.setdefault(len(molecule), []).append(
            float(abundance))

    # Always draw at least lengths 1-6 (the historical fixed range) and
    # extend the axis when longer molecules are present.
    max_length = max([6] + list(abundances_by_length))
    lengths = list(range(1, max_length + 1))

    length_averages = []
    length_std = []
    for length in lengths:
        values = abundances_by_length.get(length)
        if values:
            length_averages.append(np.mean(values))
            length_std.append(np.std(values))
        else:
            # Same NaN that np.mean([]) would yield, without the
            # RuntimeWarning about an empty slice.
            length_averages.append(float('nan'))
            length_std.append(float('nan'))

    plt.bar(lengths, length_averages, yerr=length_std)
    plt.ylabel('Molecule Count')
    plt.xlabel('Molecule Size/Length')
    plt.title('Time averaged length abundances')
    plt.ylim(ymin=0)

    if savename is not None:
        plt.savefig(savename)
    else:
        plt.show()
    plt.close()
def plot_molecule_distribution(filename, savename=None):
    """Plot the time-averaged distribution of individual molecules.

    Reads the CSV at ``filename`` (it must provide ``molecule`` and
    ``abundance`` columns). Only molecules whose name is at most four
    characters long are plotted. For each one, the plotted value is the
    mean of ``len(molecule) * abundance`` over all of its rows, with the
    population standard deviation (``np.std``) as the error bar. Bars are
    grouped and coloured by molecule length.

    Args:
        filename: Path of the CSV file to read.
        savename: If given, the figure is written to this path; otherwise
            the figure is shown with ``plt.show()``.
    """
    static_df = pd.read_csv(filename).filter(items=['molecule', 'abundance'])

    # Keep first-appearance order of the short molecules.
    molecules = [m for m in static_df['molecule'].unique() if len(m) <= 4]
    max_length = max([0] + [len(m) for m in molecules])
    color_palette = sns.color_palette("husl", max_length)

    # Length-weighted abundance samples per molecule; dict membership keeps
    # the per-row check O(1).
    weighted = dict((m, []) for m in molecules)
    for molecule, abundance in zip(static_df['molecule'],
                                   static_df['abundance']):
        if molecule in weighted:
            weighted[molecule].append(float(len(molecule) * abundance))

    # Bars are laid out grouped by length, so the tick labels must be
    # collected in that same order (labelling with `molecules` directly
    # mislabels bars whenever the input is not already sorted by length).
    tick_labels = []
    current_x = 0
    for length in range(1, max_length + 1):
        group = [m for m in molecules if len(m) == length]
        group_means = [np.mean(weighted[m]) for m in group]
        group_std = [np.std(weighted[m]) for m in group]
        group_color = color_palette[length - 1]
        plt.bar(list(range(current_x, current_x + len(group))),
                group_means,
                yerr=group_std,
                color=group_color,
                ecolor=group_color,
                label="length %i" % length)
        tick_labels.extend(group)
        current_x += len(group)

    plt.legend(fontsize=40)
    ax = plt.gca()
    ax.set_ylim(ymin=0)
    ax.set_ylabel('Molecule Count', fontsize=30, position=(-0.2, 0.5))

    # Tick positions keep the original +0.35 offset from each bar's x value.
    # NOTE(review): this offset presumably targets edge-aligned bars;
    # confirm it is still wanted with the matplotlib version in use.
    plt.xticks([x + 0.35 for x in range(current_x)], tick_labels,
               rotation='vertical', fontsize=30)
    plt.yticks(fontsize=20)

    fig = plt.gcf()
    fig.set_size_inches(18, 18)
    plt.title('Molecule Distribution', fontsize=50)

    if savename is not None:
        plt.savefig(savename)
    else:
        plt.show()
    plt.close()
def plot_time_series(filename, print_molcules, savename=None,
                     total_mass=10000):
    """Plot the mass-fraction time series of the requested molecules.

    The tidy CSV at ``filename`` is pivoted with ``generate_ts_df`` into
    a molecule-by-time table, which is then read back and transposed. One
    line is drawn per entry of ``print_molcules``, each in a randomly
    assigned colour from a "husl" palette, with points ordered by time.

    Args:
        filename: Path of the tidy CSV file passed to ``generate_ts_df``.
        print_molcules: Sequence of molecule names to plot. Each must be
            present in the data, otherwise the column lookup raises
            ``KeyError``.
        savename: If given, the figure is written to this path; otherwise
            the figure is shown with ``plt.show()``.
        total_mass: Divisor that turns an abundance into a mass fraction.
            Defaults to 10000, the value previously hard-coded.
    """
    import random

    color_palette = sns.color_palette("husl", len(print_molcules))
    colorindex = list(range(len(print_molcules)))

    # NOTE: the pivoted table is round-tripped through 'temp_TS.csv' in
    # the current working directory, and the file is left in place.
    generate_ts_df(filename, 'temp_TS.csv')
    ts_df = pd.read_csv('temp_TS.csv', index_col=0).T

    # After the transpose the index holds the time stamps. They are the
    # same for every molecule, so convert them once.
    raw_times = list(np.array(ts_df.index.values, dtype=float))

    for m in print_molcules:
        mass_fraction = [float(x) / total_mass for x in ts_df[m]]

        # Order the samples chronologically (stable sort on time).
        pairs = sorted(zip(mass_fraction, raw_times), key=lambda p: p[1])
        sorted_fraction = [p[0] for p in pairs]
        sorted_time = [p[1] for p in pairs]

        # Draw a colour at random without replacement.
        [ci1] = random.sample(colorindex, 1)
        colorindex.remove(ci1)

        # label=m gives plt.legend() below an entry for this line.
        plt.plot(sorted_time, sorted_fraction, color=color_palette[ci1],
                 label=m)

    plt.xticks(fontsize=30)
    plt.yticks(fontsize=30)
    plt.ylabel('Mass Fraction', fontsize=30)
    plt.xlabel('Time', fontsize=30)
    fig = plt.gcf()
    fig.set_size_inches(18, 18)
    plt.legend(fontsize=20)
    plt.ylim(ymin=0.0)

    if savename is not None:
        plt.savefig(savename)
    else:
        plt.show()
    plt.close()
########################################################################################
def generate_ts_df(infile, outname):
    """Pivot a tidy abundance CSV into a molecule-by-time table.

    Reads ``infile`` (it must provide ``time``, ``molecule`` and
    ``abundance`` columns) and writes a CSV to ``outname`` with one row
    per molecule and one column per time, both in order of first
    appearance. A molecule with no row at a given time gets an empty
    (NaN) cell.

    Args:
        infile: Path of the tidy CSV file to read.
        outname: Path of the CSV file to write.
    """
    # Load the tidy data file.
    tidy_df = pd.read_csv(infile)

    times = tidy_df['time'].unique()
    molecules = tidy_df['molecule'].unique()

    timeseries_df = pd.DataFrame(index=molecules, columns=times)

    for t in times:
        at_t = tidy_df[tidy_df['time'] == t]
        # Index the slice by its own molecule names so that the assignment
        # aligns by label. Relabelling positionally with the global
        # molecule order would scramble values when a slice lists
        # molecules in a different order, and fail when one is missing.
        timeseries_df[t] = at_t.set_index('molecule')['abundance']

    timeseries_df.to_csv(outname)