In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sqlite3
!mkdir -p 07_compare_fit
In [2]:
# Load data
# `with sqlite3.connect(...)` only scopes a transaction (commit/rollback on exit);
# it does NOT close the connection. Close it explicitly so the file handle is
# released even if one of the queries fails.
con = sqlite3.connect("lce_data.sqlite")
try:
    fitness = pd.read_sql_query("SELECT * FROM fitness_assays", con)
    fits = pd.read_sql_query("SELECT * FROM survival_fit WHERE fit=='L-LCS--modelbeta-grid100-p1-1500' OR fit=='L-LCS--modelbeta-grid100-p2-1500'", con)
    treedt = pd.read_sql_query("SELECT * FROM genealogies WHERE experiment=='L-LCS-'", con)
finally:
    con.close()

# Drop the root node, then take an explicit copy: the column assignments below
# would otherwise write into a filtered slice (SettingWithCopyWarning).
fits = fits.query('name!="L-LCS-_ROOT"').copy()
# `time` is the integer found between the first '_' and the following '-' of `name`.
fits['time'] = [int(x.split('_')[1].split('-')[0]) for x in fits.name]
# `phase` is the second-to-last '-'-separated token of the fit identifier
# (e.g. 'p1' for 'L-LCS--modelbeta-grid100-p1-1500').
fits['phase'] = [x.split('-')[-2] for x in fits.fit]

# Remove every assay whose microcosm label contains 'WT' (copy for the same reason as above).
fitness = fitness[['WT' not in x for x in fitness.microcosm]].copy()
fitness['proba_p1'] = fitness['proba_mat_p1']*fitness['proba_SM_p1']
# Microcosm labels that start with a number carry it as `time`; all others
# (e.g. 'LCS- ancestor') are assigned time 0.
fitness['time'] = [int(x.split('-')[0]) if x.split('-')[0].isdigit() else 0 for x in fitness.microcosm]
In [3]:
def plot_comparison_p1(assay_fitness, bayesian_fitness, ancestor, parents, title='L-LCS-'):
    """Overlay assay-based and genealogy-based survival estimates on one axis.

    Parameters
    ----------
    assay_fitness : pandas.DataFrame
        Must provide the columns ``time``, ``proba_p1`` and ``microcosm``.
        The frame is only read; it is not modified.
    bayesian_fitness : pandas.DataFrame
        Must provide the columns ``name``, ``time`` and ``survival_estimate``.
    ancestor : float
        y-value of the dashed horizontal reference line labelled
        "LCS- ancestor".
    parents : dict
        Maps a ``name`` of ``bayesian_fitness`` to the name of its parent.
        A segment is drawn from each point to its parent (placed at
        ``time - 1``) when the parent is itself present in ``bayesian_fitness``.
    title : str, optional
        Axes title. Defaults to ``'L-LCS-'``.

    Returns
    -------
    (matplotlib.figure.Figure, matplotlib.axes.Axes)
        The figure and the single axes that were drawn on.
    """
    fig, ax = plt.subplots(1, 1, figsize=(11, 8))
    ax.scatter(assay_fitness.time, assay_fitness.proba_p1,
               alpha=0.5, label='Independent fitness assay')
    ax.scatter(bayesian_fitness.time, bayesian_fitness.survival_estimate,
               label='Bayesian estimate from the genealogy', marker='x')

    # Annotate every assay point with the last '-'-separated token of its
    # microcosm label, and remember which (time, label) pairs were assayed.
    # This is kept in a local set instead of writing a 'name' column into the
    # caller's DataFrame.
    assayed = set()
    for time, proba, microcosm in zip(assay_fitness.time,
                                      assay_fitness.proba_p1,
                                      assay_fitness.microcosm):
        label = microcosm.split('-')[-1]
        ax.text(time, proba, label, rotation=45, color='C0')
        assayed.add((time, label.strip()))

    # Points that are always annotated, even without a matching assay.
    always_labelled = {(2, "A3"), (1, "F8")}

    pos = bayesian_fitness.set_index('name').survival_estimate.to_dict()
    for name, time, estimate in zip(bayesian_fitness['name'],
                                    bayesian_fitness.time,
                                    bayesian_fitness.survival_estimate):
        # Link the point to its parent in the previous generation, if known.
        if name in parents and parents[name] in pos:
            ax.plot([time, time - 1],
                    [estimate, pos[parents[name]]],
                    color='C1', alpha=0.5)
        # Short label: last '-'-separated token of the part after the first '_'.
        n = name.split('_')[1].split('-')[-1]
        if (time, n) in assayed or (time, n) in always_labelled:
            ax.text(time, estimate, n, rotation=-45, color='C1',
                    horizontalalignment='right',
                    verticalalignment='bottom')

    # Tick positions are fixed to generations 1..11.
    ax.set(ylabel="Ph. I survival probability", xlabel="Generation-Phase",
           xticks=np.arange(1, 12),
           xticklabels=[f"{x}-I" for x in np.arange(1, 12)])
    ax.legend()
    ax.text(1, ancestor + 0.01, "LCS- ancestor", color='C0')
    ax.axhline(ancestor, ls='--')
    ax.set(title=title)
    return fig, ax
In [4]:
# Rows of the assay table whose `size` column is 'L'; both the treatment subset
# and the ancestor reference value are taken from this selection.
large_assays = fitness.query("size=='L'")

fig, ax = plot_comparison_p1(
    assay_fitness=large_assays.query('treatment=="LCS-"'),
    bayesian_fitness=fits.query('phase=="p1"'),
    # First `proba_p1` value of the row labelled "LCS- ancestor".
    ancestor=large_assays.query('microcosm=="LCS- ancestor"').proba_p1.values[0],
    # Lookup from each genealogy node name to its `parent_previous_cycle` value.
    parents=treedt.set_index('name').parent_previous_cycle.to_dict(),
)
fig.savefig("07_compare_fit/comparison_llcm_p1.pdf",bbox_inches='tight')