In [1]:
import pandas as pd
import numpy as np
import glob 
import os 
import scipy.stats
import matplotlib.pyplot as plt
import matplotlib
import utils
In [2]:
# Raw fitness-assay records; the path is relative to the notebook's working directory.
data = pd.read_excel("raw/24-04-03/Lifecycle fitness tests.xlsx")

from utils import COLORS
# Output locations: EXPORT_PATH receives the CSV and figures, WWW_PATH the HTML table.
EXPORT_PATH = '06_fitness_assays'
WWW_PATH = "www"
# makedirs(exist_ok=True) is idempotent, so the cell can be re-run safely and
# there is no gap between an existence check and the directory creation.
os.makedirs(EXPORT_PATH, exist_ok=True)
os.makedirs(WWW_PATH, exist_ok=True)
In [3]:
# Display labels for the strains that are looked up by exact name
# (everything that is not parsed as a "PE-<cycle>-<exp>-<microcosm>" identifier).
names = {
    "Line-17": "LCS+ ancestor",
    "Line-17 mutS": "LCS- ancestor",
    "Line-17 mutS mutL*": "LCS- ancestor $mutL$",
    "SBW25": "SBW25",
    "SBW25 mutS*": "SBW25 $mutS^*$",
    "SBW25 mutL*": "SBW25 $mutL*$",
}


def convert_PE(pe):
    """Translate a strain identifier into the label stored in the `microcosm` column.

    * Identifiers listed in `names` are returned as their mapped label.
    * Otherwise "mutL" / "wssE" are rewritten to the mathtext markers
      "$mutL^{WT}$" / "$wssE^{WT}$"; identifiers that do not contain "PE"
      yield None.
    * Remaining identifiers are split on "-" (underscores count as "-") into
      exactly four fields ``PE, cycle, exp, microcosm`` and returned as
      ``"<cycle + 1>-I-<microcosm>"``. Whitespace and any marker that trail the
      last field are kept as-is.

    Raises ValueError when the identifier does not split into four fields or
    when the cycle field is not an integer.
    """
    label = names.get(pe)
    if label is not None:
        return label

    tagged = pe
    for gene in ("mutL", "wssE"):
        tagged = tagged.replace(gene, f"${gene}^{{WT}}$")

    if 'PE' not in tagged:
        return None

    fields = tagged.replace('_', '-').split('-')
    _, cycle, _exp, microcosm = fields
    return f"{int(cycle) + 1}-I-{microcosm}"

# Label every record with its display name and its treatment:
# strains without "PE" are ancestors, "PE" strains containing "WT" are LCS-,
# the remaining "PE" strains are LCS+.
data["microcosm"] = [convert_PE(x) for x in data.Strain]
data["treatment"] = [
    "ancestor" if "PE" not in x else "LCS-" if "WT" in x else "LCS+"
    for x in data.Strain
]

# Placeholder strings found in the spreadsheet that stand for "no value".
# One non-in-place replace() covers all of them; infer_objects() then makes the
# dtype inference explicit instead of relying on replace()'s deprecated silent
# downcasting (the source of the FutureWarning pandas emits for this cell).
data = data.replace(['na', 'Na', '9/37'], np.nan).infer_objects()
data.head()
/tmp/ipykernel_348611/246941747.py:25: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`
  data.replace('na',np.nan, inplace=True)
/tmp/ipykernel_348611/246941747.py:26: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`
  data.replace('Na',np.nan, inplace=True)
/tmp/ipykernel_348611/246941747.py:27: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`
  data.replace('9/37',np.nan, inplace=True)
Out[3]:
Date Strain Size Rack Tube Phase Mat Count WS Count SM Count FZ Extinct Replaced by Condition microcosm treatment
0 2016-11-15 Line-17 mutS small I 1 1 1.0 100.0 0.0 0 1.0 I6 0 LCS- ancestor ancestor
1 2016-11-15 Line-17 mutS small I 2 1 1.0 100.0 0.0 0 1.0 I6 0 LCS- ancestor ancestor
2 2016-11-15 Line-17 mutS small I 3 1 1.0 108.0 0.0 0 1.0 I5 0 LCS- ancestor ancestor
3 2016-11-15 Line-17 mutS small I 4 1 1.0 116.0 0.0 0 1.0 I5 0 LCS- ancestor ancestor
4 2016-11-15 Line-17 mutS small I 5 1 1.0 40.0 11.0 0 0.0 0 0 LCS- ancestor ancestor
In [4]:
# Strain identifiers that get `mutL == True` in the per-strain summary table
# (the summary tests `strain in mutL`, i.e. exact string membership).
# NOTE(review): presumably these are the strains carrying the mutL mutation — confirm.
# NOTE(review): 'PE-4-WT-E3 ' ends with a space; because matching is exact it
# presumably mirrors the spelling used in the spreadsheet — verify before normalising.
mutL = ['Line-17 mutS mutL*', 'PE-10-17-A2',
       'PE-10-17-C2', 'PE-10-17-C4', 'PE-10-17-C5', 'PE-10-17-D7',
       'PE-10-17-D8', 'PE-10-17-E8', 'PE-10-17-F3', 'PE-10-17-F6',
       'PE-10-WT-A6', 'PE-10-WT-B2', 'PE-10-WT-B5', 'PE-10-WT-C6',
       'PE-10-WT-E1', 'PE-10-WT-E7', 'PE-10-WT-F2', 
        'PE-2-WT-A1', 'PE-2-WT-A5', 'PE-3-WT-E2', 'PE-4-WT-E3 ', 'PE-4-WT-E3 wssE',
       'PE-5-WT-A5', 'PE-5-WT-C3', 'PE-5-WT-E3','SBW25 mutL*']
In [5]:
# Sanity check: report every cell of the count columns that is neither NaN nor
# convertible with int(). Nothing is printed when all values pass.
cols = ["Extinct", "Mat", "Count WS", "Count SM"]
for i, r in data.iterrows():
    for c in cols:
        value = r[c]
        try:
            if np.isnan(value):
                continue
            int(value)
        except Exception as ex:
            # Best-effort report: show the error with enough context to find the row.
            print(ex, i, c, value, r["Strain"])
In [6]:
# One summary record per (strain, microcosm size, treatment).
# For every phase present in the group the record stores:
#   n_phase<p>        number of rows of that phase (rows with missing counts included)
#   extinct/mat/WS/SM number of rows whose non-missing value is > 0
records = []
for (strain, size, treatment), df in data.groupby(['Strain', "Size", "treatment"]):
    record = {
        "strain": strain,
        "size": "L" if size.lower() == "large" else "S",
        "mutL": strain in mutL,
        "treatment": treatment,
        "microcosm": df.microcosm.unique()[0],
    }
    for p, d in df.groupby("Phase"):
        record.update({
            f"n_phase{p}": d.shape[0],
            f"extinct_phase{p}": int((d.Extinct.dropna() > 0).sum()),
            f"mat_phase{p}": int((d.Mat.dropna() > 0).sum()),
            # "Count WS" is cast to float before the comparison, unlike the other columns.
            f"WS_phase{p}": int((np.float64(d["Count WS"].dropna()) > 0).sum()),
            f"SM_phase{p}": int((d["Count SM"].dropna() > 0).sum()),
        })
    records.append(record)

strains = pd.DataFrame(records)

# Per-step success frequencies and their product (the three steps are multiplied
# as if independent).
strains["proba_mat_p1"] = strains["mat_phase1"] / strains["n_phase1"]
strains["proba_SM_p1"] = strains["SM_phase1"] / strains["n_phase1"]
strains["proba_WS_p2"] = strains["WS_phase2"] / strains["n_phase2"]
strains["proba_lifecycle"] = (
    strains["proba_mat_p1"] * strains["proba_SM_p1"] * strains["proba_WS_p2"]
)
strains.head()
Out[6]:
strain size mutL treatment microcosm n_phase1 extinct_phase1 mat_phase1 WS_phase1 SM_phase1 n_phase2 extinct_phase2 mat_phase2 WS_phase2 SM_phase2 proba_mat_p1 proba_SM_p1 proba_WS_p2 proba_lifecycle
0 Line-17 L False ancestor LCS+ ancestor 72 50 24 69 70 72 0 31 63 63 0.333333 0.972222 0.875000 0.283565
1 Line-17 S False ancestor LCS+ ancestor 72 4 69 72 71 72 0 58 71 71 0.958333 0.986111 0.986111 0.931898
2 Line-17 mutS L False ancestor LCS- ancestor 184 156 51 179 158 138 25 32 112 136 0.277174 0.858696 0.811594 0.193166
3 Line-17 mutS S False ancestor LCS- ancestor 88 51 60 88 65 88 13 40 69 72 0.681818 0.738636 0.784091 0.394880
4 Line-17 mutS mutL* S True ancestor LCS- ancestor $mutL$ 48 11 35 45 46 37 0 18 35 35 0.729167 0.958333 0.945946 0.661013
In [7]:
import sqlite3
from contextlib import closing

# Store the summary table in the project database, replacing any previous version.
# A sqlite3 connection used as a context manager only commits or rolls back the
# transaction; it does not close the connection. closing() releases the file
# handle, and the inner `with database:` keeps the commit-on-success behaviour.
with closing(sqlite3.connect("lce_data.sqlite")) as database:
    with database:
        strains.to_sql("fitness_assays", database, if_exists="replace", index=False)
In [8]:
# Export the summary table as CSV and as a styled, standalone HTML page.
strains.to_csv(os.path.join(EXPORT_PATH,"fitness_assays.csv"))
border =  [{'selector': 'th', 'props': 'border-left: 1px solid black'},
           {'selector': 'td', 'props': 'border-left: 1px solid black'}]
# NOTE(review): the ('ID','name') key does not match any column shown in the
# `strains` table, so this border rule probably has no visible effect —
# confirm, then point it at a real column or drop it.
html = (
    strains.style
    .set_table_styles({('ID','name'): border}, overwrite=False, axis=0)
    .set_sticky(axis="index")
    .set_sticky(axis="columns")
    # In-cell bars for every probability column.
    .bar(subset=[x for x in strains.columns if "proba" in x], color=matplotlib.colors.to_hex('C0'))
    .to_html()
)

# The page declares charset=utf-8, so write it as UTF-8 regardless of the
# platform default encoding, and close the <html> element that the header opens.
with open(os.path.join(WWW_PATH, "fitness_assays.html"), 'w', encoding="utf-8") as f:
    f.write("<html><head><meta charset=\"utf-8\"><title>Fitness Assays Data</title><style> table, th, td {border: 1px solid} table{border-collapse: collapse;} thead{background-color:white;} </style></head><body>")
    f.write(html)
    f.write("</body></html>")
In [9]:
# [count column of `strains`, panel title] pairs, one per life-cycle step.
# plot() and plot_wide() draw one panel per entry, in this order.
# plot_wide() letters the panels with "bcd"[i], so it supports at most three entries.
plots = [["mat_phase1", "Probability to avoid soma failure\n during Ph. I"],
         ['SM_phase1', "Probability to produce germ cells\n during Ph. I"],
         ['WS_phase2', "Probability to produce soma cells\n during Ph. II"]]
In [10]:
def plotproba(col, df, title, ax,number=True, col_color=None): 
    """Draw one row per strain showing the success frequency of a life-cycle step.

    For every row of `df` the panel shows the observed frequency
    ``row[col] / row[n_phase<p>]`` as a dot, the central 50% interval of the
    Beta(1 + successes, 1 + failures) posterior as a bar and its central 95%
    interval as a line. The frequency is printed above the dot and the raw
    fraction "successes/trials" below it.

    Parameters
    ----------
    col : str
        Name of the success-count column; the denominator column is
        ``n_phase1`` when `col` contains "phase1" and ``n_phase2`` otherwise.
    df : pandas.DataFrame
        One row per strain; must provide `col`, the denominator column,
        ``microcosm`` (used as y tick label) and `col_color` when given.
    title : str
        Panel title.
    ax : matplotlib axes
        Axes to draw on.
    number : unused
        Kept only for backward compatibility; the value is ignored.
    col_color : str, optional
        Name of a column whose truthy rows are drawn in 'C1' instead of 'C0'.
    """
    # The denominator column depends only on `col`, so resolve it once.
    phase = 1 if 'phase1' in col else 2
    labels = []
    yticks = []
    for u, (_, row) in enumerate(df.iterrows(), start=1):
        c = 'C1' if col_color and row[col_color] else 'C0'
        successes = row[col]
        trials = row[f"n_phase{phase}"]
        # Posterior of the success probability under a uniform Beta(1, 1) prior.
        posterior = scipy.stats.beta(1 + successes, 1 + trials - successes)
        proba = successes / trials
        low50, high50 = posterior.interval(0.50)
        low95, high95 = posterior.interval(0.95)

        labels.append(f"{row.microcosm.strip():>30}")
        yticks.append(u)
        ax.scatter(proba, u, color=c)
        ax.barh(u, high50 - low50, left=low50, alpha=0.5, color=c)
        ax.hlines(u, xmin=low95, xmax=high95, color=c, lw=4)

        ax.text(proba, u+0.4, f"{proba:03.2f}", ha='center', va='top')
        # Counts are NaN for strains without data for this phase; skip the
        # fraction label in that case instead of swallowing every exception.
        try:
            fraction = f"{int(successes)}/{int(trials)}"
        except (TypeError, ValueError, OverflowError):
            fraction = None
        if fraction is not None:
            ax.text(proba, u-0.4, fraction, ha='center')
    for u in yticks:
        ax.axhline(u, color='k', ls=":", alpha=0.5)

    ax.set(yticks=yticks, yticklabels=labels, 
          xlim=(0,1))
    ax.set_title(title, font={'weight':'bold'})
    
    
def plotproba2(col, df, title, ax, col_color=None): 
    """Draw one dot per strain for an already-computed probability column.

    Each row of `df` is placed on its own horizontal line (y = 1, 2, ... in row
    order) with a dot at ``row[col]`` and the value printed above it.

    Parameters
    ----------
    col : str
        Name of the column holding the probability to plot.
    df : pandas.DataFrame
        One row per strain. When it has a ``microcosm`` column, that label is
        used for the y ticks (same formatting as `plotproba`); otherwise the
        tick labels are left empty.
    title : str
        Panel title.
    ax : matplotlib axes
        Axes to draw on.
    col_color : str, optional
        Name of a column whose truthy rows are drawn in 'C1' instead of 'C0'.
    """
    # Previously `labels` stayed empty, which blanked the y tick labels whenever
    # this panel was not sharing its axis with a plotproba() panel.
    has_labels = "microcosm" in df.columns
    labels = []
    yticks = []
    for u, (_, row) in enumerate(df.iterrows(), start=1):
        c = 'C1' if col_color and row[col_color] else 'C0'
        proba = row[col]
        if has_labels:
            labels.append(f"{str(row['microcosm']).strip():>30}")
        yticks.append(u)
        ax.scatter(proba, u, color=c)
        ax.text(proba, u+0.4, f"{proba:03.2f}", ha='center', va='top')
    for u in yticks:
        ax.axhline(u, color='k', ls=":", alpha=0.5)

    ax.set(yticks=yticks, yticklabels=labels, 
          xlim=(0,1))
    ax.set_title(title, font={'weight':'bold'})
    
def plot(df, size=(20,18)):
    """Draw the 2x2 overview figure for the strains in `df`.

    The bottom-right panel shows the overall life-cycle probability
    (`plotproba2`); the other three panels show, in order, the steps listed in
    the module-level `plots` (`plotproba`). All panels share the y axis.

    Returns the ``(fig, ax)`` pair created by ``plt.subplots``.
    """
    fig, ax = plt.subplots(2, 2, figsize=size, layout='constrained', sharey=True)
    lifecycle_panel = ax[1, 1]
    plotproba2("proba_lifecycle", df, "Probability to complete the life cycle", lifecycle_panel)
    for position, (col, title) in enumerate(plots):
        step_panel = ax.flat[position]
        plotproba(col, df, title, step_panel)
    return fig, ax
In [11]:
# Endpoint populations are the "PE-10-..." strains, which convert_PE relabels as
# "11-I-<microcosm>". Match that cycle prefix instead of any "11" substring, so a
# label containing "11" elsewhere is not misclassified, and treat a missing
# label as "not an endpoint" instead of raising.
strains["endpoint"] = [isinstance(x, str) and x.startswith("11-") for x in strains.microcosm]
In [12]:
# 2x2 overview figure per treatment: endpoint populations (best life-cycle
# probability first) followed by the matching ancestor.
# Strains whose label mentions "wss" or "mut" are left out.
st = strains[[('wss' not in x and 'mut' not in x) for x in strains.microcosm]]
for t in utils.TREATMENTS:
    size, genotype = t.split("-",1)
    endpoints = st.query(f"size=='{size}' and treatment=='{genotype}' and endpoint==True").sort_values('proba_lifecycle', ascending=False)
    ancestor = st.query(f"size=='{size}' and microcosm=='{genotype} ancestor'")
    # Use a dedicated name: rebinding `data` would clobber the raw records and
    # break any re-run of the cells above.
    subset = pd.concat([endpoints, ancestor])
    fig,ax = plot(subset,(10,9) )
    fig.suptitle(t)
    print(size, genotype)
    # The wide per-treatment figures are saved as proba_<t>.pdf/.png further
    # down; give the grid layout its own file names so it is not overwritten.
    fig.savefig(os.path.join(EXPORT_PATH, f"proba_{t}_grid.pdf"),bbox_inches='tight')
    fig.savefig(os.path.join(EXPORT_PATH, f"proba_{t}_grid.png"),bbox_inches='tight')
    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)
S LCS+
No description has been provided for this image
L LCS+
No description has been provided for this image
S LCS-
No description has been provided for this image
L LCS-
No description has been provided for this image
In [13]:
# Overview of every strain assayed in large microcosms, ordered by life-cycle probability.
large_strains = strains.loc[strains['size'] == 'L'].sort_values('proba_lifecycle')
fig, ax = plot(large_strains)
fig.suptitle("Large microcosms")
plt.savefig(os.path.join(EXPORT_PATH, "proba_large.pdf"), bbox_inches='tight')
No description has been provided for this image
In [14]:
# Overview of every strain assayed in small microcosms, ordered by life-cycle probability.
small_strains = strains.loc[strains['size'] == 'S'].sort_values('proba_lifecycle')
fig, ax = plot(small_strains)
fig.suptitle("Small microcosms")
plt.savefig(os.path.join(EXPORT_PATH, "proba_small.pdf"), bbox_inches='tight')
No description has been provided for this image
In [15]:
# Selected strains in large microcosms. Rows are drawn bottom-up (y = 1, 2, ...),
# so the list is reversed to put its first entry at the top of the figure.
to_disp = ['Line-17 mutS', 
           'PE-2-WT-A5',
           'PE-2-WT-A5 mutL',
           'PE-3-WT-E2', 
           'PE-4-WT-E3 ',
           'PE-5-WT-E3',
           ][::-1]

fig,ax = plot(strains[strains['size'] == 'L'].set_index('strain').loc[to_disp,:], (10,8))
# The wide version of this figure is saved as "proba.pdf" further down; use a
# distinct file name here so the grid layout is not overwritten.
fig.savefig(os.path.join(EXPORT_PATH, "proba_grid.pdf"),bbox_inches='tight')
No description has been provided for this image
In [16]:
def plot_wide(df, size=(14,5), color=None):
    """Draw the one-row, four-panel figure for the strains in `df`.

    Panel "a" shows the overall life-cycle probability (`plotproba2`); panels
    "b", "c" and "d" show, in order, the steps listed in the module-level
    `plots` (`plotproba`). All panels share the y axis. `color` is forwarded as
    `col_color` to every panel.

    Returns the ``(fig, ax)`` pair created by ``plt.subplots``.
    """
    fig, ax = plt.subplots(1, 4, figsize=size, layout='constrained', sharey=True)
    plotproba2("proba_lifecycle", df, "a. Probability to complete\n the life cycle", ax[0], col_color=color)
    for position, (col, title) in enumerate(plots, start=1):
        letter = "bcd"[position - 1]
        plotproba(col, df, letter + ". " + title, ax[position], col_color=color)
    return fig, ax
In [17]:
# Wide (1x4) figure per treatment: endpoint populations (best life-cycle
# probability first) followed by the matching ancestor.
# Strains whose label mentions "wss" or "mut" are left out.
st = strains[[('wss' not in x and 'mut' not in x) for x in strains.microcosm]]
for t in utils.TREATMENTS:
    size, genotype = t.split("-",1)
    endpoints = st.query(f"size=='{size}' and treatment=='{genotype}' and endpoint==True").sort_values('proba_lifecycle', ascending=False)
    ancestor = st.query(f"size=='{size}' and microcosm=='{genotype} ancestor'")
    # Use a dedicated name: rebinding `data` would clobber the raw records and
    # break any re-run of the cells above.
    subset = pd.concat([endpoints, ancestor])
    fig,ax = plot_wide(subset)
    fig.suptitle(t, fontweight='bold',fontsize=20)
    print(size, genotype)
    fig.savefig(os.path.join(EXPORT_PATH, f"proba_{t}.pdf"),bbox_inches='tight')
    fig.savefig(os.path.join(EXPORT_PATH, f"proba_{t}.png"),bbox_inches='tight')
    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)
S LCS+
No description has been provided for this image
L LCS+
No description has been provided for this image
S LCS-
No description has been provided for this image
L LCS-
No description has been provided for this image
In [18]:
# Selected strains in large microcosms. Rows are drawn bottom-up (y = 1, 2, ...),
# so the list is reversed to put its first entry at the top of the figure.
selected_top_to_bottom = [
    'Line-17 mutS',
    'PE-2-WT-A5',
    'PE-2-WT-A5 mutL',
    'PE-3-WT-E2',
    'PE-4-WT-E3 ',
    'PE-5-WT-E3',
]
to_disp = list(reversed(selected_top_to_bottom))

large_by_strain = strains.loc[strains['size'] == 'L'].set_index('strain')
fig, ax = plot_wide(large_by_strain.loc[to_disp, :], color='mutL')
plt.savefig(os.path.join(EXPORT_PATH, "proba.pdf"), bbox_inches='tight')
No description has been provided for this image