This notebook uses a synthetic dataset to illustrate how Colgen works.
In [1]:
import pandas as pd
import colgen.display
import colgen.survival
import colgen.models
import colgen.mutation
import matplotlib.colors
import matplotlib.pyplot as plt
!mkdir -p 08_illustrate
In [2]:
def format_tree_labels(keys, ax, scales):
    """Put human-readable tick labels on a genealogy plot.

    Each key is a dash-separated node name such as ``"2-II-a5"``.  The x tick
    placed at ``scales[0](key)`` is labelled with the first two dash-separated
    fields (``"2-II"``); the y tick placed at ``scales[1](key)`` is labelled
    with the last field (``"a5"``).  When several keys map to the same
    position, the label of the last such key is kept.

    Parameters
    ----------
    keys : iterable of str
        Node names to derive the ticks from.
    ax : object with ``set_xticks(ticks, labels)`` / ``set_yticks(ticks, labels)``
        Axes to annotate.
    scales : sequence of two callables
        ``scales[0]`` maps a node name to its x position, ``scales[1]`` to its
        y position.

    Returns
    -------
    None
        The axes are modified in place.  With no keys the axes are left
        untouched (previously this raised ``TypeError`` because
        ``set_xticks`` was called without arguments).
    """
    keys = list(keys)  # accept one-shot iterables; we iterate twice below
    if not keys:
        return

    xticks = {scales[0](k): '-'.join(k.split('-')[:2]) for k in keys}
    ax.set_xticks(list(xticks.keys()), list(xticks.values()))

    yticks = {scales[1](k): k.split('-')[-1] for k in keys}
    ax.set_yticks(list(yticks.keys()), list(yticks.values()))
In [3]:
# --- Synthetic genealogy ------------------------------------------------------
# `r` is the parent label given to the eight founding nodes.
r = 'ROOT'
# `c` is the palette (hex RGBA): one colour per founding node; descendants
# reuse the colour of the lineage they belong to.
c = ["#c5cfe4ff", "#c0c0c0ff", "#f6e4a5ff", "#b1cce4ff", "#eadcc5ff", "#8ec790ff", "#f8be71ff", "#78a2d0ff"]
# Each record is (name, flag, parent, color):
#   name   -- node label such as "2-II-a5"
#   flag   -- 1/0 value that is *inverted* below to obtain the boolean `extinct`
#             column (so 1 means "not extinct", 0 means "extinct")
#   parent -- name of the parent node, or `r` for a founder
#   color  -- entry of the palette `c`
d = [("1-I-a1", 0, r, c[0]), ("1-I-a2", 1, r, c[1]), ("1-I-a3", 1, r, c[2]), ("1-I-a4", 1, r, c[3]),
     ("1-I-a5", 0, r, c[4]), ("1-I-a6", 1, r, c[5]), ("1-I-a7", 0, r, c[6]), ("1-I-a8", 1, r, c[7]),
     ("1-II-a1", 1, "1-I-a2", c[1]), ("1-II-a2", 1, "1-I-a2", c[1]), ("1-II-a3", 0, "1-I-a3", c[2]), ("1-II-a4", 1, "1-I-a4", c[3]),
     ("1-II-a5", 0, "1-I-a4", c[3]), ("1-II-a6", 1, "1-I-a6", c[5]), ("1-II-a7", 0, "1-I-a6", c[5]), ("1-II-a8", 1, "1-I-a8", c[7]),
     ("2-I-a1", 1, "1-II-a2", c[1]), ("2-I-a2", 1, "1-II-a2", c[1]), ("2-I-a3", 0, "1-II-a2", c[1]), ("2-I-a4", 0, "1-II-a4", c[3]),
     # "2-I-a5" used c[0] although its parent "1-II-a4" (and every other
     # descendant in the table) carries its lineage colour; fixed to c[3].
     ("2-I-a5", 0, "1-II-a4", c[3]), ("2-I-a6", 1, "1-II-a6", c[5]), ("2-I-a7", 0, "1-II-a6", c[5]), ("2-I-a8", 1, "1-II-a6", c[5]),
     ("2-II-a1", 1, "2-I-a2", c[1]), ("2-II-a2", 1, "2-I-a2", c[1]), ("2-II-a3", 0, "2-I-a2", c[1]), ("2-II-a4", 1, "2-I-a6", c[5]),
     ("2-II-a5", 1, "2-I-a6", c[5]), ("2-II-a6", 1, "2-I-a6", c[5]), ("2-II-a7", 1, "2-I-a6", c[5]), ("2-II-a8", 1, "2-I-a6", c[5]),
     ("3-I-a1", 1, "2-II-a1", c[1]), ("3-I-a2", 1, "2-II-a1", c[1]), ("3-I-a3", 1, "2-II-a4", c[5]), ("3-I-a4", 1, "2-II-a4", c[5]),
     ("3-I-a5", 1, "2-II-a5", c[5]), ("3-I-a6", 1, "2-II-a6", c[5]), ("3-I-a7", 1, "2-II-a7", c[5]), ("3-I-a8", 1, "2-II-a8", c[5]),
     ("3-II-a1", 1, "3-I-a1", c[1]), ("3-II-a2", 0, "3-I-a2", c[1]), ("3-II-a3", 1, "3-I-a3", c[5]), ("3-II-a4", 1, "3-I-a4", c[5]),
     ("3-II-a5", 1, "3-I-a5", c[5]), ("3-II-a6", 1, "3-I-a6", c[5]), ("3-II-a7", 1, "3-I-a7", c[5]), ("3-II-a8", 1, "3-I-a8", c[5]),
     ("4-I-a1", 0, "3-II-a1", c[1]), ("4-I-a2", 1, "3-II-a3", c[5]), ("4-I-a3", 1, "3-II-a3", c[5]), ("4-I-a4", 1, "3-II-a4", c[5]),
     ("4-I-a5", 1, "3-II-a5", c[5]), ("4-I-a6", 1, "3-II-a6", c[5]), ("4-I-a7", 1, "3-II-a7", c[5]), ("4-I-a8", 1, "3-II-a8", c[5]),
     ("4-II-a1", 1, "4-I-a2", c[5]), ("4-II-a2", 1, "4-I-a2", c[5]), ("4-II-a3", 1, "4-I-a3", c[5]), ("4-II-a4", 1, "4-I-a4", c[5]),
     ("4-II-a5", 1, "4-I-a5", c[5]), ("4-II-a6", 1, "4-I-a6", c[5]), ("4-II-a7", 1, "4-I-a7", c[5]), ("4-II-a8", 1, "4-I-a8", c[5]),
     ]
df = pd.DataFrame(d, columns=['name', 'extinct', 'parent', 'color'])
# Invert the 1/0 flags into a boolean `extinct` column.  Done with pandas only:
# the notebook never imports numpy, so the former `np.logical_not` call raised
# NameError on a fresh kernel.
df['extinct'] = ~df['extinct'].astype(bool)

# load_df returns the tree description used by draw_tree together with a
# data frame that replaces `df` from here on.
tree, df = colgen.display.load_df(df)

# --- Figure: genealogy coloured by lineage ------------------------------------
fig, ax = plt.subplots(1, 1, figsize=(5, 2))
_, scales = colgen.display.draw_tree(
    tree['branches'],
    tree['xinfo'],
    tree['yinfo'],
    oinfo=dict.fromkeys(df['name'], 1),        # every node gets the value 1
    color=dict(zip(df['name'], df['color'])),  # node name -> palette colour
    child_color_branch=True,
    size=12*3,
    ax=ax,
)
format_tree_labels(tree['xinfo'].keys(), ax, scales)
fig.savefig("08_illustrate/01_visualisation.svg")
In [4]:
# Fit the survival model (colgen.models.beta) on the genealogy.
# NOTE(review): the meaning of the `sigma_range` triple and of the step counts
# is defined by colgen.survival.fit and is not visible in this notebook.
survival_fit_options = dict(
    sigma_range=(2, 1, 100),
    steps=20,
    max_em_steps=10,
)
df_survival, cv = colgen.survival.fit(df, colgen.models.beta, **survival_fit_options)
In [5]:
# --- Figure: genealogy coloured by change in survival -------------------------
# Per-node survival change, indexed by node name.
survival_change = df_survival.set_index('name')['survival_change']

# TwoSlopeNorm requires vmin < vcenter < vmax.  Clamp the bounds so that they
# always straddle the centre (0): this covers the original "bound is exactly 0"
# case and also fits where every change has the same sign, which previously
# raised ValueError.
mi = min(survival_change.min(), -0.001)
mx = max(survival_change.max(), 0.001)

cmap = matplotlib.colormaps['coolwarm_r']
norm = matplotlib.colors.TwoSlopeNorm(vcenter=0, vmin=mi, vmax=mx)


def colorize(x):
    """Return the RGBA colour of survival change `x` on the diverging scale."""
    return cmap(norm(x))


# node name -> RGBA colour
change = {name: colorize(value) for name, value in survival_change.items()}

fig, ax = plt.subplots(1, 1, figsize=(5, 2))
_, scales = colgen.display.draw_tree(
    tree['branches'],
    tree['xinfo'],
    tree['yinfo'],
    oinfo=dict.fromkeys(df['name'], 1),  # every node gets the value 1
    color=change,
    child_color_branch=True,
    size=12*3,
    ax=ax,
)
format_tree_labels(tree['xinfo'].keys(), ax, scales)
fig.savefig("08_illustrate/breakpoint.svg")
In [6]:
# Left disabled: without an explicit 'sequenced' column colgen.mutation logs a
# warning and assumes the collectives listed in `mutation` were sequenced.
#df['sequenced'] = ["4-II" in x for x in df['name']]

# Observed mutations at the tips: 'm' is seen in 4-II-a1..a3 only, while 'x' is
# seen in all eight 4-II nodes.
observed_m = [(f"4-II-a{k}", 'm') for k in (1, 2, 3)]
observed_x = [(f"4-II-a{k}", 'x') for k in range(1, 9)]
mutation = pd.DataFrame(observed_m + observed_x, columns=['node', 'mutation'])

# NOTE(review): the four positional numbers are passed straight through to
# colgen.mutation.fit; their meaning is not visible here. The zeros are
# presumably what triggers the "divide by zero encountered in log" warnings in
# the cell output -- TODO confirm and consider passing them by keyword.
df_propagate, dt = colgen.mutation.fit(df, mutation, 0.1, 0, 0, 1)
WARNING:colgen.mutation:No column "sequenced", assuming that collectives in the mutation list were sequenced
/home/guilhem/research/2019_colgen/mat/lce-genealogies/colgen-2.0b1/colgen-2.0/colgen/mutation.py:105: RuntimeWarning: divide by zero encountered in log return np.log(p_emission*p_mut) /home/guilhem/research/2019_colgen/mat/lce-genealogies/venv/lib/python3.12/site-packages/numpy/lib/_function_base_impl.py:2480: RuntimeWarning: divide by zero encountered in ? (vectorized) outputs = ufunc(*inputs)
In [7]:
# --- Figure: genealogy coloured by the fitted state of mutation 'm' -----------
nodes = dt.set_index('name')

# Nodes whose 'm' entry is truthy are drawn in 'C3', all others in pale blue.
m_color = {
    node: ('C3' if has_m else "#c5cfe4ff")
    for node, has_m in nodes['m'].items()
}

fig, ax = plt.subplots(1, 1, figsize=(5, 2))
_, scales = colgen.display.draw_tree(
    tree['branches'],
    tree['xinfo'],
    tree['yinfo'],
    oinfo=dict.fromkeys(df['name'], 1),  # every node gets the value 1
    color=m_color,
    child_color_branch=False,
    size=12*3,
    ax=ax,
)

# Put a white square behind (zorder=-1) every node flagged as sequenced.
marked = {node for node, was_sequenced in nodes['sequenced'].items() if was_sequenced}
marked_x = [scales[0](node) for node in marked]
marked_y = [scales[1](node) for node in marked]
ax.scatter(marked_x, marked_y,
           fc='w', ec='k', s=100, marker='s', label='Sequenced', zorder=-1)

format_tree_labels(tree['xinfo'].keys(), ax, scales)
fig.savefig("08_illustrate/mutations.svg")
In [ ]:
In [ ]: