Pathway analysis
In this tutorial, we use an LLM to interpret enriched GO terms or pathways in their actual biological context.
import scanpy as sc
# Load the annotated dataset from the local AnnData file.
adata = sc.read_h5ad("pbmc.h5ad")

# Draw the UMAP coloured by Leiden cluster and by level-1 cell type,
# with the legend labels placed on the embedding itself.
umap_colors = ["leiden", "cell_type_lvl1"]
sc.pl.umap(adata, color=umap_colors, legend_loc="on data", frameon=False)

# Map each Leiden cluster label to the cell type recorded for it in `obs`.
cluster_indexed_obs = adata.obs.set_index("leiden")
celltype_dic = cluster_indexed_obs["cell_type_lvl1"].to_dict()
celltype_dic
{'0': 'CD4 T',
'1': 'B',
'2': 'FCGR3A+ Monocytes',
'3': 'NK',
'4': 'CD8 T',
'5': 'CD14+ Monocytes',
'6': 'Dendritic',
'7': 'Megakaryocytes'}
# Fetch the ranked-gene table for all groups stored under the "logreg_deg" key.
deg_df = sc.get.rank_genes_groups_df(adata, None, key="logreg_deg")
import gseapy as gp

# For every group, run Enrichr on the first 800 gene names of that group and
# keep at most 20 GO Biological Process terms with adjusted p-value < 0.05.
term_dic = {}
# observed=True iterates only over groups that actually occur in the table, so
# an unused category never sends an empty gene list to Enrichr; it also
# silences the pandas FutureWarning about the changing `observed` default.
for gi, sdf in deg_df.groupby("group", observed=True):
    gene_ls = sdf["names"][:800].tolist()
    enr_bp = gp.enrichr(gene_ls, gene_sets=["GO_Biological_Process_2023"], outdir=None)
    significant = enr_bp.res2d["Adjusted P-value"] < 0.05
    term_dic[gi] = enr_bp.res2d.loc[significant, "Term"].tolist()[:20]
/tmp/ipykernel_3058393/1166100747.py:4: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
for gi,sdf in deg_df.groupby("group"):
import gptbioinsightor as gbi

# Set the API key through the environment.
# NOTE(review): "sk-**" is a placeholder; presumably gptbioinsightor reads
# API_KEY from the environment - confirm, and avoid committing a real key.
import os

os.environ["API_KEY"] = "sk-**"

# Free-text description of the sample, passed along with the enriched terms.
background = "Cells are PBMCs from a Healthy Donor."

# Ask the LLM to interpret the per-cluster terms; `out` names the Markdown
# report (presumably written by depict_pathway - TODO confirm).
gbi.depict_pathway(
    term_dic,
    out="Pathway.md",
    celltype_dic=celltype_dic,
    background=background,
    provider="aliyun",
    model="qwen2-72b-instruct",
)