https://github.com/BostonGene/MFP
# Python3.7
import pandas as pd
from portraits.clustering import clustering_profile_metrics, clustering_profile_metrics_plot
from portraits.utils import read_gene_sets, ssgsea_formula, median_scale
# Example script: score gene signatures, scale them and profile candidate clusterings.

# Load the signatures (GMT format, as used by MSigDB).
gmt = read_gene_sets('signatures.gmt')

# Load the log2+1 transformed expression table and transpose it so that genes
# end up in columns.
# NOTE(review): presumably the file on disk stores genes in rows - confirm.
exp = pd.read_csv('expression.tsv', sep='\t', index_col=0).T

# Compute the signature scores, median-scale them and keep a copy on disk.
signature_scores = ssgsea_formula(exp, gmt)
signature_scores_scaled = median_scale(signature_scores)
signature_scores_scaled.to_csv('signature_scores.tsv', sep='\t', index=True)

# Profile the clustering for similarity thresholds between 30% and 65%.
# Above 65% the graph is usually not connected; below 30% the correlation
# is unreasonable.
clustering_metrics = clustering_profile_metrics(
    signature_scores_scaled,
    threshold_mm=(.3, .65),
    step=.01,
)

# Plot the partitions and save the figure.
fig, ax = clustering_profile_metrics_plot(clustering_metrics)
fig.savefig('heat.png', dpi=300)

# Select the best threshold using one or more of the metrics.
# NOTE(review): the threshold is written as a string here; a float (0.51) may
# be required depending on the index dtype of `clustering_metrics` - confirm.
best_threshold = '0.51'
# Signature groups compared by detect_type. Kept as tuples so they cannot be
# mutated; converted to lists at the call site because a tuple passed to
# ``.loc`` would be read as a (row, column) key.
_FIBROTIC_SIGNATURES = (
    'Angiogenesis', 'Endothelium', 'CAF', 'Matrix', 'Matrix_remodeling',
)
_IMMUNE_SIGNATURES = (
    'MHCII', 'Antitumor_cytokines', 'Coactivation_molecules',
    'B_cells', 'NK_cells', 'Checkpoint_inhibition',
    'Effector_cells', 'T_cells', 'Th1_signature',
    'T_cell_traffic', 'MHCI',
)
_ADDITIONAL_SIGNATURES = (
    'Protumor_cytokines', 'Neutrophil_signature', 'Granulocyte_traffic',
    'Macrophages', 'Macrophage_DC_traffic', 'MDSC_traffic', 'MDSC',
    'Th2_signature', 'T_reg_traffic', 'Treg', 'M1_signatures',
)
# Every signature that is averaged and compared against 'Proliferation_rate'.
_NON_PROLIFERATION_SIGNATURES = (
    _FIBROTIC_SIGNATURES + _ADDITIONAL_SIGNATURES + _IMMUNE_SIGNATURES
    + ('EMT_signature',)
)


def detect_type(ser, scores, *, verbose=True):
    """Assign a type label ('F', 'IE', 'IE/F', 'D') to four of the clusters.

    The per-cluster mean of every signature is computed first. The cluster
    with the largest (fibrotic mean - immune mean) difference is labelled
    'F' and the one with the smallest is labelled 'IE'. Among the remaining
    clusters, the one with the largest (mean of all other signatures -
    'Proliferation_rate') difference is labelled 'IE/F' and the one with the
    smallest is labelled 'D'. Any further clusters are left out of the
    mapping.

    :param ser: cluster label per sample; either a pandas Series indexed by
        sample, or a DataFrame indexed by sample with a "group" column
    :param scores: DataFrame of signature scores with samples in the index
        and signature names in the columns
    :param verbose: when True (default) print the intermediate tables and
        the final mapping
    :return: dict mapping cluster label -> type label
    :raises ValueError: if fewer than four distinct clusters are present
    :raises KeyError: if a required signature or sample is missing from
        ``scores``
    """
    labels = ser["group"] if isinstance(ser, pd.DataFrame) else ser

    # Columns are clusters, rows are signatures.
    cmeans = pd.DataFrame({cluster: scores.loc[members.index].mean()
                           for cluster, members in labels.groupby(labels)})
    if cmeans.shape[1] < 4:
        # With fewer clusters the two selection rounds would pick the same
        # cluster twice or run on an empty table.
        raise ValueError(
            'detect_type needs at least 4 distinct clusters, got %d'
            % cmeans.shape[1])
    if verbose:
        print(cmeans)

    mapper = {}

    # Round 1: fibrotic versus immune signatures.
    deltas = (cmeans.loc[list(_FIBROTIC_SIGNATURES)].mean()
              - cmeans.loc[list(_IMMUNE_SIGNATURES)].mean()).sort_values()
    fibrotic_cluster = deltas.index[-1]
    immune_cluster = deltas.index[0]
    mapper[fibrotic_cluster] = 'F'  # That's fibrotic
    mapper[immune_cluster] = 'IE'  # Immune enriched, non-fibrotic

    # The clusters typed above take no part in the second round.
    cmeans = cmeans.drop(columns=[fibrotic_cluster, immune_cluster])
    if verbose:
        print(deltas)
        print(cmeans)

    # Round 2: all remaining signatures versus the proliferation rate.
    deltas = (cmeans.loc[list(_NON_PROLIFERATION_SIGNATURES)].mean()
              - cmeans.loc['Proliferation_rate']).sort_values()
    mapper[deltas.index[-1]] = 'IE/F'  # Immune enriched & fibrotic
    mapper[deltas.index[0]] = 'D'  # Desert
    if verbose:
        print(deltas)
        print(cmeans)
        print(mapper)

    return mapper
# Show the row/column labels of the metrics table (debugging aid for choosing
# a valid `best_threshold` key).
print(clustering_metrics.axes)

# Pick the partition produced at the selected threshold.
# NOTE(review): `perc` is presumably a Series of cluster labels indexed by
# sample - confirm against clustering_profile_metrics.
ser = clustering_metrics.loc[best_threshold]
df = pd.DataFrame({"sam": ser.perc.index, "group": ser.perc})
df.to_csv('group_clusters.tsv', sep='\t', index=False)

# Detect cluster types. detect_type returns a plain dict (cluster -> type),
# which has no to_csv method, so translate each sample's cluster label into
# its type before writing. Clusters absent from the mapping become NaN.
cluster_types = detect_type(df, signature_scores_scaled)
final_clusters = df["group"].map(cluster_types).rename('MFP')

# Output the clusters
final_clusters.to_csv('final_clusters.tsv', sep='\t', index=True)
Bagaev A, Kotlov N, Nomie K, Svekolkin V, Gafurov A, Isaeva O, Osokin N, Kozlov I, Frenkel F, Gancharova O, Almog N, Tsiper M, Ataullakhanov R, Fowler N. Conserved pan-cancer microenvironment subtypes predict response to immunotherapy. Cancer Cell. 2021 Jun 14;39(6):845-865.e7. doi: 10.1016/j.ccell.2021.04.014. Epub 2021 May 20. PMID: 34019806.
如果觉得我的文章对您有用,请随意打赏。你的支持将鼓励我继续创作!