(FIX)Benchmark¶
1. Environment¶
In [1]:
Copied!
# Environment setup: imports first, then cfe backend and logging configuration.
# NOTE(review): `sc` (scanpy) is not referenced in the visible cells — confirm it is still needed.
import cfe
import scanpy as sc
import pandas as pd
# Select the "python_function" backend in cfe's settings.
cfe.settings.backend = "python_function"
# Set the cfe logger to INFO level.
cfe.logger.setLevel("INFO")
# Environment setup: imports first, then cfe backend and logging configuration.
# NOTE(review): `sc` (scanpy) is not referenced in the visible cells — confirm it is still needed.
import cfe
import scanpy as sc
import pandas as pd
# Select the "python_function" backend in cfe's settings.
cfe.settings.backend = "python_function"
# Set the cfe logger to INFO level.
cfe.logger.setLevel("INFO")
[2025年07月02日 09时23分58秒] INFO _____ _ _ ______ _ ______ _ / ____| | | | ____| | | | ____| | | | | ___| | | |__ __ _| |_ ___| |__ __ ___ __ | | ___ _ __ ___ _ __ | | / _ \ | | __/ _` | __/ _ \ __| \ \/ / '_ \| |/ _ \| '__/ _ \ '__| | |___| __/ | | | | (_| | || __/ |____ > <| |_) | | (_) | | | __/ | \_____\___|_|_|_| \__,_|\__\___|______/_/\_\ .__/|_|\___/|_| \___|_| | | |_|
2. Data¶
In [2]:
Copied!
# Load the pancreas dataset (from scvelo), asking the reader for a 500-cell sample.
n_cells = 500
fadata = cfe.data.read_pancrease(n_obs=n_cells)

# Show the loaded object as the cell output.
fadata
# Load the pancreas dataset (from scvelo), asking the reader for a 500-cell sample.
n_cells = 500
fadata = cfe.data.read_pancrease(n_obs=n_cells)

# Show the loaded object as the cell output.
fadata
Out[2]:
AnnData object with n_obs × n_vars = 500 × 27998
obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score'
var: 'highly_variable_genes'
uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca', 'cfe'
obsm: 'X_pca', 'X_umap'
layers: 'spliced', 'unspliced', 'expression', 'count'
In [3]:
Copied!
# Reference trajectory, written by hand: one directed (from, to) milestone edge per row.
milestone_edges = [
    ("Ductal", "Ngn3 low EP"),
    ("Ngn3 low EP", "Ngn3 high EP"),
    ("Ngn3 high EP", "Pre-endocrine"),
    # Pre-endocrine fans out into the four terminal milestones.
    ("Pre-endocrine", "Alpha"),
    ("Pre-endocrine", "Beta"),
    ("Pre-endocrine", "Delta"),
    ("Pre-endocrine", "Epsilon"),
]
milestone_network = pd.DataFrame(milestone_edges, columns=["from", "to"])

# Attach the hand-written network to fadata.
fadata.add_trajectory_mannually(milestone_network)
# Reference trajectory, written by hand: one directed (from, to) milestone edge per row.
milestone_edges = [
    ("Ductal", "Ngn3 low EP"),
    ("Ngn3 low EP", "Ngn3 high EP"),
    ("Ngn3 high EP", "Pre-endocrine"),
    # Pre-endocrine fans out into the four terminal milestones.
    ("Pre-endocrine", "Alpha"),
    ("Pre-endocrine", "Beta"),
    ("Pre-endocrine", "Delta"),
    ("Pre-endocrine", "Epsilon"),
]
milestone_network = pd.DataFrame(milestone_edges, columns=["from", "to"])

# Attach the hand-written network to fadata.
fadata.add_trajectory_mannually(milestone_network)
In [4]:
Copied!
# Names reused by the cells further down.
basis = "umap"
cluster_key = "milestone_color"
cluster_key_list = ["milestone", cluster_key]

# Group cells onto their nearest milestones; the grouping is stored under `cluster_key`
# (presumably as a column of fadata.obs — verify against the cfe API).
fadata.group_onto_nearest_milestones(cluster_key=cluster_key)

# Plot the graph and then the trajectory, coloured by both keys.
cfe.plot.plot_graph(fadata, color=cluster_key_list)
cfe.plot.plot_trajectory(fadata, basis=basis, color=cluster_key_list)
# Names reused by the cells further down.
basis = "umap"
cluster_key = "milestone_color"
cluster_key_list = ["milestone", cluster_key]

# Group cells onto their nearest milestones; the grouping is stored under `cluster_key`
# (presumably as a column of fadata.obs — verify against the cfe API).
fadata.group_onto_nearest_milestones(cluster_key=cluster_key)

# Plot the graph and then the trajectory, coloured by both keys.
cfe.plot.plot_graph(fadata, color=cluster_key_list)
cfe.plot.plot_trajectory(fadata, basis=basis, color=cluster_key_list)
Out[4]:
<Axes: title={'center': 'milestone_color'}, xlabel='UMAP1', ylabel='UMAP2'>
3. Method¶
In [5]:
Copied!
# Run the selected trajectory-inference methods on fadata and plot trajectories on the UMAP basis.
# TODO: PAGA needs to be optimized (disconnected graph)
# Prior information handed to fadata: a start cell id and one group label per cell.
# NOTE(review): start_id is simply the first cell in fadata.obs — confirm this cell is a sensible root.
prior_information = {
"start_id": fadata.obs.index[0],
"groups_id": fadata.obs[cluster_key].tolist()
}
# NOTE(review): `parameters` is not used again in this cell — confirm whether it should be passed to the methods.
parameters = {"filter_features": False, "connectivity_cutoff": 0.3}
fadata.add_prior_information(**prior_information) # add prior information to fadata
# choose methods
# full candidate list, kept for reference (paga, angle and scvelo are disabled, see the TODOs):
#method_name_list = ["paga", "comp1", "angle", "state_comp", "cluster_mst", "projection_mst", "graph_mst", "scvelo"]
# TODO: angle results in a metric calculation error
# TODO: scvelo needs an appropriate velocity wrapper
method_name_list = ["comp1", "state_comp","cluster_mst","projection_mst","graph_mst"]
# execute methods: build a FateMethod for each name and infer a trajectory on fadata
# NOTE(review): indentation is lost in this export — confirm whether the final plot call belongs inside the loop.
for method_name in method_name_list:
method = cfe.method.FateMethod(method_name=method_name)
method.infer_trajectory(fadata)
cfe.plot.plot_trajectory(fadata, basis="umap", color=cluster_key_list)
# Run the selected trajectory-inference methods on fadata and plot trajectories on the UMAP basis.
# TODO: PAGA needs to be optimized (disconnected graph)
# Prior information handed to fadata: a start cell id and one group label per cell.
# NOTE(review): start_id is simply the first cell in fadata.obs — confirm this cell is a sensible root.
prior_information = {
"start_id": fadata.obs.index[0],
"groups_id": fadata.obs[cluster_key].tolist()
}
# NOTE(review): `parameters` is not used again in this cell — confirm whether it should be passed to the methods.
parameters = {"filter_features": False, "connectivity_cutoff": 0.3}
fadata.add_prior_information(**prior_information) # add prior information to fadata
# choose methods
# full candidate list, kept for reference (paga, angle and scvelo are disabled, see the TODOs):
#method_name_list = ["paga", "comp1", "angle", "state_comp", "cluster_mst", "projection_mst", "graph_mst", "scvelo"]
# TODO: angle results in a metric calculation error
# TODO: scvelo needs an appropriate velocity wrapper
method_name_list = ["comp1", "state_comp","cluster_mst","projection_mst","graph_mst"]
# execute methods: build a FateMethod for each name and infer a trajectory on fadata
# NOTE(review): indentation is lost in this export — confirm whether the final plot call belongs inside the loop.
for method_name in method_name_list:
method = cfe.method.FateMethod(method_name=method_name)
method.infer_trajectory(fadata)
cfe.plot.plot_trajectory(fadata, basis="umap", color=cluster_key_list)
[2025年07月02日 09时24分04秒] INFO Loaded function: <function cf_comp1 at 0x7f87f83b7b50> from /home/huang/PyCode/scRNA/CellFateExplorer/CellFateExplorer/cfe/method/functi on/cf_comp1.py INFO method_backend: <cfe.method.fate_function_backend.FunctionBackend object at 0x7f87f84a9150> [2025年07月02日 09时24分05秒] INFO Loaded function: <function cf_state_comp at 0x7f87f926fe20> from /home/huang/PyCode/scRNA/CellFateExplorer/CellFateExplorer/cfe/method/functi on/cf_state_comp.py INFO method_backend: <cfe.method.fate_function_backend.FunctionBackend object at 0x7f87fc6ca920> [2025年07月02日 09时24分07秒] INFO Loaded function: <function cf_cluster_mst at 0x7f87f8f705e0> from /home/huang/PyCode/scRNA/CellFateExplorer/CellFateExplorer/cfe/method/functi on/cf_cluster_mst.py INFO method_backend: <cfe.method.fate_function_backend.FunctionBackend object at 0x7f87fa5f52d0> [2025年07月02日 09时24分08秒] INFO Loaded function: <function cf_projection_mst at 0x7f87f7b4aa70> from /home/huang/PyCode/scRNA/CellFateExplorer/CellFateExplorer/cfe/method/functi on/cf_projection_mst.py INFO method_backend: <cfe.method.fate_function_backend.FunctionBackend object at 0x7f87f896ee30> [2025年07月02日 09时24分09秒] INFO Loaded function: <function cf_graph_mst at 0x7f87f3fe11b0> from /home/huang/PyCode/scRNA/CellFateExplorer/CellFateExplorer/cfe/method/functi on/cf_graph_mst.py INFO method_backend: <cfe.method.fate_function_backend.FunctionBackend object at 0x7f87f3cf6a40> [2025年07月02日 09时24分13秒] WARNING The number of colors(260) is greater than the number of colors in the 'Set3' palette(12), and the 'husl' palette selection is used.
Show model names
In [6]:
Copied!
# Model names stored on fadata, first in parsed form (the default) ...
parsed_model_name_list = fadata.get_all_model_name()
# ... then as stored, with parsing switched off.
model_name_list = fadata.get_all_model_name(parse=False)

# Display both lists side by side as the cell output.
(parsed_model_name_list, model_name_list)
# Model names stored on fadata, first in parsed form (the default) ...
parsed_model_name_list = fadata.get_all_model_name()
# ... then as stored, with parsing switched off.
model_name_list = fadata.get_all_model_name(parse=False)

# Display both lists side by side as the cell output.
(parsed_model_name_list, model_name_list)
[2025年07月02日 09时24分27秒] WARNING 'ref' is not a valid random_time_string, don't need parse
Out[6]:
(['ref', 'comp1-python_function', 'state_comp-python_function', 'cluster_mst-python_function', 'projection_mst-python_function', 'graph_mst-python_function'], ['ref', '20250702_092404__comp1-python_function__i8HFlNFBxM', '20250702_092405__state_comp-python_function__mnHc6H1N0x', '20250702_092407__cluster_mst-python_function__ktppUK6d4l', '20250702_092408__projection_mst-python_function__CyA1lrEann', '20250702_092409__graph_mst-python_function__5hBTb7LQoQ'])
4. Metric¶
Available metrics:
In [7]:
Copied!
# Display the metrics table exposed by cfe.metric as the cell output.
cfe.metric.metrics
# Display the metrics table exposed by cfe.metric as the cell output.
cfe.metric.metrics
Out[7]:
| metric_id | plotmath | latex | html | long_name | category | type | perfect | worst | symmetric | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | correlation | cor[dist] | \mathit{cor}_{\textrm{dist}} | cor<sub>dist</sub> | Geodesic distance correlation | cell positions | specific | 1 | 0.0 | True |
| 1 | rf_mse | MSE[rf] | \mathit{MSE}_{\textit{rf}} | MSE<sub>rf</sub> | Random Forest MSE | neighbourhood | specific | 0 | 0.3 | False |
| 2 | rf_nmse | NMSE[rf] | \mathit{NMSE}_{\textit{rf}} | NMSE<sub>rf</sub> | Random Forest Normalised MSE | neighbourhood | specific | 1 | 0.0 | False |
| 3 | rf_rsq | R[rf]^2 | R^{2}_{rf} | R<sup>2</sup><sub>rf</sub> | Random Forest R² | neighbourhood | specific | 1 | 0.0 | False |
| 4 | lm_nmse | NMSE[lm] | \mathit{NMSE}_{\textit{lm}} | NMSE<sub>lm</sub> | Linear regression Normalised MSE | neighbourhood | specific | 1 | 0.0 | False |
| 5 | lm_mse | MSE[lm] | \mathit{MSE}_{\textit{lm}} | MSE<sub>lm</sub> | Linear regression MSE | neighbourhood | specific | 0 | 0.3 | False |
| 6 | lm_rsq | R[lm]^2 | R^{2}_{lm} | R<sup>2</sup><sub>lm</sub> | Linear regression R² | neighbourhood | specific | 1 | 0.0 | False |
| 7 | edge_flip | edgeflip | \textrm{edgeflip} | edgeflip | Edge flip | topology | specific | 1 | 0.0 | True |
| 8 | him | HIM | \textrm{HIM} | HIM | Hamming-Ipsen-Mikhailov similarity | topology | specific | 1 | 0.0 | True |
| 9 | isomorphic | isomorphic | \textrm{isomorphic} | Isomorphic | isomorphic | topology | specific | 1 | 0.0 | True |
| 10 | featureimp_cor | cor[features] | \mathit{cor}_{\textrm{features}} | cor<sub>features</sub> | Feature importance correlation | features | application | 1 | 0.0 | True |
| 11 | featureimp_wcor | wcor[features] | \mathit{wcor}_{\textrm{features}} | wcor<sub>features</sub> | Feature importance weighted correlation | features | application | 1 | 0.0 | False |
| 12 | featureimp_ks | ks[features] | \mathit{ks}_{\textrm{features}} | ks<sub>feature</sub> | Feature importance enrichment ks | features | application | 1 | 0.0 | True |
| 13 | featureimp_wilcox | wilcox[features] | \mathit{wilcox}_{\textrm{features}} | wilcox<sub>feature</sub> | Feature importance enrichment wilcox | features | application | 1 | 0.0 | True |
| 14 | F1_branches | F1[branches] | \mathit{F1}_{\textit{branches}} | F1<sub>branches</sub> | Overlap between the branches | branch assignment | specific | 1 | 0.0 | True |
| 15 | F1_milestones | F1[milestones] | \mathit{F1}_{\textit{milestones}} | F1<sub>milestones</sub> | Overlap between the milestones | branch assignment | specific | 1 | 0.0 | True |
| 16 | harm_mean | mean[harmonic] | \mathit{mean}_{\textit{harmonic}} | mean<sub>harmonic</sub> | Harmonic mean | average | overall | 1 | 0.0 | False |
| 17 | geom_mean | mean[geometric] | \mathit{mean}_{\textit{geometric}} | mean<sub>geometric</sub> | Geometric mean | average | overall | 1 | 0.0 | False |
| 18 | arith_mean | mean[arithmetic] | \mathit{mean}_{\textit{arithmetic}} | mean<sub>arithmetic</sub> | Arithmetic mean | average | overall | 1 | 0.0 | False |
Metrics chosen for calculation
In [8]:
Copied!
# Metric ids to compute, in the column order of the results table.
implemented = [
    # cell positions
    "correlation",
    # neighbourhood (random forest)
    "rf_mse",
    "rf_rsq",
    "rf_nmse",
    # neighbourhood (linear regression)
    "lm_mse",
    "lm_rsq",
    "lm_nmse",
    # topology
    "edge_flip",
    "him",
    # features: disabled until feature importance is fixed
    #   "featureimp_cor", "featureimp_wcor", "featureimp_ks", "featureimp_wilcox"
    # branch assignment
    "F1_branches",
    "F1_milestones",
]

# Show the final selection as the cell output.
implemented
# Metric ids to compute, in the column order of the results table.
implemented = [
    # cell positions
    "correlation",
    # neighbourhood (random forest)
    "rf_mse",
    "rf_rsq",
    "rf_nmse",
    # neighbourhood (linear regression)
    "lm_mse",
    "lm_rsq",
    "lm_nmse",
    # topology
    "edge_flip",
    "him",
    # features: disabled until feature importance is fixed
    #   "featureimp_cor", "featureimp_wcor", "featureimp_ks", "featureimp_wilcox"
    # branch assignment
    "F1_branches",
    "F1_milestones",
]

# Show the final selection as the cell output.
implemented
Out[8]:
['correlation', 'rf_mse', 'rf_rsq', 'rf_nmse', 'lm_mse', 'lm_rsq', 'lm_nmse', 'edge_flip', 'him', 'F1_branches', 'F1_milestones']
Metric calculation
In [9]:
Copied!
# 1. Every trajectory stored on fadata, keyed by model name.
models = list(fadata.uns["cfe"]["trajectory_history_dict"].keys())

# 2. Make sure every model, and the reference, has waypoints attached.
#    dict.fromkeys de-duplicates while keeping insertion order, so the models are
#    visited in a reproducible order and fadata.model_name always ends on the same
#    model (iterating a set of strings can change order between kernel sessions).
for wrapped_model in dict.fromkeys([*models, "ref"]):
    fadata.model_name = wrapped_model
    if not fadata.is_wrapped_with_waypoints:
        fadata.add_waypoints()

# 3. Score each model against the reference and collect one result per model.
records = {}
for model in models:
    # To skip the reference itself (ref vs ref), add `if model == "ref": continue` here.
    print(f"Calculating metrics for model: {model}")
    res = cfe.metric.calculate_metrics(
        fadata,
        now_model=model,
        ref_model="ref",
        simplify=False,  # whether to simplify the result
        metrics=implemented,
    )
    # Fill any requested metric that is missing from the result with NaN.
    for metric_id in implemented:
        res.setdefault(metric_id, float("nan"))
    records[model] = res

# 4. Build the results table: one row per model, one column per requested metric.
df = pd.DataFrame.from_dict(records, orient="index", columns=implemented)

# 5. (Optional) give the row index a readable name, or save the table to a file.
df.index.name = "method"
df
# 1. Every trajectory stored on fadata, keyed by model name.
models = list(fadata.uns["cfe"]["trajectory_history_dict"].keys())

# 2. Make sure every model, and the reference, has waypoints attached.
#    dict.fromkeys de-duplicates while keeping insertion order, so the models are
#    visited in a reproducible order and fadata.model_name always ends on the same
#    model (iterating a set of strings can change order between kernel sessions).
for wrapped_model in dict.fromkeys([*models, "ref"]):
    fadata.model_name = wrapped_model
    if not fadata.is_wrapped_with_waypoints:
        fadata.add_waypoints()

# 3. Score each model against the reference and collect one result per model.
records = {}
for model in models:
    # To skip the reference itself (ref vs ref), add `if model == "ref": continue` here.
    print(f"Calculating metrics for model: {model}")
    res = cfe.metric.calculate_metrics(
        fadata,
        now_model=model,
        ref_model="ref",
        simplify=False,  # whether to simplify the result
        metrics=implemented,
    )
    # Fill any requested metric that is missing from the result with NaN.
    for metric_id in implemented:
        res.setdefault(metric_id, float("nan"))
    records[model] = res

# 4. Build the results table: one row per model, one column per requested metric.
df = pd.DataFrame.from_dict(records, orient="index", columns=implemented)

# 5. (Optional) give the row index a readable name, or save the table to a file.
df.index.name = "method"
df
Calculating metrics for model: ref Calculating metrics for model: 20250702_092404__comp1-python_function__i8HFlNFBxM Calculating metrics for model: 20250702_092405__state_comp-python_function__mnHc6H1N0x Calculating metrics for model: 20250702_092407__cluster_mst-python_function__ktppUK6d4l Calculating metrics for model: 20250702_092408__projection_mst-python_function__CyA1lrEann Calculating metrics for model: 20250702_092409__graph_mst-python_function__5hBTb7LQoQ
Out[9]:
| correlation | rf_mse | rf_rsq | rf_nmse | lm_mse | lm_rsq | lm_nmse | edge_flip | him | F1_branches | F1_milestones | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| method | |||||||||||
| ref | 1.000000 | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000 | 1.000000 | NaN | NaN |
| 20250702_092404__comp1-python_function__i8HFlNFBxM | 0.197914 | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 | 0.250013 | NaN | NaN |
| 20250702_092405__state_comp-python_function__mnHc6H1N0x | 0.000000 | NaN | NaN | NaN | NaN | NaN | NaN | 0.285714 | 0.191442 | NaN | NaN |
| 20250702_092407__cluster_mst-python_function__ktppUK6d4l | 0.000000 | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 | 0.374486 | NaN | NaN |
| 20250702_092408__projection_mst-python_function__CyA1lrEann | 0.000000 | NaN | NaN | NaN | NaN | NaN | NaN | 0.666667 | 0.263755 | NaN | NaN |
| 20250702_092409__graph_mst-python_function__5hBTb7LQoQ | 0.022460 | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 | 0.000000 | NaN | NaN |