Skip to content

cafe.metric.metric_velocity

cafe.metric.metric_velocity

calculate_velocity_metrics(fadata, cluster_edges, cluster=None, basis=None, model_name=None, recompute_pseudo_velocity=False, return_raw=False, summary=True)

Evaluate velocity estimation results using two metrics: cross-boundary direction correctness and in-cluster coherence.

Parameters:

Name Type Description Default
fadata FateAnnData

Anndata object.

required
cluster_edges list of tuples("A", "B")

Pairs of clusters with a known transition direction A->B.

required
cluster str

key to the cluster column in adata.obs DataFrame.

None
basis str

key to x embedding for visualization.

None
model_name str

model name in raw_wrapper_dict.

None
recompute_pseudo_velocity bool

whether to recompute pseudo velocity.

False
return_raw bool

return aggregated or raw scores.

False
summary bool

if not return_raw, whether to return summary scores.

True

Returns:

Name Type Description
dict

aggregated metric scores.

Source code in cafe/metric/metric_velocity.py
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
def calculate_velocity_metrics(
    fadata: FateAnnData,
    cluster_edges: list,
    cluster: str = None,
    basis: str = None,
    model_name: str = None,
    recompute_pseudo_velocity: bool = False,
    return_raw: bool = False,
    summary: bool = True,
):
    """Evaluate velocity estimation results.

    Two scores are computed on the low-dimensional velocity embedding:
    cross-boundary direction correctness and in-cluster coherence.

    Args:
        fadata (FateAnnData): annotated data object holding the trajectory results.
        cluster_edges (list of tuples("A", "B")): pairs of clusters with a
            transition direction A->B.
        cluster (str): key to the cluster column in the obs DataFrame. When None,
            ``fadata.prior_information.get("cluster")`` is used.
        basis (str): key to the x embedding. When None,
            ``fadata.prior_information.get("basis")`` is used.
        model_name (str): model name passed to ``get_raw_wrapper_dict`` and
            ``get_trajectory_pseudo_velocity``.
        recompute_pseudo_velocity (bool): recompute the pseudo velocity even if
            it is already stored in the raw wrapper dict.
        return_raw (bool): return raw scores instead of aggregated ones.
        summary (bool): if not return_raw, keep only the overall score of each
            metric (the second element returned by the metric function).

    Returns:
        dict: ``{"velocity_cbdir": ..., "velocity_icvcoh": ...}``. Each value is
        whatever the metric function returned, reduced to its second element
        when ``summary`` is True and ``return_raw`` is False.

    """
    prior = fadata.prior_information
    if cluster is None:
        cluster = prior.get("cluster")
    if basis is None:
        basis = prior.get("basis")

    # The velocity key drops the first two characters of the basis key
    # (presumably the "X_" prefix -- TODO confirm against callers).
    velocity_key = f"velocity_{basis[2:]}"
    wrapper_outputs = fadata.get_raw_wrapper_dict(model_name)

    # Compute the pseudo velocity when it is absent or when a refresh is requested.
    is_missing = velocity_key not in wrapper_outputs
    if is_missing or recompute_pseudo_velocity:
        if is_missing:
            logger.info("pseudo velocity don't exist, compute and add it to raw_wrapper_dict ")
        else:
            logger.info("recompute pseudo velocity")
        wrapper_outputs[velocity_key] = fadata.get_trajectory_pseudo_velocity(basis=basis, model_name=model_name)
    velocity_values = wrapper_outputs[velocity_key]

    # The metric functions read uns["neighbors"]["indices"]; derive it from the
    # sparse distance matrix when it has not been stored yet.
    neighbors_info = fadata.uns["neighbors"]
    if "indices" not in neighbors_info:
        logger.debug("extract knn indices to 'adata.uns['neighbors']['indices']' for metric calculation")
        k = neighbors_info["params"]["n_neighbors"]
        if isinstance(k, np.ndarray):
            k = k.item()
        knn_distances = fadata.obsp["distances"]  # csr matrix
        neighbors_info["indices"] = knn_distances.indices.reshape(-1, k - 1)

    # NOTE: metrics are calculated on the low dimensional velocity embedding, which
    # is exposed under obsm only inside this context (presumably removed on exit).
    with temporary_obsm_key(fadata, velocity_key, velocity_values):
        cbdir_result = cross_boundary_correctness(fadata, cluster, velocity_key, cluster_edges, return_raw, basis)
        icvcoh_result = inner_cluster_coh(fadata, cluster, velocity_key, return_raw)

    if return_raw and summary:
        # Raw output takes precedence; nothing is summarised.
        logger.debug("'return_raw`'and 'summary' both set True, only 'return_raw' is effective, don't summary result.")
    elif summary:
        # Aggregated output: keep only the overall score of each metric.
        cbdir_result = cbdir_result[1]
        icvcoh_result = icvcoh_result[1]

    return {
        "velocity_cbdir": cbdir_result,
        "velocity_icvcoh": icvcoh_result,
    }

cross_boundary_correctness(adata, k_cluster, k_velocity, cluster_edges, return_raw=False, x_emb='X_umap')

Cross-Boundary Direction Correctness Score (A->B)

Parameters:

Name Type Description Default
adata Anndata

Anndata object.

required
k_cluster str

key to the cluster column in adata.obs DataFrame.

required
k_velocity str

key to the velocity matrix in adata.obsm.

required
cluster_edges list of tuples("A", "B")

Pairs of clusters with a known transition direction A->B.

required
return_raw bool

return aggregated or raw scores.

False
x_emb str

key to x embedding for visualization.

'X_umap'

Returns:

Name Type Description
dict

all_scores indexed by cluster_edges

or

dict

mean scores indexed by cluster_edges

float

averaged score over all cells.

Source code in cafe/metric/metric_velocity.py
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
def cross_boundary_correctness(adata, k_cluster, k_velocity, cluster_edges, return_raw=False, x_emb="X_umap"):
    """Cross-Boundary Direction Correctness Score (A->B).

    For every cell of cluster A, the displacement vectors towards its
    neighbors that belong to cluster B are compared (cosine similarity) with
    the cell's velocity; the per-cell score is the mean of those similarities.
    Cells with no neighbor in cluster B are skipped.

    Args:
        adata (Anndata): Anndata object.
        k_cluster (str): key to the cluster column in adata.obs DataFrame.
        k_velocity (str): key to the velocity matrix in adata.obsm.
        cluster_edges (list of tuples("A", "B")): pairs of clusters with a
            transition direction A->B.
        return_raw (bool): return aggregated or raw scores.
        x_emb (str): key to the x embedding in adata.obsm.

    Returns:
        dict: per-cell score lists indexed by cluster edge, if ``return_raw``.
        or
        dict: mean scores indexed by cluster edge
        float: mean of the per-edge mean scores.

    """
    velocity_coords = adata.obsm[k_velocity]  # velocity embedding space
    position_coords = adata.obsm[x_emb]  # expression embedding space

    edge_means = {}
    edge_cell_scores = {}

    for source, target in cluster_edges:
        in_source = adata.obs[k_cluster] == source
        # One row of neighbor indices per source-cluster cell.
        neighbor_rows = adata.uns["neighbors"]["indices"][in_source]
        source_positions = position_coords[in_source]
        source_velocities = velocity_coords[in_source]

        per_cell = []
        for cell_pos, cell_vel, row in zip(source_positions, source_velocities, neighbor_rows):
            # Keep only the neighbors that lie in the target cluster.
            target_neighbors = keep_type(adata, row, target, k_cluster)
            if len(target_neighbors) == 0:
                continue

            displacement = position_coords[target_neighbors] - cell_pos
            similarities = cosine_similarity(displacement, cell_vel.reshape(1, -1)).flatten()
            per_cell.append(np.mean(similarities))

        edge = (source, target)
        edge_means[edge] = np.mean(per_cell)
        edge_cell_scores[edge] = per_cell

    if return_raw:
        return edge_cell_scores

    overall = np.mean(list(edge_means.values()))
    return edge_means, overall

inner_cluster_coh(adata, k_cluster, k_velocity, return_raw=False)

In-cluster Coherence Score.

Parameters:

Name Type Description Default
adata Anndata

Anndata object.

required
k_cluster str

key to the cluster column in adata.obs DataFrame.

required
k_velocity str

key to the velocity matrix in adata.obsm.

required
return_raw bool

return aggregated or raw scores.

False

Returns:

Name Type Description
dict

all_scores indexed by cluster

or

dict

mean scores indexed by cluster

float

averaged score over all cells.

Source code in cafe/metric/metric_velocity.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
def inner_cluster_coh(adata, k_cluster, k_velocity, return_raw=False):
    """In-cluster Coherence Score.

    For every cell, the cosine similarity between its velocity and the
    velocities of its same-cluster neighbors is averaged. Cells with no
    same-cluster neighbor are skipped.

    Args:
        adata (Anndata): Anndata object.
        k_cluster (str): key to the cluster column in adata.obs DataFrame.
        k_velocity (str): key to the velocity matrix in adata.obsm.
        return_raw (bool): return aggregated or raw scores.

    Returns:
        dict: per-cell score lists indexed by cluster, if ``return_raw``.
        or
        dict: mean scores indexed by cluster
        float: mean of the per-cluster mean scores.

    """
    velocities = adata.obsm[k_velocity]

    cluster_means = {}
    cluster_cell_scores = {}
    for label in np.unique(adata.obs[k_cluster]):
        members = adata.obs[k_cluster] == label
        # One row of neighbor indices per member cell.
        neighbor_rows = adata.uns["neighbors"]["indices"][members]
        member_velocities = velocities[members]

        per_cell = []
        for row_idx, row in enumerate(neighbor_rows):
            peers = keep_type(adata, row, label, k_cluster)
            if len(peers) > 0:
                # [[row_idx]] keeps the cell velocity two-dimensional.
                similarity = cosine_similarity(member_velocities[[row_idx]], velocities[peers])
                per_cell.append(similarity.mean())

        cluster_cell_scores[label] = per_cell
        cluster_means[label] = np.mean(per_cell)

    if return_raw:
        return cluster_cell_scores

    overall = np.mean(list(cluster_means.values()))
    return cluster_means, overall

keep_type(adata, nodes, target, k_cluster)

Select cells of targeted type

Parameters:

Name Type Description Default
adata Anndata

Anndata object.

required
nodes list

Indexes for cells

required
target str

Cluster name.

required
k_cluster str

Cluster key in adata.obs dataframe

required

Returns:

Name Type Description
list

Selected cells.

Source code in cafe/metric/metric_velocity.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def keep_type(adata, nodes, target, k_cluster):
    """Select cells of targeted type

    Args:
        adata (Anndata): Anndata object.
        nodes (numpy.ndarray): Positional (0-based row) indexes of cells.
        target (str): Cluster name.
        k_cluster (str): Cluster key in adata.obs dataframe

    Returns:
        numpy.ndarray: The entries of ``nodes`` whose cluster equals ``target``.

    """
    # ``nodes`` are row positions, so index with .iloc. Plain ``series[nodes]``
    # is label-based when the obs index is integer-typed (selecting the wrong
    # cells or raising KeyError) and only falls back to positions, with a
    # deprecation warning, for non-integer indexes.
    node_labels = adata.obs[k_cluster].iloc[nodes].values
    return nodes[node_labels == target]

summary_scores(all_scores)

Summarize group scores.

Parameters:

Name Type Description Default
all_scores dict{str,list}

{group name: score list of individual cells}.

required

Returns:

Name Type Description

dict{str,float}: Group-wise aggregation scores.

float

Mean of the group-wise aggregation scores.

Source code in cafe/metric/metric_velocity.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
def summary_scores(all_scores):
    """Summarize group scores.

    Args:
        all_scores (dict{str,list}): {group name: score list of individual cells}.
            Lists and NumPy arrays are both accepted; empty groups are ignored.

    Returns:
        dict{str,float}: Group-wise mean scores (empty groups omitted).
        float: Mean of the group-wise means; NaN when no group has any score.

    """
    # Test emptiness with len(): truthiness is ambiguous for NumPy arrays.
    sep_scores = {k: np.mean(s) for k, s in all_scores.items() if len(s) > 0}
    if not sep_scores:
        return sep_scores, np.nan
    # Every remaining group counts, including groups whose mean is exactly 0.
    overal_agg = np.mean(list(sep_scores.values()))
    return sep_scores, overal_agg