cafe.metric.metric_velocity

`cafe.metric.metric_velocity`

`calculate_velocity_metrics(fadata, cluster_edges, cluster=None, basis=None, model_name=None, recompute_pseudo_velocity=False, return_raw=False, summary=True)`

Evaluate velocity estimation results using two metrics: cross-boundary direction correctness and in-cluster coherence.

Parameters:

Name	Type	Description	Default
`fadata`	`FateAnnData`	FateAnnData object.	required
`cluster_edges`	`list of tuples("A", "B")`	pairs of clusters with transition direction A->B	required
`cluster`	`str`	key to the cluster column in adata.obs DataFrame.	`None`
`basis`	`str`	key to x embedding for visualization.	`None`
`model_name`	`str`	model name in raw_wrapper_dict.	`None`
`recompute_pseudo_velocity`	`bool`	whether to recompute pseudo velocity.	`False`
`return_raw`	`bool`	return aggregated or raw scores.	`False`
`summary`	`bool`	if not return_raw, whether to return summary scores.	`True`

Returns:

Name	Type	Description
`dict`		aggregated metric scores.

Source code in cafe/metric/metric_velocity.py

def calculate_velocity_metrics(
    fadata: FateAnnData,
    cluster_edges: list,
    cluster: str = None,
    basis: str = None,
    model_name: str = None,
    recompute_pseudo_velocity: bool = False,
    return_raw: bool = False,
    summary: bool = True,
):
    """Score a model's pseudo velocity with two velocity metrics.

    The pseudo velocity embedding of ``model_name`` is looked up in (or
    computed into) the model's raw wrapper dict, kNN indices are derived from
    ``fadata.obsp["distances"]`` when ``fadata.uns["neighbors"]`` has no
    ``"indices"`` entry, and then the cross-boundary direction correctness and
    the in-cluster coherence are evaluated on the velocity embedding.

    Args:
        fadata (FateAnnData): data object holding the trajectory results.
        cluster_edges (list of tuples("A", "B")): pairs of clusters with
            transition direction A->B.
        cluster (str): key to the cluster column in ``fadata.obs``. When None,
            ``fadata.prior_information["cluster"]`` is used.
        basis (str): key to the x embedding. When None,
            ``fadata.prior_information["basis"]`` is used.
        model_name (str): model name used to fetch the raw wrapper dict.
        recompute_pseudo_velocity (bool): recompute the pseudo velocity even if
            it is already stored in the raw wrapper dict.
        return_raw (bool): return the raw per-cell scores instead of
            aggregated ones.
        summary (bool): only used when ``return_raw`` is False; keep just the
            overall mean of each metric.

    Returns:
        dict: ``{"velocity_cbdir": ..., "velocity_icvcoh": ...}``. Each value is

        * the raw score dict of the metric when ``return_raw`` is True,
        * the overall mean score when ``return_raw`` is False and ``summary``
          is True,
        * the ``(per-group scores, overall mean)`` tuple otherwise.

    """
    # Fall back to the keys recorded in the prior information.
    cluster = fadata.prior_information.get("cluster") if cluster is None else cluster
    basis = fadata.prior_information.get("basis") if basis is None else basis

    # The velocity key is the basis name with its first two characters dropped
    # (presumably the "X_" prefix -- TODO confirm) and "velocity_" prepended.
    velocity_basis = f"velocity_{basis[2:]}"
    raw_wrapper_dict = fadata.get_raw_wrapper_dict(model_name)

    # Compute the pseudo velocity when it is absent or a refresh was requested;
    # the result is written into the dict returned by get_raw_wrapper_dict.
    is_missing = velocity_basis not in raw_wrapper_dict
    if is_missing or recompute_pseudo_velocity:
        if is_missing:
            logger.info("pseudo velocity don't exist, compute and add it to raw_wrapper_dict ")
        else:
            logger.info("recompute pseudo velocity")
        raw_wrapper_dict[velocity_basis] = fadata.get_trajectory_pseudo_velocity(
            basis=basis,
            model_name=model_name,
        )
    velocity_embedding = raw_wrapper_dict[velocity_basis]

    # Both metrics read adata.uns["neighbors"]["indices"]; build it from the
    # sparse distance matrix when it has not been stored yet.
    knn_info = fadata.uns["neighbors"]
    if "indices" not in knn_info:
        logger.debug("extract knn indices to 'adata.uns['neighbors']['indices']' for metric calculation")
        k = knn_info["params"]["n_neighbors"]
        if isinstance(k, np.ndarray):
            k = k.item()
        distance_matrix = fadata.obsp["distances"]  # csr matrix
        # assumes every row stores exactly n_neighbors - 1 entries -- TODO confirm
        knn_info["indices"] = distance_matrix.indices.reshape(-1, k - 1)

    # NOTE: (Important) the metrics are calculated on the low dimensional
    # velocity embedding, exposed under `velocity_basis` only inside this block.
    with temporary_obsm_key(fadata, velocity_basis, velocity_embedding):
        cbdir_result = cross_boundary_correctness(fadata, cluster, velocity_basis, cluster_edges, return_raw, basis)
        icvcoh_result = inner_cluster_coh(fadata, cluster, velocity_basis, return_raw)

    if return_raw:
        # Raw scores are returned untouched; `summary` has no effect here.
        if summary:
            logger.debug("'return_raw`'and 'summary' both set True, only 'return_raw' is effective, don't summary result.")
    elif summary:
        # Aggregated results are (per-group scores, overall mean); keep the mean.
        cbdir_result = cbdir_result[1]
        icvcoh_result = icvcoh_result[1]

    return {
        "velocity_cbdir": cbdir_result,
        "velocity_icvcoh": icvcoh_result,
    }

`cross_boundary_correctness(adata, k_cluster, k_velocity, cluster_edges, return_raw=False, x_emb='X_umap')`

Cross-Boundary Direction Correctness Score (A->B)

Parameters:

Name	Type	Description	Default
`adata`	`Anndata`	Anndata object.	required
`k_cluster`	`str`	key to the cluster column in adata.obs DataFrame.	required
`k_velocity`	`str`	key to the velocity matrix in adata.obsm.	required
`cluster_edges`	`list of tuples("A", "B")`	pairs of clusters with transition direction A->B	required
`return_raw`	`bool`	return aggregated or raw scores.	`False`
`x_emb`	`str`	key to x embedding for visualization.	`'X_umap'`

Returns:

Name	Type	Description
`dict`		all_scores indexed by cluster_edges
		or
`dict`		mean scores indexed by cluster_edges
`float`		mean of the per-edge mean scores.

Source code in cafe/metric/metric_velocity.py

def cross_boundary_correctness(adata, k_cluster, k_velocity, cluster_edges, return_raw=False, x_emb="X_umap"):
    """Cross-Boundary Direction Correctness Score (A->B)

    For every edge (A, B) and every cell of cluster A, the cell's velocity is
    compared (cosine similarity) with the displacement vectors pointing from
    the cell to those of its neighbors that belong to cluster B. Cells of A
    without any neighbor in B are skipped.

    Args:
        adata (Anndata): Anndata object; ``adata.uns["neighbors"]["indices"]``
            must hold the neighbor indices of every cell.
        k_cluster (str): key to the cluster column in adata.obs DataFrame.
        k_velocity (str): key to the velocity matrix in adata.obsm.
        cluster_edges (list of tuples("A", "B")): pairs of clusters with
            transition direction A->B.
        return_raw (bool): return aggregated or raw scores.
        x_emb (str): key to the x embedding in adata.obsm.

    Returns:
        dict: per-cell scores indexed by cluster edge (when ``return_raw``)
        or
        dict: mean scores indexed by cluster edge
        float: mean of the per-edge mean scores.

    """
    velocity_space = adata.obsm[k_velocity]  # velocity embedding space
    position_space = adata.obsm[x_emb]  # expression embedding space

    edge_means = {}
    edge_cell_scores = {}

    for source, target in cluster_edges:
        in_source = adata.obs[k_cluster] == source
        source_neighbors = adata.uns["neighbors"]["indices"][in_source]
        source_positions = position_space[in_source]
        source_velocities = velocity_space[in_source]

        cell_scores = []
        for position, velocity, neighbors in zip(source_positions, source_velocities, source_neighbors):
            # Only neighbors lying in the target cluster are boundary cells.
            boundary = keep_type(adata, neighbors, target, k_cluster)
            if len(boundary) == 0:
                continue

            displacement = position_space[boundary] - position
            similarities = cosine_similarity(displacement, velocity.reshape(1, -1)).flatten()
            cell_scores.append(np.mean(similarities))

        edge = (source, target)
        edge_means[edge] = np.mean(cell_scores)
        edge_cell_scores[edge] = cell_scores

    if return_raw:
        return edge_cell_scores

    # Every edge weighs the same in the overall score.
    return edge_means, np.mean(list(edge_means.values()))

`inner_cluster_coh(adata, k_cluster, k_velocity, return_raw=False)`

In-cluster Coherence Score.

Parameters:

Name	Type	Description	Default
`adata`	`Anndata`	Anndata object.	required
`k_cluster`	`str`	key to the cluster column in adata.obs DataFrame.	required
`k_velocity`	`str`	key to the velocity matrix in adata.obsm.	required
`return_raw`	`bool`	return aggregated or raw scores.	`False`

Returns:

Name	Type	Description
`dict`		all_scores indexed by cluster
		or
`dict`		mean scores indexed by cluster
`float`		mean of the per-cluster mean scores.

Source code in cafe/metric/metric_velocity.py

def inner_cluster_coh(adata, k_cluster, k_velocity, return_raw=False):
    """In-cluster Coherence Score.

    For every cluster and every cell in it, the cell's velocity is compared
    (cosine similarity) with the velocities of those of its neighbors that
    belong to the same cluster. Cells without such a neighbor are skipped.

    Args:
        adata (Anndata): Anndata object; ``adata.uns["neighbors"]["indices"]``
            must hold the neighbor indices of every cell.
        k_cluster (str): key to the cluster column in adata.obs DataFrame.
        k_velocity (str): key to the velocity matrix in adata.obsm.
        return_raw (bool): return aggregated or raw scores.

    Returns:
        dict: per-cell scores indexed by cluster (when ``return_raw``)
        or
        dict: mean scores indexed by cluster
        float: mean of the per-cluster mean scores.

    """
    velocities = adata.obsm[k_velocity]

    cluster_means = {}
    cluster_cell_scores = {}
    for label in np.unique(adata.obs[k_cluster]):
        in_cluster = adata.obs[k_cluster] == label
        member_neighbors = adata.uns["neighbors"]["indices"][in_cluster]
        member_velocities = velocities[in_cluster]

        cell_scores = []
        for row, neighbors in enumerate(member_neighbors):
            # Restrict each neighborhood to cells carrying the same label.
            peers = keep_type(adata, neighbors, label, k_cluster)
            if len(peers) > 0:
                # [[row]] keeps the 2-D shape cosine_similarity is given.
                similarity = cosine_similarity(member_velocities[[row]], velocities[peers])
                cell_scores.append(similarity.mean())

        cluster_cell_scores[label] = cell_scores
        cluster_means[label] = np.mean(cell_scores)

    if return_raw:
        return cluster_cell_scores

    return cluster_means, np.mean(list(cluster_means.values()))

`keep_type(adata, nodes, target, k_cluster)`

Select cells of targeted type

Parameters:

Name	Type	Description	Default
`adata`	`Anndata`	Anndata object.	required
`nodes`	`list`	Indexes for cells	required
`target`	`str`	Cluster name.	required
`k_cluster`	`str`	Cluster key in adata.obs dataframe	required

Returns:

Name	Type	Description
`list`		Selected cells.

Source code in cafe/metric/metric_velocity.py

def keep_type(adata, nodes, target, k_cluster):
    """Select cells of targeted type

    Args:
        adata (Anndata): Anndata object.
        nodes (list): Positional (row) indexes of cells; a list or an integer
            numpy array.
        target (str): Cluster name.
        k_cluster (str): Cluster key in adata.obs dataframe

    Returns:
        numpy.ndarray: The entries of ``nodes`` whose cluster equals ``target``.

    """
    nodes = np.asarray(nodes)
    if nodes.size == 0:
        # Nothing to filter; also avoids indexing with an empty non-integer array.
        return nodes
    # `nodes` are row positions, so select with .iloc: plain Series[...] with
    # integer keys is label-based on an integer index and only falls back to
    # positions (a deprecated pandas behavior) on a non-integer index.
    labels = adata.obs[k_cluster].iloc[nodes].to_numpy()
    return nodes[labels == target]

`summary_scores(all_scores)`

Summarize group scores.

Parameters:

Name	Type	Description	Default
`all_scores`	`dict{str,list}`	{group name: score list of individual cells}.	required

Returns:

Name	Type	Description
`dict{str,float}`		Group-wise aggregation scores.
`float`		mean of the group-wise aggregation scores.

Source code in cafe/metric/metric_velocity.py

def summary_scores(all_scores):
    """Summarize group scores.

    Args:
        all_scores (dict{str,list}): {group name: score list of individual cells}.
            Groups whose score list is None or empty are skipped.

    Returns:
        dict{str,float}: Group-wise mean scores.
        float: Unweighted mean of the group-wise means; NaN when no group has
            any score.

    """
    # Use an explicit length check so array-valued score lists are accepted too.
    sep_scores = {
        group: np.mean(values)
        for group, values in all_scores.items()
        if values is not None and len(values) > 0
    }
    if not sep_scores:
        return sep_scores, np.nan
    # Average every group mean; filtering on truthiness here would silently
    # drop groups whose mean score is exactly 0.
    overal_agg = np.mean(list(sep_scores.values()))
    return sep_scores, overal_agg