In [ ]:

Copied!





# Import packages
import logging
import os
from pathlib import Path

import matplotlib.pyplot as plt
import muspan as ms
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
# Import packages
import logging
import os
from pathlib import Path

import matplotlib.pyplot as plt
import muspan as ms
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns

A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 739, in start
    self.io_loop.start()
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/tornado/platform/asyncio.py", line 211, in start
    self.asyncio_loop.run_forever()
  File "/Users/sarapatti/miniforge3/lib/python3.12/asyncio/base_events.py", line 641, in run_forever
    self._run_once()
  File "/Users/sarapatti/miniforge3/lib/python3.12/asyncio/base_events.py", line 1986, in _run_once
    handle._run()
  File "/Users/sarapatti/miniforge3/lib/python3.12/asyncio/events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue
    await self.process_one()
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 534, in process_one
    await dispatch(*args)
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell
    await result
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 362, in execute_request
    await super().execute_request(stream, ident, parent)
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 778, in execute_request
    reply_content = await reply_content
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 449, in do_execute
    res = shell.run_cell(
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/ipykernel/zmqshell.py", line 549, in run_cell
    return super().run_cell(*args, **kwargs)
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3100, in run_cell
    result = self._run_cell(
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3155, in _run_cell
    result = runner(coro)
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner
    coro.send(None)
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3367, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3612, in run_ast_nodes
    if await self.run_code(code, result, async_=asy):
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3672, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/yq/6fgvx0fj0cs57zmqydzbhbfh0000gn/T/ipykernel_44779/2125299851.py", line 7, in <module>
    import muspan as ms
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/muspan/__init__.py", line 15, in <module>
    from . import distribution
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/muspan/distribution/__init__.py", line 6, in <module>
    from .sliced_wasserstein_distance import sliced_wasserstein_distance
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/muspan/distribution/sliced_wasserstein_distance.py", line 5, in <module>
    import ot
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/ot/__init__.py", line 20, in <module>
    from . import lp
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/ot/lp/__init__.py", line 16, in <module>
    from .dmmot import dmmot_monge_1dgrid_loss, dmmot_monge_1dgrid_optimize
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/ot/lp/dmmot.py", line 12, in <module>
    from ..backend import get_backend
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/ot/backend.py", line 107, in <module>
    import torch
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/torch/__init__.py", line 1477, in <module>
    from .functional import *  # noqa: F403
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/torch/functional.py", line 9, in <module>
    import torch.nn.functional as F
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/torch/nn/__init__.py", line 1, in <module>
    from .modules import *  # noqa: F403
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/torch/nn/modules/__init__.py", line 35, in <module>
    from .transformer import TransformerEncoder, TransformerDecoder, \
  File "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/torch/nn/modules/transformer.py", line 20, in <module>
    device: torch.device = torch.device(torch._C._get_default_device()),  # torch.device('cpu'),

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[1], line 10
      8 import numpy as np
      9 import scanpy as sc
---> 10 import spatialdata as sd
     11 import squidpy as sq
     12 import seaborn as sns

File ~/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/spatialdata/__init__.py:58
     20 __version__ = version("spatialdata")
     22 __all__ = [
     23     "models",
     24     "transformations",
   (...)     55     "deepcopy",
     56 ]
---> 58 from spatialdata import dataloader, datasets, models, transformations
     59 from spatialdata._core._deepcopy import deepcopy
     60 from spatialdata._core.centroids import get_centroids

File ~/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/spatialdata/dataloader/__init__.py:2
      1 try:
----> 2     from spatialdata.dataloader.datasets import ImageTilesDataset
      3 except ImportError:
      4     ImageTilesDataset = None  # type: ignore[assignment, misc]

File ~/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/spatialdata/dataloader/datasets.py:14
     12 import pandas as pd
     13 from anndata import AnnData
---> 14 from geopandas import GeoDataFrame
     15 from pandas import CategoricalDtype
     16 from scipy.sparse import issparse

File ~/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/geopandas/__init__.py:3
      1 from geopandas._config import options
----> 3 from geopandas.geoseries import GeoSeries
      4 from geopandas.geodataframe import GeoDataFrame
      5 from geopandas.array import points_from_xy

File ~/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/geopandas/geoseries.py:17
     14 from shapely.geometry.base import BaseGeometry
     16 import geopandas
---> 17 from geopandas.base import GeoPandasBase, _delegate_property
     18 from geopandas.explore import _explore_geoseries
     19 from geopandas.plotting import plot_series

File ~/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/geopandas/base.py:11
      8 from shapely.geometry import MultiPoint, box
      9 from shapely.geometry.base import BaseGeometry
---> 11 from . import _compat as compat
     12 from .array import GeometryArray, GeometryDtype, points_from_xy
     15 def is_geometry_type(data):

File ~/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/geopandas/_compat.py:67
     63 # -----------------------------------------------------------------------------
     64 # pyproj compat
     65 # -----------------------------------------------------------------------------
     66 try:
---> 67     import pyproj  # noqa: F401
     69     HAS_PYPROJ = True
     71 except ImportError as err:

File ~/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/pyproj/__init__.py:34
      1 """
      2 Python interface to PROJ (https://proj.org),
      3 cartographic projections and coordinate transformations library.
   (...)     29 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     30 """
     32 import warnings
---> 34 import pyproj.network
     35 from pyproj._context import (  # noqa: F401 pylint: disable=unused-import
     36     set_use_global_context,
     37 )
     38 from pyproj._show_versions import (  # noqa: F401 pylint: disable=unused-import
     39     show_versions,
     40 )

File ~/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics/.venv/lib/python3.12/site-packages/pyproj/network.py:10
      6 from pathlib import Path
      8 import certifi
---> 10 from pyproj._context import _set_context_ca_bundle_path
     11 from pyproj._network import (  # noqa: F401 pylint: disable=unused-import
     12     is_network_enabled,
     13     set_network_enabled,
     14 )
     17 def set_ca_bundle_path(ca_bundle_path: Path | str | bool | None = None) -> None:

File <frozen importlib._bootstrap>:645, in parent(self)

KeyboardInterrupt:

In [5]:

Copied!

# Set variables
module_name = "6_muspan"  # name of the module
# Set variables
module_name = "6_muspan"  # name of the module

In [6]:

Copied!





# Set directories
base_dir = (
    "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics"
)
input_path = base_dir
output_path = Path(base_dir) / "analysis"
logging_path = Path(output_path) / "logging"
zarr_path = Path(input_path) / "data/xenium.zarr"

# Confirm directories exist
if not Path(input_path).exists():
    raise FileNotFoundError(f"Input path {input_path} does not exist.")
if not Path(output_path).exists():
    raise FileNotFoundError(f"Output path {output_path} does not exist.")
if not Path(zarr_path).exists():
    raise FileNotFoundError(f"Logging path {zarr_path} does not exist.")
# Set directories
base_dir = (
    "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ReCoDe-spatial-transcriptomics"
)
input_path = base_dir
output_path = Path(base_dir) / "analysis"
logging_path = Path(output_path) / "logging"
zarr_path = Path(input_path) / "data/xenium.zarr"

# Confirm directories exist
if not Path(input_path).exists():
    raise FileNotFoundError(f"Input path {input_path} does not exist.")
if not Path(output_path).exists():
    raise FileNotFoundError(f"Output path {output_path} does not exist.")
if not Path(zarr_path).exists():
    raise FileNotFoundError(f"Logging path {zarr_path} does not exist.")

In [7]:

Copied!





# Create output directories if they do not exist
os.makedirs(Path(output_path) / module_name, exist_ok=True)

# Set up logging
os.makedirs(
    logging_path, exist_ok=True
)  # should set up all these directories at the start of the pipeline?
logging.basicConfig(
    filename=Path(logging_path) / f"{module_name}.txt",  # output file
    filemode="w",  # overwrites the file each time
    format="%(asctime)s - %(levelname)s - %(message)s",  # log format
    level=logging.INFO,  # minimum level to log
)


# change directory to output_path/module_name
os.chdir(
    Path(output_path) / module_name
)  # need to so plots save in the correct directory
# Create output directories if they do not exist
os.makedirs(Path(output_path) / module_name, exist_ok=True)

# Set up logging
os.makedirs(
    logging_path, exist_ok=True
)  # should set up all these directories at the start of the pipeline?
logging.basicConfig(
    filename=Path(logging_path) / f"{module_name}.txt",  # output file
    filemode="w",  # overwrites the file each time
    format="%(asctime)s - %(levelname)s - %(message)s",  # log format
    level=logging.INFO,  # minimum level to log
)


# change directory to output_path/module_name
os.chdir(
    Path(output_path) / module_name
)  # need to so plots save in the correct directory

In [8]:

Copied!

# Set colors
color_map = sns.color_palette("Blues", as_cmap=True)
# Set colors
color_map = sns.color_palette("Blues", as_cmap=True)

In [9]:

Copied!





# Load data
adata = sc.read_h5ad(
    "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ST_Xenium_Asthma/analysis/h5ad/xenium_annotated.h5ad"
)
# Load data
adata = sc.read_h5ad(
    "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ST_Xenium_Asthma/analysis/h5ad/xenium_annotated.h5ad"
)

In [10]:

Copied!





# Import Regis' cluster labeling
matrix_epi = pd.read_excel(
    "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ST_Regis_analysis/data/Cell_matrices_D2C/matrix_epithelium_total_filter_asthma.xlsx"
)
matrix_muc = pd.read_excel(
    "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ST_Regis_analysis/data/Cell_matrices_D2C/Matrix_muc_gland_total_asthma.xlsx"
)
# Import Regis' cluster labeling
matrix_epi = pd.read_excel(
    "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ST_Regis_analysis/data/Cell_matrices_D2C/matrix_epithelium_total_filter_asthma.xlsx"
)
matrix_muc = pd.read_excel(
    "/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ST_Regis_analysis/data/Cell_matrices_D2C/Matrix_muc_gland_total_asthma.xlsx"
)

In [11]:

Copied!





# Format matrix
matrix_epi.index = matrix_epi["Cell_ID"]
matrix_epi = matrix_epi.rename(columns={"Cluster": "Regis_cluster"})
matrix_epi["Patient"] = matrix_epi["Patient"].astype(str)
matrix_epi["ROI"] = "Epithelium"
matrix_epi
# Format matrix
matrix_epi.index = matrix_epi["Cell_ID"]
matrix_epi = matrix_epi.rename(columns={"Cluster": "Regis_cluster"})
matrix_epi["Patient"] = matrix_epi["Patient"].astype(str)
matrix_epi["ROI"] = "Epithelium"
matrix_epi

Out[11]:

	Cell_ID	Regis_cluster	Pathology	Patient	Transcripts	ACE	ACE2	ACKR1	ADAM17	ADAM28	...	UPK3B	VEGFA	VEGFC	VSIG4	VWF	WFS1	WNT2	WT1	XCR1	ROI
Cell_ID
aadeodah-1	aadeodah-1	Basal cells 1	Healthy	110032	87	0	0	0	1	0	...	0	0	0	0	0	0	0	0	0	Epithelium
aadnbbbk-1	aadnbbbk-1	Basal cells 1	Healthy	110032	49	0	0	0	0	1	...	0	0	0	0	0	0	0	0	0	Epithelium
aadonleh-1	aadonleh-1	Basal cells 1	Healthy	110032	48	0	0	0	0	2	...	0	0	0	0	0	0	0	0	0	Epithelium
aadpbghk-1	aadpbghk-1	Basal cells 1	Healthy	110032	19	0	0	0	0	0	...	0	0	0	0	0	1	0	0	0	Epithelium
aafjimli-1	aafjimli-1	Basal cells 1	Healthy	110032	113	0	0	0	0	1	...	1	0	0	0	0	0	0	0	0	Epithelium
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
hkcjgbai-1	hkcjgbai-1	SMCs 4	Asthma	110001	28	1	0	0	0	0	...	0	0	0	0	0	0	0	0	0	Epithelium
manilhbg-1	manilhbg-1	SMCs 4	Asthma	110007	8	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	Epithelium
mapjnddn-1	mapjnddn-1	SMCs 4	Asthma	110007	36	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	Epithelium
mapkfdhd-1	mapkfdhd-1	SMCs 4	Asthma	110007	13	0	0	0	0	0	...	0	0	0	1	0	0	0	0	0	Epithelium
mbahoini-1	mbahoini-1	SMCs 4	Asthma	110007	32	0	1	0	0	1	...	0	0	0	0	0	0	0	0	0	Epithelium

23105 rows × 345 columns

In [12]:

Copied!





matrix_muc.index = matrix_muc["Cell_ID"]
matrix_muc = matrix_muc.rename(columns={"Cluster": "Regis_cluster"})
matrix_muc["Patient"] = matrix_muc["Patient"].astype(str)
matrix_muc["ROI"] = "Mucosal_gland"
matrix_muc
matrix_muc.index = matrix_muc["Cell_ID"]
matrix_muc = matrix_muc.rename(columns={"Cluster": "Regis_cluster"})
matrix_muc["Patient"] = matrix_muc["Patient"].astype(str)
matrix_muc["ROI"] = "Mucosal_gland"
matrix_muc

Out[12]:

	Cell_ID	Regis_cluster	Transcripts	Patient	Pathology	ACE	ACE2	ACKR1	ADAM17	ADAM28	...	UPK3B	VEGFA	VEGFC	VSIG4	VWF	WFS1	WNT2	WT1	XCR1	ROI
Cell_ID
fjkkcgcb-1	fjkkcgcb-1	Basal cells 1	21	110032	Healthy	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	Mucosal_gland
fjkmecdi-1	fjkmecdi-1	Basal cells 1	20	110032	Healthy	0	0	0	0	0	...	0	0	0	0	0	0	1	0	0	Mucosal_gland
fkeggnmf-1	fkeggnmf-1	Basal cells 1	35	110032	Healthy	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	Mucosal_gland
fkehnhkj-1	fkehnhkj-1	Basal cells 1	72	110032	Healthy	0	0	0	0	1	...	0	0	0	0	0	0	0	0	0	Mucosal_gland
knndgahg-1	knndgahg-1	Basal cells 1	27	110040	Healthy	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	Mucosal_gland
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
gfpadpfl-1	gfpadpfl-1	SMCs 4	18	110001	Asthma	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	Mucosal_gland
gfpajmjo-1	gfpajmjo-1	SMCs 4	26	110001	Asthma	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	Mucosal_gland
gfpbphdi-1	gfpbphdi-1	SMCs 4	52	110001	Asthma	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	Mucosal_gland
hngbeaef-1	hngbeaef-1	SMCs 4	7	110001	Asthma	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	Mucosal_gland
lobdedfn-1	lobdedfn-1	SMCs 4	43	110007	Asthma	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	Mucosal_gland

19583 rows × 345 columns

In [13]:

Copied!





# Combine both matrices
matrix = pd.concat([matrix_epi, matrix_muc], axis=0)  # combine based on columns
print("Number of cells from each", matrix["ROI"].value_counts())

matrix = matrix[["Cell_ID", "ROI", "Regis_cluster", "Pathology", "Patient"]]
matrix.rename(columns={matrix.columns[0]: "cell_id"}, inplace=True)
print(matrix["cell_id"].duplicated().sum())
matrix = matrix.drop_duplicates(subset="cell_id")
# Combine both matrices
matrix = pd.concat([matrix_epi, matrix_muc], axis=0)  # combine based on columns
print("Number of cells from each", matrix["ROI"].value_counts())

matrix = matrix[["Cell_ID", "ROI", "Regis_cluster", "Pathology", "Patient"]]
matrix.rename(columns={matrix.columns[0]: "cell_id"}, inplace=True)
print(matrix["cell_id"].duplicated().sum())
matrix = matrix.drop_duplicates(subset="cell_id")

Number of cells from each ROI
Epithelium       23105
Mucosal_gland    19583
Name: count, dtype: int64
6

In [14]:

Copied!

adata.obs = adata.obs.join(matrix.set_index("cell_id"), how="left")
adata.obs
adata.obs = adata.obs.join(matrix.set_index("cell_id"), how="left")
adata.obs

Out[14]:

	cell_id	x_centroid	y_centroid	transcript_counts	control_probe_counts	control_codeword_counts	unassigned_codeword_counts	deprecated_codeword_counts	total_counts	cell_area	...	pct_counts_in_top_10_genes	pct_counts_in_top_20_genes	pct_counts_in_top_50_genes	n_counts	leiden_1.0	new_clusters	ROI	Regis_cluster	Pathology	Patient
aaadnhbi-1	aaadnhbi-1	1647.581299	5919.916992	14	0	0	0	0	14.0	804.232842	...	92.857143	100.000000	100.0000	14.0	5	5	Epithelium	SMCs 1	Healthy	110028
aaaoajok-1	aaaoajok-1	2413.369385	3389.042480	15	0	0	0	0	15.0	93.789535	...	66.666667	100.000000	100.0000	15.0	15	15	Epithelium	Cluster 3	Healthy	110032
aabgppbe-1	aabgppbe-1	2429.717041	3389.562744	36	0	0	0	0	36.0	263.215791	...	61.111111	88.888889	100.0000	36.0	7	SMC	Epithelium	Endothelial cells 2	Healthy	110032
aabhhaho-1	aabhhaho-1	2443.484375	3395.104492	22	0	0	0	0	22.0	160.304693	...	63.636364	100.000000	100.0000	22.0	7	SMC	Epithelium	Endothelial cells 2	Healthy	110032
aacnpnlp-1	aacnpnlp-1	2494.154541	3380.241211	11	0	0	0	0	11.0	148.699537	...	100.000000	100.000000	100.0000	11.0	3	Fibroblasts	Epithelium	Fibroblasts	Healthy	110032
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
ojcbmknb-1	ojcbmknb-1	8341.762695	20044.416016	37	0	0	0	0	37.0	110.497348	...	37.837838	64.864865	100.0000	37.0	10	10	NaN	NaN	NaN	NaN
ojcdjpnk-1	ojcdjpnk-1	8348.761719	20057.220703	39	0	0	0	0	39.0	261.815947	...	33.333333	58.974359	100.0000	39.0	12	NK cells	NaN	NaN	NaN	NaN
ojcdodjh-1	ojcdodjh-1	8351.383789	20081.843750	18	0	0	0	0	18.0	199.680945	...	66.666667	100.000000	100.0000	18.0	6	6	NaN	NaN	NaN	NaN
ojcgaphj-1	ojcgaphj-1	8350.867188	20041.492188	21	0	0	0	0	21.0	63.083284	...	47.619048	95.238095	100.0000	21.0	13	Macrophages	NaN	NaN	NaN	NaN
ojcgnoil-1	ojcgnoil-1	8354.194336	20048.212891	64	0	0	0	0	64.0	82.048909	...	26.562500	42.187500	89.0625	64.0	0	Pericyte/SMC	NaN	NaN	NaN	NaN

53468 rows × 24 columns

In [15]:

Copied!





# Format Regis_cluster column to be a string
adata.obs["Regis_cluster"] = adata.obs["Regis_cluster"].astype(
    "str"
)  # convert to string

adata.obs.dtypes
adata.obs["Regis_cluster"].value_counts()
# Format Regis_cluster column to be a string
adata.obs["Regis_cluster"] = adata.obs["Regis_cluster"].astype(
    "str"
)  # convert to string

adata.obs.dtypes
adata.obs["Regis_cluster"].value_counts()

Out[15]:

Regis_cluster
nan                             15407
Cluster 1                        7021
Cluster 2                        6672
Cluster 3                        3402
Endothelial cells 1              2843
Fibroblasts                      2581
Basal cells 1                    2470
Basal cells 2                    2148
Mast cells                       1923
SMCs 1                           1532
Ciliated cells 2                 1302
Goblet cells (Epithelium)        1142
Serous cells 1                   1125
Ciliated cells 1                 1068
Goblet cells (Mucous glands)      761
SMCs 2 Des+                       527
Serous cells 2                    490
Endothelial cells 2               424
Cluster 12                        320
SMCs 4                            294
SMCs 3 (Mucous glands)             16
Name: count, dtype: int64

In [16]:

Copied!





# Import Xenium data using muspan
logging.info("Loading Xenium data for MuSpAn...")

area_path = Path(input_path) / "data/regis_d1.csv"
transcripts_of_interest = ["EPCAM", "CD3D", "CD68", "VWF", "PTPRC", "ACTA2"]

pc = ms.io.xenium_to_domain(
    path_to_xenium_data="/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ST_Xenium_Asthma/data/catalyst_release_RJ_IMPERIAL_Dec05/0013717_asthma_healthy",
    cells_from_selection_csv=area_path,
    domain_name="Regis",
    load_transcripts=True,
    selected_transcripts=transcripts_of_interest,
    load_nuclei=True,
    load_cells_as_shapes=True,
    exclude_no_nuclei_cells=True,
)
# Import Xenium data using muspan
logging.info("Loading Xenium data for MuSpAn...")

area_path = Path(input_path) / "data/regis_d1.csv"
transcripts_of_interest = ["EPCAM", "CD3D", "CD68", "VWF", "PTPRC", "ACTA2"]

pc = ms.io.xenium_to_domain(
    path_to_xenium_data="/Users/sarapatti/Desktop/PhD_projects/Llyod_lab/ST_Xenium_Asthma/data/catalyst_release_RJ_IMPERIAL_Dec05/0013717_asthma_healthy",
    cells_from_selection_csv=area_path,
    domain_name="Regis",
    load_transcripts=True,
    selected_transcripts=transcripts_of_interest,
    load_nuclei=True,
    load_cells_as_shapes=True,
    exclude_no_nuclei_cells=True,
)

Domain name: Regis
Number of objects: 19486
Collections: ['Cell boundaries', 'Nucleus boundaries', 'Transcripts']
Labels: ['Cell ID', 'Transcript Counts', 'Cell Area', 'Cluster ID', 'Nucleus Area', 'Transcript ID'] 
Networks: [] 
Distance matrices: []

In [17]:

Copied!





# ...existing code...
cell_ids = pc.labels["Cell ID"]["labels"]

# Build a list of labels for ALL cell_ids, using "NA" if not found in adata.obs
filtered_regis_clusters = [
    str(adata.obs.loc[cid, "Regis_cluster"]) if cid in adata.obs.index else "NA"
    for cid in cell_ids
]

# Now filtered_regis_clusters will have the same length as cell_ids
pc.add_labels(label_name="Regis_cluster", labels=filtered_regis_clusters)
# ...existing code...
cell_ids = pc.labels["Cell ID"]["labels"]

# Build a list of labels for ALL cell_ids, using "NA" if not found in adata.obs
filtered_regis_clusters = [
    str(adata.obs.loc[cid, "Regis_cluster"]) if cid in adata.obs.index else "NA"
    for cid in cell_ids
]

# Now filtered_regis_clusters will have the same length as cell_ids
pc.add_labels(label_name="Regis_cluster", labels=filtered_regis_clusters)

In [18]:

Copied!

pc.collections["Transcripts"]
pc.collections["Transcripts"]

Out[18]:

{'collection_name': 'Transcripts',
 'collection_name_integer': 2,
 'objects': array([14206., 14207., 14208., ..., 19483., 19484., 19485.])}

In [19]:

Copied!

# Queries to isolate the different objects within the MuSpAn domain

# Query to isolate cell boundaries
qCells = ms.query.query(pc, ("Collection",), "is", "Cell boundaries")

# Query to isolate transcripts
qTrans = ms.query.query(pc, ("Collection",), "is", "Transcripts")

# Query to isolate nucleus boundaries
qNuc = ms.query.query(pc, ("Collection",), "is", "Nucleus boundaries")
# Queries to isolate the different objects within the MuSpAn domain

# Query to isolate cell boundaries
qCells = ms.query.query(pc, ("Collection",), "is", "Cell boundaries")

# Query to isolate transcripts
qTrans = ms.query.query(pc, ("Collection",), "is", "Transcripts")

# Query to isolate nucleus boundaries
qNuc = ms.query.query(pc, ("Collection",), "is", "Nucleus boundaries")

In [20]:

Copied!

print(pc.labels.keys())  # see all keys in the labels dictionary
print(pc.labels.keys())  # see all keys in the labels dictionary

dict_keys(['Cell ID', 'Transcript Counts', 'Cell Area', 'Cluster ID', 'Nucleus Area', 'Transcript ID', 'Regis_cluster'])

In [21]:

Copied!

pc.labels["Cluster ID"]
pc.labels["Cluster ID"]

Out[21]:

{'labels': array(['Cluster 5', 'Cluster 4', 'Cluster 6', ..., 'Cluster 1',
        'Cluster 3', 'Cluster 3'], dtype='<U10'),
 'objects': array([0.000e+00, 1.000e+00, 2.000e+00, ..., 7.100e+03, 7.101e+03,
        7.102e+03]),
 'labelType': 'categorical',
 'categories': array(['Cluster 1', 'Cluster 10', 'Cluster 11', 'Cluster 12',
        'Cluster 13', 'Cluster 14', 'Cluster 15', 'Cluster 16',
        'Cluster 17', 'Cluster 18', 'Cluster 2', 'Cluster 3', 'Cluster 4',
        'Cluster 5', 'Cluster 6', 'Cluster 7', 'Cluster 8', 'Cluster 9',
        'Unassigned'], dtype='<U10'),
 'nCategories': 19,
 'labelToInteger': {'Cluster 1': 0,
  'Cluster 10': 1,
  'Cluster 11': 2,
  'Cluster 12': 3,
  'Cluster 13': 4,
  'Cluster 14': 5,
  'Cluster 15': 6,
  'Cluster 16': 7,
  'Cluster 17': 8,
  'Cluster 18': 9,
  'Cluster 2': 10,
  'Cluster 3': 11,
  'Cluster 4': 12,
  'Cluster 5': 13,
  'Cluster 6': 14,
  'Cluster 7': 15,
  'Cluster 8': 16,
  'Cluster 9': 17,
  'Unassigned': 18},
 'integerToLabel': {0: 'Cluster 1',
  1: 'Cluster 10',
  2: 'Cluster 11',
  3: 'Cluster 12',
  4: 'Cluster 13',
  5: 'Cluster 14',
  6: 'Cluster 15',
  7: 'Cluster 16',
  8: 'Cluster 17',
  9: 'Cluster 18',
  10: 'Cluster 2',
  11: 'Cluster 3',
  12: 'Cluster 4',
  13: 'Cluster 5',
  14: 'Cluster 6',
  15: 'Cluster 7',
  16: 'Cluster 8',
  17: 'Cluster 9',
  18: 'Unassigned'},
 'numericalLabels': array([13, 12, 14, ...,  0, 11, 11])}

In [22]:

Copied!





# Create a figure with 2x2 subplots
fig, ax = plt.subplots(figsize=(20, 15), nrows=2, ncols=2)

# Visualise all objects in the MuSpAn domain
ms.visualise.visualise(pc, ax=ax[0, 0], marker_size=0.05)
ax[0, 0].set_title("All objects")

# Visualise cells, colored by 'Regis_cluster'
ms.visualise.visualise(
    pc, color_by=("label", "Regis_cluster"), ax=ax[0, 1], objects_to_plot=qCells
)
ax[0, 1].set_title("Cells")

# Visualise transcripts, colored by 'Transcript'
ms.visualise.visualise(
    pc,
    color_by=("label", "Transcript ID"),
    ax=ax[1, 0],
    objects_to_plot=qTrans,
    marker_size=5,
)
ax[1, 0].set_title("Transcripts")

# Visualise nuclei, colored by 'Nucleus Area'
ms.visualise.visualise(
    pc,
    color_by=("label", "Nucleus Area"),
    ax=ax[1, 1],
    objects_to_plot=qNuc,
    # vmin=20,
    # vmax=200
)
ax[1, 1].set_title("Nuclei")
# Create a figure with 2x2 subplots
fig, ax = plt.subplots(figsize=(20, 15), nrows=2, ncols=2)

# Visualise all objects in the MuSpAn domain
ms.visualise.visualise(pc, ax=ax[0, 0], marker_size=0.05)
ax[0, 0].set_title("All objects")

# Visualise cells, colored by 'Regis_cluster'
ms.visualise.visualise(
    pc, color_by=("label", "Regis_cluster"), ax=ax[0, 1], objects_to_plot=qCells
)
ax[0, 1].set_title("Cells")

# Visualise transcripts, colored by 'Transcript'
ms.visualise.visualise(
    pc,
    color_by=("label", "Transcript ID"),
    ax=ax[1, 0],
    objects_to_plot=qTrans,
    marker_size=5,
)
ax[1, 0].set_title("Transcripts")

# Visualise nuclei, colored by 'Nucleus Area'
ms.visualise.visualise(
    pc,
    color_by=("label", "Nucleus Area"),
    ax=ax[1, 1],
    objects_to_plot=qNuc,
    # vmin=20,
    # vmax=200
)
ax[1, 1].set_title("Nuclei")

Out[22]:

Text(0.5, 1.0, 'Nuclei')

No description has been provided for this image

Creating spatial networks¶

Delaunay networks from point-like data¶

If no boundary information about our spatial objects is given in our dataset (i.e., no segmentation mask), then Deluanay networks on point-like data are a common approximation of the local connectivity of the data. It’s construction is based on area (volume) filling process between all points such that edges are generated to produce triangles that do not contain any other point, therefore an edge between points presents the adjacency of the voronoi cells of the data.

Edge weights are values that can be used to describe the connectivity between nodes.

In [23]:

Copied!





# Convert cell boundaries to cell centres (centroids)
pc.convert_objects(
    population=("Collection", "Cell boundaries"),
    object_type="point",
    conversion_method="centroids",
    collection_name="Cell centroids",
    inherit_collections=False,
)

# Plot the cell centres with color based on 'Cluster ID'
plt.figure(figsize=(10, 6))
ms.visualise.visualise(
    pc,
    objects_to_plot=("collection", "Cell centroids"),
    color_by="Regis_cluster",
    ax=plt.gca(),
    marker_size=3,
)
# Convert cell boundaries to cell centres (centroids)
pc.convert_objects(
    population=("Collection", "Cell boundaries"),
    object_type="point",
    conversion_method="centroids",
    collection_name="Cell centroids",
    inherit_collections=False,
)

# Plot the cell centres with color based on 'Cluster ID'
plt.figure(figsize=(10, 6))
ms.visualise.visualise(
    pc,
    objects_to_plot=("collection", "Cell centroids"),
    color_by="Regis_cluster",
    ax=plt.gca(),
    marker_size=3,
)

Out[23]:

(<Figure size 1000x600 with 2 Axes>, <Axes: >)

In [24]:

Copied!





del_network = ms.networks.generate_network(
    pc,
    network_name="Delaunay CC unfiltered",
    network_type="Delaunay",
    objects_as_nodes=("collection", "Cell centroids"),
)

# Generate a Delaunay network on the 'Cell centroids' data
# The network will be stored in the domain.networks dictionary with the name 'Delaunay CC'
del_network = ms.networks.generate_network(
    pc,
    network_name="Delaunay CC filtered",
    network_type="Delaunay",
    objects_as_nodes=("collection", "Cell centroids"),
    min_edge_distance=0,
    max_edge_distance=45,
)
del_network = ms.networks.generate_network(
    pc,
    network_name="Delaunay CC unfiltered",
    network_type="Delaunay",
    objects_as_nodes=("collection", "Cell centroids"),
)

# Generate a Delaunay network on the 'Cell centroids' data
# The network will be stored in the domain.networks dictionary with the name 'Delaunay CC'
del_network = ms.networks.generate_network(
    pc,
    network_name="Delaunay CC filtered",
    network_type="Delaunay",
    objects_as_nodes=("collection", "Cell centroids"),
    min_edge_distance=0,
    max_edge_distance=45,
)

In [25]:

Copied!

next(iter(del_network.edges(data=True)))
next(iter(del_network.edges(data=True)))

Out[25]:

(19486,
 20738,
 {'Distance': 9.143756588182411, 'Inverse Distance': 0.5516394172576841})

In [26]:

Copied!

print("Delaunay CC unfiltered:", pc.networks["Delaunay CC unfiltered"])
print("Delaunay CC filtered:", pc.networks["Delaunay CC filtered"])
print("Delaunay CC unfiltered:", pc.networks["Delaunay CC unfiltered"])
print("Delaunay CC filtered:", pc.networks["Delaunay CC filtered"])

Delaunay CC unfiltered: Graph with 7103 nodes and 21274 edges
Delaunay CC filtered: Graph with 7103 nodes and 20940 edges

In [27]:

Copied!





# Create a 1x2 subplot for visualizing the original and filtered Delaunay networks
fig, ax = plt.subplots(1, 2, figsize=(20, 12))

# Plot the original Delaunay network
ax[0].set_title("Delaunay CC unfiltered")
ms.visualise.visualise_network(
    pc,
    network_name="Delaunay CC unfiltered",
    ax=ax[0],
    edge_weight_name=None,
    visualise_kwargs=dict(
        objects_to_plot=("collection", "Cell centroids"),
        marker_size=5,
        add_cbar=False,
        color_by=("constant", "#4D7EAB"),
    ),
)

# Plot the filtered Delaunay network
ax[1].set_title("Delaunay CC filtered")
ms.visualise.visualise_network(
    pc,
    network_name="Delaunay CC filtered",
    ax=ax[1],
    edge_weight_name=None,
    visualise_kwargs=dict(
        objects_to_plot=("collection", "Cell centroids"),
        marker_size=5,
        add_cbar=False,
        color_by=("constant", "#4D7EAB"),
    ),
)
# Create a 1x2 subplot for visualizing the original and filtered Delaunay networks
fig, ax = plt.subplots(1, 2, figsize=(20, 12))

# Plot the original Delaunay network
ax[0].set_title("Delaunay CC unfiltered")
ms.visualise.visualise_network(
    pc,
    network_name="Delaunay CC unfiltered",
    ax=ax[0],
    edge_weight_name=None,
    visualise_kwargs=dict(
        objects_to_plot=("collection", "Cell centroids"),
        marker_size=5,
        add_cbar=False,
        color_by=("constant", "#4D7EAB"),
    ),
)

# Plot the filtered Delaunay network
ax[1].set_title("Delaunay CC filtered")
ms.visualise.visualise_network(
    pc,
    network_name="Delaunay CC filtered",
    ax=ax[1],
    edge_weight_name=None,
    visualise_kwargs=dict(
        objects_to_plot=("collection", "Cell centroids"),
        marker_size=5,
        add_cbar=False,
        color_by=("constant", "#4D7EAB"),
    ),
)

Out[27]:

(<Figure size 2000x1200 with 2 Axes>,
 <Axes: title={'center': 'Delaunay CC filtered'}>)

Proximity based networks¶

Proximity networks are purely distance-based networks. Formally, in a Proximity network, any two nodes are connected if the distance between is within a specified range, which is defined in our case by max_edge_distance and min_edge_distance. These networks are particularly useful in spatial analysis where the physical distance between objects is a key factor. By adjusting the distance thresholds, we can explore how connectivity changes at different spatial scales. This can help in understanding the spatial organization and interactions within the dataset.

Point-like objects¶

In [28]:

Copied!





# Generate proximity networks with different maximum edge distances

distance_list = [10, 20, 50]
for distance in distance_list:
    ms.networks.generate_network(
        pc,
        network_name=f"prox network centroids {distance}",
        network_type="Proximity",
        objects_as_nodes=("collection", "Cell centroids"),
        max_edge_distance=distance,
        min_edge_distance=0,
    )
# Generate proximity networks with different maximum edge distances

distance_list = [10, 20, 50]
for distance in distance_list:
    ms.networks.generate_network(
        pc,
        network_name=f"prox network centroids {distance}",
        network_type="Proximity",
        objects_as_nodes=("collection", "Cell centroids"),
        max_edge_distance=distance,
        min_edge_distance=0,
    )

In [29]:

Copied!





# Create a figure with 3 subplots arranged in a single row
fig, ax = plt.subplots(1, 3, figsize=(20, 6))

# Plot the proximity network with 10μm max distance
ax[0].set_title(f"Proximity network: {distance_list[0]} max distance")
ms.visualise.visualise_network(
    pc,
    network_name=f"prox network centroids {distance_list[0]}",
    ax=ax[0],
    edge_cmap=color_map,
    edge_vmin=0,
    edge_vmax=distance_list[0],
    add_cbar=False,
    visualise_kwargs=dict(
        objects_to_plot=("collection", "Cell centroids"),
        marker_size=0.5,
        color_by=("constant", "black"),
    ),
)

# Plot the proximity network with 10μm max distance
ax[1].set_title(f"Proximity network: {distance_list[1]} max distance")
ms.visualise.visualise_network(
    pc,
    network_name=f"prox network centroids {distance_list[1]}",
    ax=ax[1],
    edge_cmap=color_map,
    edge_vmin=0,
    edge_vmax=distance_list[1],
    add_cbar=False,
    visualise_kwargs=dict(
        objects_to_plot=("collection", "Cell centroids"),
        marker_size=0.5,
        color_by=("constant", "black"),
    ),
)

# Plot the proximity network with 10μm max distance
ax[2].set_title(f"Proximity network: {distance_list[2]} max distance")
ms.visualise.visualise_network(
    pc,
    network_name=f"prox network centroids {distance_list[2]}",
    ax=ax[2],
    edge_cmap=color_map,
    edge_vmin=0,
    edge_vmax=distance_list[2],
    add_cbar=True,
    visualise_kwargs=dict(
        objects_to_plot=("collection", "Cell centroids"),
        marker_size=0.5,
        color_by=("constant", "black"),
    ),
)
# Create a figure with 3 subplots arranged in a single row
fig, ax = plt.subplots(1, 3, figsize=(20, 6))

# Plot the proximity network with 10μm max distance
ax[0].set_title(f"Proximity network: {distance_list[0]} max distance")
ms.visualise.visualise_network(
    pc,
    network_name=f"prox network centroids {distance_list[0]}",
    ax=ax[0],
    edge_cmap=color_map,
    edge_vmin=0,
    edge_vmax=distance_list[0],
    add_cbar=False,
    visualise_kwargs=dict(
        objects_to_plot=("collection", "Cell centroids"),
        marker_size=0.5,
        color_by=("constant", "black"),
    ),
)

# Plot the proximity network with 10μm max distance
ax[1].set_title(f"Proximity network: {distance_list[1]} max distance")
ms.visualise.visualise_network(
    pc,
    network_name=f"prox network centroids {distance_list[1]}",
    ax=ax[1],
    edge_cmap=color_map,
    edge_vmin=0,
    edge_vmax=distance_list[1],
    add_cbar=False,
    visualise_kwargs=dict(
        objects_to_plot=("collection", "Cell centroids"),
        marker_size=0.5,
        color_by=("constant", "black"),
    ),
)

# Plot the proximity network with 10μm max distance
ax[2].set_title(f"Proximity network: {distance_list[2]} max distance")
ms.visualise.visualise_network(
    pc,
    network_name=f"prox network centroids {distance_list[2]}",
    ax=ax[2],
    edge_cmap=color_map,
    edge_vmin=0,
    edge_vmax=distance_list[2],
    add_cbar=True,
    visualise_kwargs=dict(
        objects_to_plot=("collection", "Cell centroids"),
        marker_size=0.5,
        color_by=("constant", "black"),
    ),
)

Out[29]:

(<Figure size 2000x600 with 4 Axes>,
 <Axes: title={'center': 'Proximity network: 50 max distance'}>)

Shape-like objects¶

Note that the ‘Contact network’ is defined between the ‘Cell boundary’ objects but edges will be drawn from their centroids. For purely visualisation purposes, we include the ‘Cell centroids’ on the plot.

In [30]:

Copied!





# Generate a proximity network from shape-like objects (Cell boundaries)
# with a maximum edge distance of 1μm

ms.networks.generate_network(
    pc,
    network_name="Contact network",
    network_type="Proximity",
    objects_as_nodes=("collection", "Cell boundaries"),
    max_edge_distance=1,
    min_edge_distance=0,
)
# Generate a proximity network from shape-like objects (Cell boundaries)
# with a maximum edge distance of 1μm

ms.networks.generate_network(
    pc,
    network_name="Contact network",
    network_type="Proximity",
    objects_as_nodes=("collection", "Cell boundaries"),
    max_edge_distance=1,
    min_edge_distance=0,
)

Warning: Performing a proximity network on a large number (>5000) of shapes may be computationally expensive. Consider using network_type="Delaunay" to appromxiate proxmity interactions using cell centroids.

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In [30], line 4
      1 # Generate a proximity network from shape-like objects (Cell boundaries)
      2 # with a maximum edge distance of 1μm
----> 4 ms.networks.generate_network(
      5     pc,
      6     network_name='Contact network',
      7     network_type='Proximity',
      8     objects_as_nodes=('collection', 'Cell boundaries'),
      9     max_edge_distance=1,
     10     min_edge_distance=0
     11 )

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/muspan/networks/generate_network.py:300, in generate_network(domain, network_name, objects_as_nodes, include_boundaries, exclude_boundaries, boundary_exclude_distance, network_type, distance_weighted, inverse_distance_function, min_edge_distance, max_edge_distance, number_of_nearest_neighbours, store_network)
    297 extra_valid_query_indices = valid_query_indices[extra_validation_indices]
    298 if len(extra_valid_query_indices) > 0:
    299     #this is now the bottleneck for any shape or line objects 
--> 300     object_distance = object_to_object_distance_matrix(domain, [object_id], object_indices[extra_valid_query_indices], bypass_checks=True)[0, :]
    302     final_validation_indices = (object_distance >= min_edge_distance) & (object_distance <= max_edge_distance)
    303     final_valid_query_indices = extra_valid_query_indices[final_validation_indices]

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/muspan/helpers/object_to_object_distance_matrix.py:177, in object_to_object_distance_matrix(domain, population_A, population_B, overwrite_cache, bypass_checks, distance_metric)
    175                             break
    176                         poly_B = verts_to_poly(b_coords[count_b])
--> 177                         d = distance_polygon_to_polygon(poly_A, poly_B)[0]
    178                         distance_matrix[ai,bi] = d
    179                         #TODO implement internal holes for shape to shape distances
    180                         
    181 # Update cache

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/muspan/shape_operations/helpers/distance_polygon_to_polygon.py:51, in distance_polygon_to_polygon(polygon_edges_1, polygon_edges_2)
     49     mindist = d1
     50 # vertices of poly 2 to edges of poly 1
---> 51 d2, (point, closepoint) = distance_point_to_line_segment(e2[0],e1[0],e1[1])
     52 if d2 < mindist:
     53     (closepoint1, closepoint2) = (point, closepoint)

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/muspan/shape_operations/helpers/distance_point_to_line_segment.py:37, in distance_point_to_line_segment(point, v1, v2)
     35 if seglengthsquared == 0:
     36     assert(1==2)
---> 37 r = np.dot(v2-v1,point-v1)/seglengthsquared
     38 if r <= 0:
     39     d = np.sqrt((point-v1).dot(point-v1))

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/numpy/core/multiarray.py:741, in dot(a, b, out)
    671     """
    672     result_type(*arrays_and_dtypes)
    673 
   (...)
    736 
    737     """
    738     return arrays_and_dtypes
--> 741 @array_function_from_c_func_and_dispatcher(_multiarray_umath.dot)
    742 def dot(a, b, out=None):
    743     """
    744     dot(a, b, out=None)
    745 
   (...)
    829 
    830     """
    831     return (a, b, out)

KeyboardInterrupt:

In [ ]:

Copied!





# Create a figure with a single subplot
fig, ax = plt.subplots(1, 1, figsize=(6, 6))

# Plot the cell boundaries underneath the network
ms.visualise.visualise(
    pc,
    ax=ax,
    objects_to_plot=("collection", "Cell boundaries"),
    marker_size=10,
    add_cbar=False,
    shape_kwargs={"color": "#4D7EAB", "alpha": 0.4, "edgecolor": "black"},
)

# Plot the contact network on top of the cell boundaries
ms.visualise.visualise_network(
    pc,
    network_name="Contact network",
    ax=ax,
    edge_weight_name=None,
    visualise_kwargs=dict(
        objects_to_plot=("collection", "Cell centroids"),
        marker_size=0.5,
        color_by=("constant", "black"),
    ),
)
# Create a figure with a single subplot
fig, ax = plt.subplots(1, 1, figsize=(6, 6))

# Plot the cell boundaries underneath the network
ms.visualise.visualise(
    pc,
    ax=ax,
    objects_to_plot=("collection", "Cell boundaries"),
    marker_size=10,
    add_cbar=False,
    shape_kwargs={"color": "#4D7EAB", "alpha": 0.4, "edgecolor": "black"},
)

# Plot the contact network on top of the cell boundaries
ms.visualise.visualise_network(
    pc,
    network_name="Contact network",
    ax=ax,
    edge_weight_name=None,
    visualise_kwargs=dict(
        objects_to_plot=("collection", "Cell centroids"),
        marker_size=0.5,
        color_by=("constant", "black"),
    ),
)

KNN based networks¶

Network_method argument and defining the number of neighbours we’d like to connect for every node using the number_of_nearest_neighbours parameter

In [ ]:

Copied!





# Generate a K-Nearest Neighbours (KNN) network on the 'Cell centroids' data with k neighbors
k_list = [2, 5, 10, 15]

for k in k_list:
    ms.networks.generate_network(
        pc,
        network_name=f"{k}-NN network",
        network_type="KNN",
        objects_as_nodes=("collection", "Cell centroids"),
        number_of_nearest_neighbours=k,
    )
# Generate a K-Nearest Neighbours (KNN) network on the 'Cell centroids' data with k neighbors
k_list = [2, 5, 10, 15]

for k in k_list:
    ms.networks.generate_network(
        pc,
        network_name=f"{k}-NN network",
        network_type="KNN",
        objects_as_nodes=("collection", "Cell centroids"),
        number_of_nearest_neighbours=k,
    )

In [ ]:

Copied!





fig, axes = plt.subplots(1, len(k_list), figsize=(6 * len(k_list), 6))

if len(k_list) == 1:
    axes = [axes]  # Ensure axes is iterable

for i, k in enumerate(k_list):
    axes[i].set_title(f"{k}-NN network")
    ms.visualise.visualise_network(
        pc,
        network_name=f"{k}-NN network",
        ax=axes[i],
        edge_weight_name="Distance",
        edge_cmap=color_map,
        visualise_kwargs=dict(
            objects_to_plot=("collection", "Cell centroids"),
            marker_size=1,
            color_by=("constant", "black"),
        ),
    )

plt.tight_layout()
plt.show()
fig, axes = plt.subplots(1, len(k_list), figsize=(6 * len(k_list), 6))

if len(k_list) == 1:
    axes = [axes]  # Ensure axes is iterable

for i, k in enumerate(k_list):
    axes[i].set_title(f"{k}-NN network")
    ms.visualise.visualise_network(
        pc,
        network_name=f"{k}-NN network",
        ax=axes[i],
        edge_weight_name="Distance",
        edge_cmap=color_map,
        visualise_kwargs=dict(
            objects_to_plot=("collection", "Cell centroids"),
            marker_size=1,
            color_by=("constant", "black"),
        ),
    )

plt.tight_layout()
plt.show()

We can see that each object should be connected to only 10 other objects in our domain

In [ ]:

Copied!





# Initialize an empty list to store the number of neighbours for each node
number_of_neighbours_check = []

# Iterate through each node in the 'f"{k}-NN network"k'
for node in pc.networks[f"{k}-NN network"].nodes():
    # Append the number of neighbours for the current node to the list
    number_of_neighbours_check.append(
        len(list(pc.networks[f"{k}-NN network"].neighbors(node)))
    )

# Print the number of adjacent nodes for the first 100 nodes
print(
    "Number of adjacent nodes for the first 100 nodes:",
    number_of_neighbours_check[:100],
)
# Initialize an empty list to store the number of neighbours for each node
number_of_neighbours_check = []

# Iterate through each node in the 'f"{k}-NN network"k'
for node in pc.networks[f"{k}-NN network"].nodes():
    # Append the number of neighbours for the current node to the list
    number_of_neighbours_check.append(
        len(list(pc.networks[f"{k}-NN network"].neighbors(node)))
    )

# Print the number of adjacent nodes for the first 100 nodes
print(
    "Number of adjacent nodes for the first 100 nodes:",
    number_of_neighbours_check[:100],
)

Spatial Statistics¶

Cross pair correlation function (cross-PCF)¶

The cross pair correlation function (cross-PCF, also known as the Radial Distribution Function) is a spatial statistic that characterises clustering or exclusion at different length scales. Consider two populations of points, that we’ll call and . Then the cross-PCF, , can be thought of as a ratio describing whether the observed number of pairs of points separated by distance (where one is type and the other type ) is higher or lower than would be expected under a statistical null model. In brief, indicates increased numbers of pairs separated by distance , which indicates clustering. suggests exclusion, or regularity.

In [ ]:

Copied!





# # Query the domain for points of Celltype D
# pop = ms.query.query(pc, ('label', 'Regis_cluster'), 'is', 'Mast cells')

# # Calculate the cross-PCF for points of Celltype D with themselves
# # max_R: maximum radius to consider
# # annulus_step: step size for the annulus
# # annulus_width: width of the annulus
# # visualise_output: whether to visualise the output
# r, PCF = ms.spatial_statistics.cross_pair_correlation_function(
#     domain=pc,
#     population_A=pop,
#     population_B=pop,
#     max_R=200,
#     annulus_step=5,
#     annulus_width=25,
#     visualise_output=True
# )
# # Query the domain for points of Celltype D
# pop = ms.query.query(pc, ('label', 'Regis_cluster'), 'is', 'Mast cells')

# # Calculate the cross-PCF for points of Celltype D with themselves
# # max_R: maximum radius to consider
# # annulus_step: step size for the annulus
# # annulus_width: width of the annulus
# # visualise_output: whether to visualise the output
# r, PCF = ms.spatial_statistics.cross_pair_correlation_function(
#     domain=pc,
#     population_A=pop,
#     population_B=pop,
#     max_R=200,
#     annulus_step=5,
#     annulus_width=25,
#     visualise_output=True
# )

In [32]:

Copied!





# Clusters
cluster_labels = "Regis_cluster"
celltypes = pc.labels[cluster_labels]["labels"].tolist()
celltypes = np.unique(celltypes)

# Define the cell types to be analyzed
celltypes = pc.labels[cluster_labels]["labels"].tolist()
celltypes = np.unique(celltypes)
# Clusters
cluster_labels = "Regis_cluster"
celltypes = pc.labels[cluster_labels]["labels"].tolist()
celltypes = np.unique(celltypes)

# Define the cell types to be analyzed
celltypes = pc.labels[cluster_labels]["labels"].tolist()
celltypes = np.unique(celltypes)

In [ ]:

Copied!





# # Create a n x n subplot for visualizing the cross-PCF for each combination of cell types
# fig, axes = plt.subplots(len(celltypes), len(celltypes), figsize=(40, 40))

# # Loop through each combination of cell types
# for i in range(len(celltypes)):
#     for j in range(len(celltypes)):
#         pop_A = ms.query.query(pc, ('label', cluster_labels), 'is', celltypes[i])
#         pop_B = ms.query.query(pc, ('label', cluster_labels), 'is', celltypes[j])
#         r, PCF = ms.spatial_statistics.cross_pair_correlation_function(
#             pc,
#             pop_A,
#             pop_B,
#             max_R=200,
#             annulus_step=5,
#             annulus_width=25
#         )

#         # Select the current subplot
#         ax = axes[i, j]

#         # Plot the cross-PCF
#         ax.plot(r, PCF)

#         # Add a horizontal line at y=1 to indicate the CSR baseline
#         ax.axhline(1, color='k', linestyle=':')

#         # Set the y-axis limit
#         ax.set_ylim([0, 7])

#         # Label the y-axis with the cross-PCF notation
#         ax.set_ylabel(f'$g_{{{celltypes[i]}{celltypes[j]}}}(r)$')

#         # Label the x-axis with the distance r
#         ax.set_xlabel('$r$')

# # Adjust the layout to prevent overlap
# plt.tight_layout()
# # Create a n x n subplot for visualizing the cross-PCF for each combination of cell types
# fig, axes = plt.subplots(len(celltypes), len(celltypes), figsize=(40, 40))

# # Loop through each combination of cell types
# for i in range(len(celltypes)):
#     for j in range(len(celltypes)):
#         pop_A = ms.query.query(pc, ('label', cluster_labels), 'is', celltypes[i])
#         pop_B = ms.query.query(pc, ('label', cluster_labels), 'is', celltypes[j])
#         r, PCF = ms.spatial_statistics.cross_pair_correlation_function(
#             pc,
#             pop_A,
#             pop_B,
#             max_R=200,
#             annulus_step=5,
#             annulus_width=25
#         )

#         # Select the current subplot
#         ax = axes[i, j]

#         # Plot the cross-PCF
#         ax.plot(r, PCF)

#         # Add a horizontal line at y=1 to indicate the CSR baseline
#         ax.axhline(1, color='k', linestyle=':')

#         # Set the y-axis limit
#         ax.set_ylim([0, 7])

#         # Label the y-axis with the cross-PCF notation
#         ax.set_ylabel(f'$g_{{{celltypes[i]}{celltypes[j]}}}(r)$')

#         # Label the x-axis with the distance r
#         ax.set_xlabel('$r$')

# # Adjust the layout to prevent overlap
# plt.tight_layout()

In [ ]:

Copied!





# Query points with labels "Basal cells 1" or "Mast cells"
query_ab = ms.query.query(
    pc, ("label", cluster_labels), "in", ["Basal cells 1", "Mast cells"]
)

ms.visualise.visualise(pc, color_by=("label", cluster_labels), objects_to_plot=query_ab)
# Query points with labels "Basal cells 1" or "Mast cells"
query_ab = ms.query.query(
    pc, ("label", cluster_labels), "in", ["Basal cells 1", "Mast cells"]
)

ms.visualise.visualise(pc, color_by=("label", cluster_labels), objects_to_plot=query_ab)

In [ ]:

Copied!





# Create a n x n subplot for visualizing the cross-PCF for each combination of cell types

pop_A = ms.query.query(pc, ("label", cluster_labels), "is", "Basal cells 1")
pop_B = ms.query.query(pc, ("label", cluster_labels), "is", "Mast cells")

# Calculate the cross-PCF for points of Celltype D with themselves
# max_R: maximum radius to consider
# annulus_step: step size for the annulus
# annulus_width: width of the annulus
# visualise_output: whether to visualise the output
r, PCF = ms.spatial_statistics.cross_pair_correlation_function(
    domain=pc,
    population_A=pop_A,
    population_B=pop_B,
    max_R=200,
    annulus_step=5,
    annulus_width=25,
    visualise_output=True,
)
# Create a n x n subplot for visualizing the cross-PCF for each combination of cell types

pop_A = ms.query.query(pc, ("label", cluster_labels), "is", "Basal cells 1")
pop_B = ms.query.query(pc, ("label", cluster_labels), "is", "Mast cells")

# Calculate the cross-PCF for points of Celltype D with themselves
# max_R: maximum radius to consider
# annulus_step: step size for the annulus
# annulus_width: width of the annulus
# visualise_output: whether to visualise the output
r, PCF = ms.spatial_statistics.cross_pair_correlation_function(
    domain=pc,
    population_A=pop_A,
    population_B=pop_B,
    max_R=200,
    annulus_step=5,
    annulus_width=25,
    visualise_output=True,
)

The weighted pair correlation function (wPCF)¶

The weighted pair correlation function (wPCF) is a spatial statistic that extends the standard pair correlation function to identify spatial relationships between two point populations, one of which is labelled with a continuous mark. For full details, see https://doi.org/10.1371/journal.pcbi.1010994 or https://doi.org/10.1017/S2633903X24000011.

In this tutorial, we briefly reproduce the example from Figure 3 of Quantification of spatial and phenotypic heterogeneity in an agent-based model of tumour-macrophage interactions (PLOS Computational Biology 19(3) https://doi.org/10.1371/journal.pcbi.1010994). We start by placing a line of points from population across the line , and a second population placed randomly across the domain. Population points are assigned a mark

In [ ]:

Copied!





# # Visualize the domain with points colored by their mark
# ms.visualise.visualise(pc,
#                        color_by='EPCAM',
#                        figure_kwargs={'figsize': (8, 6)})
# # Visualize the domain with points colored by their mark
# ms.visualise.visualise(pc,
#                        color_by='EPCAM',
#                        figure_kwargs={'figsize': (8, 6)})

In [ ]:

Copied!





# # Query population A from the domain
# popA = ms.query.query(pc, ('label', cluster_labels), 'is', 'Cluster 8')

# # Query population B from the domain
# popB = ms.query.query(pc, ('label', cluster_labels), 'is', 'Cluster 2')

# # Calculate the weighted pair correlation function (wPCF)
# # Parameters:
# # - domain: the domain containing the points
# # - popA: the first population of points (A)
# # - popB: the second population of points (B)
# # - mark_pop_B: the label/mark associated with population B
# # - max_R: the maximum radius to consider for the wPCF
# # - annulus_step: the step size for the annulus radii
# # - annulus_width: the width of each annulus
# radii, wPCF = ms.spatial_statistics.weighted_pair_correlation_function(
#     pc,
#     popA,
#     popB,
#     mark_pop_B='EPCAM',
#     max_R=1,
#     annulus_step=0.1,
#     annulus_width=0.15
# )
# # Query population A from the domain
# popA = ms.query.query(pc, ('label', cluster_labels), 'is', 'Cluster 8')

# # Query population B from the domain
# popB = ms.query.query(pc, ('label', cluster_labels), 'is', 'Cluster 2')

# # Calculate the weighted pair correlation function (wPCF)
# # Parameters:
# # - domain: the domain containing the points
# # - popA: the first population of points (A)
# # - popB: the second population of points (B)
# # - mark_pop_B: the label/mark associated with population B
# # - max_R: the maximum radius to consider for the wPCF
# # - annulus_step: the step size for the annulus radii
# # - annulus_width: the width of each annulus
# radii, wPCF = ms.spatial_statistics.weighted_pair_correlation_function(
#     pc,
#     popA,
#     popB,
#     mark_pop_B='EPCAM',
#     max_R=1,
#     annulus_step=0.1,
#     annulus_width=0.15
# )

Spatial Autocorrelation (Hotspot Analysis)¶

much they are expressed. Certain genes may be highly active in one region of a tumor while nearly silent in another, and these spatial patterns can provide crucial insights into tumor heterogeneity, the tumor microenvironment, and treatment resistance.

To detect and quantify these spatial patterns, we use spatial autocorrelation—a statistical approach that helps us determine whether high or low expression levels of a given gene or marker cluster together within a tissue sample or are randomly distributed. This type of analysis is often called hotspot analysis because it allows us to identify regions of significantly high or low expression—or “hot” and “cold” spots—within the spatial landscape of a tissue.

Compute Moran’s I and Getis-Ord on grid-like data.

In [ ]:

Copied!

cd3 = ms.query.query(pc, ("label", "Transcript ID"), "is", "CD3D")
cd3 = ms.query.query(pc, ("label", "Transcript ID"), "is", "CD3D")

This dataset is composed of point-like data however hotspot analysis is typically conducted on lattice- or grid-like data (think states on a map of the US). More recently, hotspot analysis has emerged as a useful tool in cancer research for analysising regional transcriptomics datasets such as those produced using Visium HD. In fact, spatial autocorrelations can be computed on any type of spatial data - not just grid-like data! We’ll see this later in this tutorial.

In [ ]:

Copied!





# Generate a hexagonal grid over the example domain
# - side_length: length of the side of each hexagon
# - regions_collection_name: name of the collection of hexagonal regions

length_list = [50, 100]  # Set the side length of the hexagons

for length in length_list:
    ms.region_based.generate_hexgrid(
        pc, side_length=length, regions_collection_name=f"Hexgrids {length}"
    )
# Generate a hexagonal grid over the example domain
# - side_length: length of the side of each hexagon
# - regions_collection_name: name of the collection of hexagonal regions

length_list = [50, 100]  # Set the side length of the hexagons

for length in length_list:
    ms.region_based.generate_hexgrid(
        pc, side_length=length, regions_collection_name=f"Hexgrids {length}"
    )

In [ ]:

Copied!

print(pc)
print(pc)

In [ ]:

Copied!





fig, ax = plt.subplots(figsize=(13, 6))
ms.visualise.visualise(
    pc,
    # color_by='region counts: T Helper Cell',
    objects_to_plot=("collection", "Hexgrids 50"),
    ax=ax,
)
ms.visualise.visualise(
    pc,
    color_by="Regis_cluster",
    objects_to_plot=("collection", "Cell centroids"),
    ax=ax,
    marker_size=5,
)  # plot the points
fig, ax = plt.subplots(figsize=(13, 6))
ms.visualise.visualise(
    pc,
    # color_by='region counts: T Helper Cell',
    objects_to_plot=("collection", "Hexgrids 50"),
    ax=ax,
)
ms.visualise.visualise(
    pc,
    color_by="Regis_cluster",
    objects_to_plot=("collection", "Cell centroids"),
    ax=ax,
    marker_size=5,
)  # plot the points

Compute Moran’s I and Getis-Ord directly on spatial data (point-like / any MuSpAn object)

Spatial Network Analysis¶

Neighborhood Clustering

Typically groups nodes based on attribute similarity rather than connection patterns. Common in traditional cluster analysis, it aims to minimize intra-cluster variance or maximize similarity metrics (e.g., Euclidean distance in k-means)
Similarity-based groups
Uses node attributes/features

Community Detection

Focuses on identifying groups of nodes with dense internal connections and sparse external connections within a network. The goal is to reveal inherent structural patterns in relational data (e.g., social networks, collaboration networks).
Density-based subgroups
Relies on edge connections (network structure)

Neighbourhood clustering¶

In [33]:

Copied!





# Perform neighbourhood clustering on the dataset using KNN and minibatchkmeans
# ? Can I call a network I already created and stored in Networks?

neighbourhood_enrichment_matrix, consistent_global_labels, unique_cluster_labels = (
    ms.networks.cluster_neighbourhoods(
        pc,  # The domain dataset
        label_name=cluster_labels,  # The label to use for clustering
        network_kwargs=dict(
            network_type="KNN",
            max_edge_distance=np.inf,
            min_edge_distance=0,
            number_of_nearest_neighbours=10,
        ),  # The network parameters
        k_hops=1,  # The number of hops to consider for the neighbourhood
        neighbourhood_label_name="Neighbourhood ID",  # Name for the neighbourhood label
        cluster_method="minibatchkmeans",  # Clustering method
        cluster_parameters={"n_clusters": 6},  # Parameters for the clustering method
        neighbourhood_enrichment_as="log-fold",  # Neighbourhood enrichment as log-fold
    )
)
# Perform neighbourhood clustering on the dataset using KNN and minibatchkmeans
# ? Can I call a network I already created and stored in Networks?

neighbourhood_enrichment_matrix, consistent_global_labels, unique_cluster_labels = (
    ms.networks.cluster_neighbourhoods(
        pc,  # The domain dataset
        label_name=cluster_labels,  # The label to use for clustering
        network_kwargs=dict(
            network_type="KNN",
            max_edge_distance=np.inf,
            min_edge_distance=0,
            number_of_nearest_neighbours=10,
        ),  # The network parameters
        k_hops=1,  # The number of hops to consider for the neighbourhood
        neighbourhood_label_name="Neighbourhood ID",  # Name for the neighbourhood label
        cluster_method="minibatchkmeans",  # Clustering method
        cluster_parameters={"n_clusters": 6},  # Parameters for the clustering method
        neighbourhood_enrichment_as="log-fold",  # Neighbourhood enrichment as log-fold
    )
)

Warning: KNN networks are currently only implemented for point objects, object centroids will used for network construction

In [34]:

Copied!





# Visualize the dataset with the neighbourhood clustering results
ms.visualise.visualise(
    pc,
    color_by="Neighbourhood ID",
    marker_size=0.5,
    objects_to_plot=("collection", "Cell centroids"),
)
# Visualize the dataset with the neighbourhood clustering results
ms.visualise.visualise(
    pc,
    color_by="Neighbourhood ID",
    marker_size=0.5,
    objects_to_plot=("collection", "Cell centroids"),
)

Out[34]:

(<Figure size 1000x800 with 2 Axes>, <Axes: >)

In [35]:

Copied!





# Create a DataFrame from the neighbourhood enrichment matrix
df_ME_id = pd.DataFrame(
    data=neighbourhood_enrichment_matrix,
    index=unique_cluster_labels,
    columns=consistent_global_labels,
)
df_ME_id.index.name = "Neighbourhood ID"
df_ME_id.columns.name = "Celltype ID"

# Check for non-finite values
non_finite_mask = ~np.isfinite(df_ME_id.values)

if non_finite_mask.any():
    print("Warning: Non-finite values found. Replacing with 0.")
    df_ME_id = df_ME_id.replace([np.inf, -np.inf], np.nan).fillna(0)
# Create a DataFrame from the neighbourhood enrichment matrix
df_ME_id = pd.DataFrame(
    data=neighbourhood_enrichment_matrix,
    index=unique_cluster_labels,
    columns=consistent_global_labels,
)
df_ME_id.index.name = "Neighbourhood ID"
df_ME_id.columns.name = "Celltype ID"

# Check for non-finite values
non_finite_mask = ~np.isfinite(df_ME_id.values)

if non_finite_mask.any():
    print("Warning: Non-finite values found. Replacing with 0.")
    df_ME_id = df_ME_id.replace([np.inf, -np.inf], np.nan).fillna(0)

In [37]:

Copied!





# Visualize the neighbourhood enrichment matrix using a clustermap
sns.clustermap(
    df_ME_id,
    xticklabels=consistent_global_labels,
    yticklabels=unique_cluster_labels,
    figsize=(7.5, 3.5),
    cmap="RdBu_r",
    dendrogram_ratio=(0.05, 0.3),
    col_cluster=False,
    row_cluster=False,
    square=True,
    linewidths=0.5,
    linecolor="black",
    cbar_kws=dict(
        use_gridspec=False,
        location="top",
        label="Neighbourhood enrichment (log-fold)",
        ticks=[-2, 0, 2],
    ),
    cbar_pos=(0.12, 0.75, 0.72, 0.08),
    vmin=-2,
    vmax=2,
    tree_kws={"linewidths": 0, "color": "white"},
)
# Visualize the neighbourhood enrichment matrix using a clustermap
sns.clustermap(
    df_ME_id,
    xticklabels=consistent_global_labels,
    yticklabels=unique_cluster_labels,
    figsize=(7.5, 3.5),
    cmap="RdBu_r",
    dendrogram_ratio=(0.05, 0.3),
    col_cluster=False,
    row_cluster=False,
    square=True,
    linewidths=0.5,
    linecolor="black",
    cbar_kws=dict(
        use_gridspec=False,
        location="top",
        label="Neighbourhood enrichment (log-fold)",
        ticks=[-2, 0, 2],
    ),
    cbar_pos=(0.12, 0.75, 0.72, 0.08),
    vmin=-2,
    vmax=2,
    tree_kws={"linewidths": 0, "color": "white"},
)

Out[37]:

<seaborn.matrix.ClusterGrid at 0x188c317e0>

Community detection¶

At its core, community detection identifies highly connected subcomponents within a graph, which can reveal important patterns and relationships. The interpretation of these communities depends on the context of the graph, allowing researchers to gain insights into the underlying dynamics of the system being studied.

We perform community detection using the community_detection function in the networks submodule. Specifically, we use the Louvain community detection method, which aims to optimise modularity—ensuring there are more edges within a community than between different communities. This method has several parameters that can influence the behavior of community detection. We recommend reviewing the Louvain algorithm for a deeper understanding: https://doi.org/10.1088/1742-5468/2008/10/P10008.

In particular, the resolution parameter plays a key role in controlling the size of the detected communities. A higher resolution value tends to identify smaller, more localised communities, while a lower value results in larger, more global groupings. Adjusting this parameter allows you to tailor the community detection process to your specific needs.

In [38]:

Copied!

print(pc)
print(pc)

Domain name: Regis
Number of objects: 26590
Collections: ['Cell boundaries', 'Nucleus boundaries', 'Transcripts', 'Cell centroids']
Labels: ['Cell ID', 'Transcript Counts', 'Cell Area', 'Cluster ID', 'Nucleus Area', 'Transcript ID', 'Regis_cluster', 'Neighbourhood ID'] 
Networks: ['Delaunay CC unfiltered', 'Delaunay CC filtered', 'prox network centroids 10', 'prox network centroids 20', 'prox network centroids 50'] 
Distance matrices: ['object_boundary_distance_cache']

In [50]:

Copied!





# Perform community detection using the Louvain method with a resolution
res = 0.1

communities_res_1 = ms.networks.community_detection(
    pc,
    network_name="Delaunay CC filtered",
    edge_weight_name=None,
    community_method="louvain",
    community_method_parameters=dict(resolution=res),
    community_label_name=f"Communities : Res = {res}",
)
# Perform community detection using the Louvain method with a resolution
res = 0.1

communities_res_1 = ms.networks.community_detection(
    pc,
    network_name="Delaunay CC filtered",
    edge_weight_name=None,
    community_method="louvain",
    community_method_parameters=dict(resolution=res),
    community_label_name=f"Communities : Res = {res}",
)

In [57]:

Copied!





# Visualise the network with communities detected at resolution
ms.visualise.visualise_network(
    pc,
    network_name="Delaunay CC filtered",
    edge_weight_name=None,
    visualise_kwargs=dict(
        color_by=f"Communities : Res = {res}",
        marker_size=10,
        scatter_kwargs=dict(linewidth=0.01, edgecolor=None),
    ),
    figure_kwargs=dict(figsize=(20, 15)),
)
# Visualise the network with communities detected at resolution
ms.visualise.visualise_network(
    pc,
    network_name="Delaunay CC filtered",
    edge_weight_name=None,
    visualise_kwargs=dict(
        color_by=f"Communities : Res = {res}",
        marker_size=10,
        scatter_kwargs=dict(linewidth=0.01, edgecolor=None),
    ),
    figure_kwargs=dict(figsize=(20, 15)),
)

Out[57]:

(<Figure size 2000x1500 with 2 Axes>, <Axes: >)

Network filteration¶

Network filtrations are a powerful tool for analysing spatial networks at multiple resolutions. In a graph filtration, the network is systematically simplified or expanded by progressively adding or removing edges or nodes based on a chosen parameter, such as spatial distance or interaction strength. Properties of the network (i.e., centralities, structural measures) are then computed during this filtration process

Topological Data Analysis¶

Creating spatial networks¶

Delaunay networks from point-like data¶

Proximity based networks¶

Point-like objects¶

Shape-like objects¶

KNN based networks¶

Spatial Statistics¶

Cross pair correlation function (cross-PCF)¶

The weighted pair correlation function (wPCF)¶

Spatial Autocorrelation (Hotspot Analysis)¶

Spatial Network Analysis¶

Spatial Network Analysis¶

Neighbourhood clustering¶

Community detection¶

Network filteration¶

Topological Data Analysis¶

Distribution Analysis¶

Quantifying Morphology¶