jimg_ncd.nuclei

   1import base64
   2import copy
   3import glob
   4import json
   5import os
   6import random
   7import re
   8import tempfile
   9import webbrowser
  10from collections import Counter
  11from io import BytesIO
  12
  13import cv2
  14import harmonypy as harmonize
  15import matplotlib.pyplot as plt
  16import numpy as np
  17import pandas as pd
  18import plotly.io as pio
  19import plotly.offline as pyo
  20import seaborn as sns
  21import skimage
  22import umap
  23from csbdeep.utils import normalize
  24from scipy.cluster.hierarchy import leaves_list, linkage
  25from skimage import measure
  26from sklearn.cluster import DBSCAN
  27from sklearn.decomposition import PCA
  28from sklearn.preprocessing import MinMaxScaler, StandardScaler
  29from stardist.models import StarDist2D
  30from stardist.plot import render_label
  31from tqdm import tqdm
  32
  33pio.renderers.default = "browser"
  34
  35import jimg_ncd.config as cfg
  36
  37from .utils import *
  38
# Fixed seed so random image sampling (e.g. adjust_images(img_n=...)) is reproducible.
random.seed(42)


# New features: nuclei adjustment and repair of images.
  43
  44
  45class RepTools:
  46    """
  47    A utility class for processing and repairing nuclei data.
  48    Provides methods for extracting subsets, removing outliers, computing geometrical features,
  49    and merging/splitting nuclei based on spatial and intensity criteria.
  50    """
  51
  52    def extract_dict_by_indices(self, d, indices):
  53        """
  54        Extracts elements from all dictionary lists using provided indices.
  55
  56        Parameters
  57        ----------
  58        d : dict
  59            Dictionary with list values.
  60
  61        indices : list
  62            List of indices to extract from each dictionary entry.
  63
  64        Returns
  65        -------
  66        dict
  67            Dictionary containing only the selected elements.
  68        """
  69
  70        return {
  71            key: [values[i] for i in indices if i < len(values)]
  72            for key, values in d.items()
  73        }
  74
  75    def drop_outlires(self, row, diff_FC_intensity=0.95, circ=0.6):
  76        """
  77        Identify indices of nuclei that are considered outliers based on circularity and intensity.
  78
  79        Parameters
  80        ----------
  81        row : dict
  82            Dictionary containing nuclei properties, including 'circularity' and 'intensity_mean'.
  83
  84        diff_FC_intensity : float
  85            Fraction of mean intensity below which a nucleus is considered an outlier.
  86
  87        circ : float
  88            Minimum circularity threshold for nuclei to be considered.
  89
  90        Returns
  91        -------
  92        list
  93            List of indices to drop as outliers.
  94        """
  95
  96        cd = []
  97        for n, _ in enumerate(row["circularity"]):
  98            if row["circularity"][n] > circ:
  99                cd.append(n)
 100
 101        row = self.extract_dict_by_indices(row, cd)
 102
 103        drop = []
 104        is_mean = np.mean(row["intensity_mean"])
 105
 106        for n, _ in enumerate(row["intensity_mean"]):
 107            FC_mean = row["intensity_mean"][n] / is_mean
 108            if FC_mean < diff_FC_intensity:
 109                drop.append(n)
 110
 111        return drop
 112
 113    def nn(self, coords):
 114        """
 115        Compute close neighbors between nuclei coordinates using a threshold distance.
 116
 117        Parameters
 118        ----------
 119        coords : list
 120            List of numpy arrays, each containing coordinates for a nucleus.
 121
 122        Returns
 123        -------
 124        dict
 125            Dictionary mapping pairs of nuclei indices to the number of close neighbors.
 126        """
 127
 128        full_list = {}
 129        for i in range(len(coords)):
 130            for j in range(len(coords)):
 131                if i != j:
 132
 133                    tree1 = cKDTree(coords[i])
 134
 135                    distances, indices = tree1.query(coords[j])
 136
 137                    threshold = 2
 138                    close_neighbors = np.sum(distances < threshold)
 139
 140                    full_list[f"{i} --> {j}"] = close_neighbors
 141
 142        return full_list
 143
 144    def compute_axes_length(self, contour):
 145        """
 146        Compute major and minor axis lengths of a nucleus from its contour.
 147
 148        Parameters
 149        ----------
 150        contour : np.ndarray
 151            Coordinates of nucleus contour points.
 152
 153        Returns
 154        -------
 155        tuple
 156            Major and minor axis lengths.
 157        """
 158
 159        cov = np.cov(contour.T)
 160
 161        eigvals, _ = np.linalg.eigh(cov)
 162
 163        axis_major_length = 2 * np.sqrt(eigvals.max())
 164        axis_minor_length = 2 * np.sqrt(eigvals.min())
 165
 166        return axis_major_length, axis_minor_length
 167
 168    def compute_eccentricity(self, contour):
 169        """
 170        Compute eccentricity of a nucleus from its contour.
 171
 172        Parameters
 173        ----------
 174        contour : np.ndarray
 175            Coordinates of nucleus contour points.
 176
 177        Returns
 178        -------
 179        float
 180            Eccentricity of the nucleus.
 181        """
 182
 183        cov = np.cov(contour.T)
 184        eigvals, _ = np.linalg.eigh(cov)
 185
 186        eccentricity = np.sqrt(1 - (eigvals.min() / eigvals.max()))
 187        return eccentricity
 188
 189    def compute_feret_diameter(self, contour):
 190        """
 191        Compute the Feret diameter of a given contour.
 192
 193        The Feret diameter is defined as the maximum pairwise Euclidean distance between points in the contour.
 194
 195        Parameters
 196        ----------
 197        contour : np.ndarray
 198            Array of shape (N, 2) representing the contour coordinates.
 199
 200        Returns
 201        -------
 202        float
 203            The maximum distance between any two points in the contour.
 204        """
 205
 206        rect = cv2.minAreaRect(contour)
 207        (w, h) = rect[1]
 208        return max(w, h)
 209
 210    def compute_perimeter(self, contour):
 211        """
 212        Compute the perimeter of a contour.
 213
 214        The perimeter is calculated as the sum of Euclidean distances between consecutive points in the contour.
 215
 216        Parameters
 217        ----------
 218        contour : np.ndarray
 219            Array of shape (N, 2) representing the contour coordinates.
 220
 221        Returns
 222        -------
 223        float
 224            Perimeter length of the contour.
 225        """
 226
 227        return np.sum(np.linalg.norm(np.diff(contour, axis=0), axis=1))
 228
 229    def compute_circularity(self, contour):
 230        """
 231        Compute the circularity of a contour.
 232
 233        Circularity is a measure of how close the shape is to a perfect circle.
 234        It is calculated as 4 * pi * (area / perimeter^2).
 235
 236        Parameters
 237        ----------
 238        contour : np.ndarray
 239            Array of shape (N, 2) representing the contour coordinates.
 240
 241        Returns
 242        -------
 243        float
 244            Circularity of the contour. Value ranges from 0 to 1, where 1 indicates a perfect circle.
 245        """
 246        perimeter = self.compute_perimeter(contour)
 247        hull = ConvexHull(contour)
 248        area = hull.volume
 249
 250        return (4 * np.pi * area) / (perimeter**2)
 251
 252    def repairing_nuclei(self, results):
 253        """
 254        Repair nuclei segmentation results by merging or removing outlier nuclei.
 255
 256        This method adjusts nuclei detection results based on global and local thresholds, circularity, nearest neighbor relationships,
 257        and merges small or fragmented nuclei when appropriate. It also recalculates key morphological properties for merged nuclei.
 258
 259        Parameters
 260        ----------
 261        results : dict
 262            Dictionary where keys are image identifiers and values are dictionaries containing detected nuclei properties
 263            (e.g., 'area', 'coords', 'label', 'circularity', etc.).
 264
 265        Returns
 266        -------
 267        dict
 268            A dictionary in the same structure as `results`, but with repaired nuclei information after merging or removing outliers.
 269        """
 270
 271        # repairing nuclei
 272        mean_sum_area = []
 273        im = []
 274        n = []
 275        for r in tqdm(results.keys()):
 276            mean_sum_area.append(np.sum(results[r]["area"]))
 277            n.append(len(results[r]["area"]))
 278            im.append(r)
 279
 280        mean_sum_area_sum = np.mean(mean_sum_area)
 281
 282        results_dict = {}
 283
 284        print("\nImage repairing:\n\n")
 285
 286        for i, m in tqdm(zip(im, n), total=len(im)):
 287
 288            if (
 289                m > 1
 290                and np.sum(results[i]["area"]) / mean_sum_area_sum
 291                < self.hyperparameter_nuclei["FC_diff_global"]
 292            ):
 293                # adjustment to global changes
 294
 295                temporary_dict = results[i]
 296
 297                check_drop = self.drop_outlires(
 298                    temporary_dict,
 299                    diff_FC_intensity=self.hyperparameter_nuclei[
 300                        "FC_diff_local_intensity"
 301                    ],
 302                    circ=self.hyperparameter_nuclei["circularity"],
 303                )
 304
 305                to_final = [
 306                    x
 307                    for x in list(range(len(temporary_dict["area"])))
 308                    if int(x) not in check_drop
 309                ]
 310
 311                tmp = self.extract_dict_by_indices(temporary_dict, to_final)
 312
 313                to_concat = []
 314
 315                if len(tmp["coords"]) > 1:
 316
 317                    results_nn = self.nn(tmp["coords"])
 318
 319                    for kn in results_nn.keys():
 320                        if results_nn[kn] > self.hyperparameter_nuclei["nn_min"]:
 321                            to_concat.append(int(re.sub(" --> .*", "", kn)))
 322                            to_concat.append(int(re.sub(".* --> ", "", kn)))
 323
 324                    to_concat = list(set(to_concat))
 325
 326                    to_rest = [
 327                        x for x in list(range(len(tmp["area"]))) if x not in to_concat
 328                    ]
 329
 330                #
 331                if len(to_concat) > 1:
 332                    to_concat_dict = self.extract_dict_by_indices(tmp, to_concat)
 333                    to_concat_dict["coords"] = [np.vstack(to_concat_dict["coords"])]
 334                    to_concat_dict["label"] = [min(to_concat_dict["label"])]
 335                    to_concat_dict["area"] = [np.sum(to_concat_dict["area"])]
 336                    to_concat_dict["area_bbox"] = [np.sum(to_concat_dict["area_bbox"])]
 337                    to_concat_dict["area_convex"] = [
 338                        np.sum(to_concat_dict["area_convex"])
 339                    ]
 340                    to_concat_dict["area_filled"] = [
 341                        np.sum(to_concat_dict["area_filled"])
 342                    ]
 343                    to_concat_dict["intensity_max"] = [
 344                        np.max(to_concat_dict["intensity_max"])
 345                    ]
 346                    to_concat_dict["intensity_mean"] = [
 347                        np.mean(to_concat_dict["intensity_mean"])
 348                    ]
 349                    to_concat_dict["intensity_min"] = [
 350                        np.min(to_concat_dict["intensity_min"])
 351                    ]
 352                    major, minor = self.compute_axes_length(to_concat_dict["coords"][0])
 353                    to_concat_dict["axis_major_length"] = [major]
 354                    to_concat_dict["axis_minor_length"] = [minor]
 355                    to_concat_dict["ratio"] = [minor / major]
 356                    ecc = self.compute_eccentricity(to_concat_dict["coords"][0])
 357                    to_concat_dict["eccentricity"] = [ecc]
 358                    to_concat_dict["equivalent_diameter_area"] = [
 359                        np.sum(to_concat_dict["equivalent_diameter_area"])
 360                    ]
 361                    feret_diameter = self.compute_feret_diameter(
 362                        to_concat_dict["coords"][0]
 363                    )
 364                    to_concat_dict["feret_diameter_max"] = [feret_diameter]
 365                    to_concat_dict["solidity"] = [np.mean(to_concat_dict["solidity"])]
 366                    to_concat_dict["perimeter"] = [np.sum(to_concat_dict["perimeter"])]
 367                    to_concat_dict["perimeter_crofton"] = [
 368                        np.sum(to_concat_dict["perimeter_crofton"])
 369                    ]
 370                    to_concat_dict["circularity"] = [
 371                        np.mean(to_concat_dict["circularity"])
 372                    ]
 373
 374                    to_rest_dict = self.extract_dict_by_indices(tmp, to_rest)
 375
 376                    for ik in to_rest_dict.keys():
 377                        to_rest_dict[ik] = to_rest_dict[ik] + to_concat_dict[ik]
 378
 379                    results_dict[i] = to_rest_dict
 380
 381                else:
 382                    results_dict[i] = tmp
 383
 384            elif (
 385                m == 1
 386                and results[i]["circularity"][0]
 387                > self.hyperparameter_nuclei["circularity"]
 388            ):
 389
 390                results_dict[i] = results[i]
 391
 392        return results_dict
 393
 394
 395class ImagesManagement:
 396    """
 397    A class for managing, preprocessing, merging, stitching, saving, and loading
 398    microscopy or flow cytometry images used in NucleiFinder-based analyses.
 399
 400    This class provides a unified interface for:
 401
 402    - loading image data,
 403    - selecting images by IDs,
 404    - preprocessing images (equalization, CLAHE, gamma/contrast/brightness adjustment),
 405    - merging images with user-defined intensity ratios,
 406    - stitching images horizontally,
 407    - retrieving and saving processed image sets.
 408
 409    The class stores original or loaded data in the ``results_images`` attribute,
 410    and all processed images in ``prepared_images`` under user-defined acronyms.
 411    These acronyms allow flexible retrieval with ``get_prepared_images()``
 412    and exporting via ``save_prepared_images()``.
 413
 414    Parameters
 415    ----------
 416    images_ids : list[int]
 417        List of selected image identifiers.
 418
 419    result_dict : dict or None
 420        Dictionary containing raw or preprocessed images.
 421        If ``None``, images may later be loaded or processed from file.
 422
 423    experiment_name : str
 424        Name of the experiment. Used for saving and structuring output.
 425
 426    Attributes
 427    ----------
 428    images_ids : list[int]
 429        IDs of images managed by the class.
 430
 431    results_images : dict or None
 432        Dictionary containing raw or analysis-derived images.
 433
 434    experiment_name : str
 435        Name of the experiment. Used in saved filenames.
 436
 437    prepared_images : dict
 438        Container for processed/adjusted/merged/stitched images,
 439        indexed by user-defined acronyms.
 440
 441    Notes
 442    -----
 443    Processed images are stored only in memory until saved explicitly with
 444    ``save_prepared_images()``.
 445
 446    Raw images loaded from NucleiFinder analyses can be saved for later reuse
 447    in a serialized `.inuc` format using ``save_raw()``.
 448
 449    Examples
 450    --------
 451    Load image results from an analysis:
 452
 453    >>> manager = ImagesManagement.load_experimental_images(results, "experiment1")
 454
 455    Adjust selected images:
 456
 457    >>> manager.adjust_images(
 458    ...     acronyme="adj",
 459    ...     path_to_images="path/to/imgs",
 460    ...     eq=True,
 461    ...     clahe=True
 462    ... )
 463
 464    Merge multiple prepared sets:
 465
 466    >>> manager.image_merging(["adj", "other"], ratio_list=[0.7, 0.3])
 467
 468    Retrieve processed images:
 469
 470    >>> imgs = manager.get_prepared_images("adj")
 471
 472    Save stitched images to disk:
 473
 474    >>> manager.save_prepared_images("stitched_adj_other", "./output/")
 475    """
 476
 477    def __init__(self, images_ids, result_dict, experiment_name):
 478        """
 479        Initialize the ImagesManagement object.
 480
 481        Parameters
 482        ----------
 483        images_ids : list[int]
 484            List of image identifiers.
 485
 486        result_dict : dict or None
 487            Dictionary containing processed images.
 488
 489        experiment_name : str
 490            Name of the experiment.
 491        """
 492
 493        self.images_ids = images_ids
 494        """Stores the list of image IDs managed by this instance."""
 495        self.results_images = result_dict
 496        """Stores dictionary containing processed images."""
 497        self.experiment_name = experiment_name
 498        """Stores the experiment name for file naming and organizational purposes."""
 499        self.prepared_images = {}
 500        """Dictionary for storing processed images (adjusted, merged, stitched),
 501        indexed by user-defined acronyms for flexible retrieval."""
 502
 503    @classmethod
 504    def load_from_dict(cls, path: str, experiment_name: str):
 505        """
 506        Load an ImagesManagement instance from a `.inuc` serialized dictionary.
 507
 508        Parameters
 509        ----------
 510        path : str
 511            Path to the `.inuc` file exported with `save_raw()`.
 512
 513        experiment_name : str
 514            Name of the experiment.
 515
 516        Returns
 517        -------
 518        ImagesManagement
 519            A reconstructed ImagesManagement object.
 520        """
 521
 522        if ".inuc" in path:
 523
 524            if os.path.exists(path):
 525
 526                loaded_data = np.load(path)
 527                data_dict = {key: loaded_data[key] for key in loaded_data}
 528
 529                id_list = []
 530
 531                for k in data_dict.keys():
 532                    id_list.append(re.sub("_.*", "", k))
 533
 534                return cls(id_list, data_dict, experiment_name)
 535
 536            else:
 537                raise ValueError("\nInvalid path!")
 538
 539        else:
 540            raise ValueError(
 541                "\nInvalid dictionary to load. It must contain a .inuc extension!"
 542            )
 543
 544    @classmethod
 545    def load_experimental_images(cls, results_dict: dict, experiment_name: str):
 546        """
 547        Load results exported from NucleiFinder series analysis.
 548
 549        Initialize the object with results from series_analysis_nuclei()
 550        or series_analysis_chromatinization() of the NucleiFinder class.
 551
 552
 553        Parameters
 554        ----------
 555        results_dict : dict
 556            Dictionary returned by `series_analysis_nuclei()` or
 557            `series_analysis_chromatinization()`.
 558
 559        experiment_name : str
 560            Name of the experiment.
 561
 562        Returns
 563        -------
 564        ImagesManagement
 565
 566        """
 567
 568        res_dict = {}
 569        id_list = []
 570
 571        if set(results_dict[list(results_dict.keys())[0]].keys()) != set(
 572            ["stats", "img"]
 573        ):
 574            raise ValueError(
 575                "Incorrect data provided. The data must come from series_analysis_nuclei() "
 576                "or series_analysis_chromatinization() of the NucleiFinder class."
 577            )
 578
 579        for k in results_dict.keys():
 580            res_dict[k] = results_dict[k]["img"]
 581            id_list.append(re.sub("_.*", "", k))
 582
 583        return cls(id_list, res_dict, experiment_name)
 584
 585    @classmethod
 586    def load_images_ids(cls, images_ids: list, experiment_name: str):
 587        """
 588        Initialize the object with list of images IDs for porcesing.
 589
 590        Parameters
 591        ----------
 592        images_ids : list[int]
 593            List of selected image IDs.
 594
 595        experiment_name : str
 596            Name of the experiment.
 597
 598        Returns
 599        -------
 600        ImagesManagement
 601
 602        """
 603
 604        if len(images_ids) == 0:
 605            raise ValueError(
 606                "Incorrect data provided. There must be a list of image IDs."
 607            )
 608
 609        return cls(images_ids, None, experiment_name)
 610
 611    def get_included_acronyms(self):
 612        """
 613        Print the data acronyms for adjusted images, processed using the
 614        self.adjust_images(), self.image_merging(), and self.image_stitching() methods.
 615
 616        Acronym information is essential for retrieving and saving data using
 617        the self.get_prepared_images() and self.save_prepared_images() methods.
 618
 619        Notes
 620        -----
 621        This method prints the list of available acronyms but does not return it.
 622
 623        """
 624
 625        if len(self.prepared_images.keys()) > 0:
 626            print("\nAvaiable stored images:\n")
 627            for kd in self.prepared_images.keys():
 628                print(kd)
 629
 630        else:
 631            print("Nothing to return!")
 632
 633    def get_prepared_images(self, acronyme=None):
 634        """
 635        Retrieves the prepared images (returned from adjust_images()) stored in the object.
 636
 637
 638        Parameters
 639        ----------
 640        acronyme : str or None
 641            Acronym identifying a processed image set. If None, prints available keys.
 642
 643
 644        Returns
 645        -------
 646        dict
 647            Dictionary of prepared images.
 648        """
 649
 650        if acronyme is None:
 651
 652            self.get_included_acronyms()
 653
 654        else:
 655
 656            if acronyme in list(self.prepared_images.keys()):
 657                return self.prepared_images[acronyme]
 658
 659            raise ValueError("Incorrect acronyme!")
 660
 661    def save_prepared_images(self, acronyme: str, path_to_save: str = ""):
 662        """
 663        Saves prepared images (returned from adjust_images() method) to the specified directory.
 664
 665        Parameters
 666        ----------
 667        path_to_save : str
 668            Directory path where the images will be saved. Default is the current working directory.
 669
 670        """
 671        if acronyme is None:
 672
 673            self.get_included_acronyms()
 674
 675        else:
 676
 677            if acronyme in list(self.prepared_images.keys()):
 678
 679                path_to_save = os.path.join(
 680                    path_to_save, f"{self.experiment_name}_{acronyme}"
 681                )
 682
 683                if not os.path.exists(path_to_save):
 684                    os.makedirs(path_to_save, exist_ok=True)
 685
 686                for i in tqdm(self.prepared_images[acronyme].keys()):
 687                    cv2.imwrite(
 688                        os.path.join(path_to_save, i + ".png"),
 689                        self.prepared_images[acronyme][i],
 690                    )
 691
 692            else:
 693                raise ValueError("Incorrect acronyme!")
 694
 695    def adjust_images(
 696        self,
 697        acronyme: str,
 698        path_to_images: str,
 699        file_extension: str = "tif",
 700        eq: bool = True,
 701        clahe: bool = True,
 702        kernal: tuple = (50, 50),
 703        fille_name_part: str = "",
 704        color: str = "gray",
 705        max_intensity: int = 65535,
 706        min_intenisty: int = 0,
 707        brightness: int = 1000,
 708        contrast: float = 1.0,
 709        gamma: float = 1.0,
 710        img_n: int = 0,
 711    ):
 712        """
 713        Prepares selected images for processing, applying histogram equalization and CLAHE, if required.
 714
 715        Parameters
 716        ----------
 717        acronyme : str
 718            Name of images being adjusted in this run.
 719
 720        path_to_images : str
 721            Path to the directory containing images.
 722
 723        file_extension : str
 724            Image file extension. Default is 'tiff'.
 725
 726        eq : bool
 727            Whether to apply histogram equalization. Default is True.
 728
 729        clahe : bool
 730            Whether to apply CLAHE. Default is True.
 731
 732        kernal : tuple
 733            Kernel size for CLAHE. Default is (50, 50).
 734
 735        fille_name_part : str
 736            Part of the file name to filter images. Default is an empty string.
 737
 738        color : str
 739            Color space to use. Default is 'gray'.
 740
 741        max_intensity : int
 742            Maximum intensity for image adjustment. Default is 65535.
 743
 744        min_intenisty : int
 745            Minimum intensity for image adjustment. Default is 0.
 746
 747        brightness : int
 748            Brightness adjustment value. Default is 1000.
 749
 750        contrast : float
 751            Contrast adjustment factor. Default is 1.0.
 752
 753        gamma : float
 754            Gamma correction factor. Default is 1.0.
 755
 756        img_n : int
 757            Number of images to process. Default is 0, which means all images.
 758
 759
 760        Returns
 761        -------
 762        dict
 763            Dictionary containing the processed images.
 764
 765        Notes
 766        -----
 767        To access the processed images, use the ``get_prepared_images()`` method.
 768
 769        To save the processed images to disk, use the ``save_prepared_images()`` method.
 770        """
 771
 772        results_dict = {}
 773
 774        files = glob.glob(os.path.join(path_to_images, "*." + file_extension))
 775
 776        if len(fille_name_part) > 0:
 777            files = [x for x in files if fille_name_part.lower() in x.lower()]
 778
 779        selected_id = self.images_ids
 780
 781        if len(selected_id) > 0:
 782            selected_id = [str(x) for x in selected_id]
 783            files = [
 784                x
 785                for x in files
 786                if re.sub("_.*", "", os.path.basename(x)) in selected_id
 787            ]
 788
 789        if img_n > 0:
 790
 791            files = random.sample(files, img_n)
 792
 793        for file in tqdm(files):
 794
 795            image = load_image(file)
 796
 797            try:
 798                image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 799            except:
 800                pass
 801
 802            if eq is True:
 803                image = equalizeHist_16bit(image)
 804
 805            if clahe is True:
 806                image = clahe_16bit(image, kernal=kernal)
 807
 808            image = adjust_img_16bit(
 809                img=image,
 810                color=color,
 811                max_intensity=max_intensity,
 812                min_intenisty=min_intenisty,
 813                brightness=brightness,
 814                contrast=contrast,
 815                gamma=gamma,
 816            )
 817
 818            results_dict[os.path.basename(file)] = image
 819
 820        self.prepared_images[acronyme] = results_dict
 821
 822    def image_merging(self, acronyms: list, ratio_list: list):
 823        """
 824        Merge previously prepared images stored in `self.prepared_images`,
 825        adjusted based on the image ratios. The used ratios adjust relative image intensity.
 826
 827        Parameters
 828        ----------
 829        acronyme : list
 830            List of image names to be merged.
 831
 832        ratio_list : list[float]
 833            List of ratio intensity values (0.0–1.0) for the merged image.
 834            The `acronyme` list and `ratio_list` must be of the same length.
 835
 836        Returns
 837        -------
 838        dict
 839            Dictionary of processed images.
 840
 841        Notes
 842        -----
 843        To access the processed images, use the ``get_prepared_images()`` method.
 844
 845        To save the processed images to disk, use the ``save_prepared_images()`` method.
 846        """
 847
 848        for a in acronyms:
 849            if a not in list(self.prepared_images.keys()):
 850                raise ValueError(f"Incorrect {a} acronyme!")
 851
 852        results_img = {}
 853        for k in self.images_ids:
 854            img_list = []
 855            for a in acronyms:
 856                nam = [
 857                    x
 858                    for x in self.prepared_images[a].keys()
 859                    if str(k) == re.sub("_.*", "", x)
 860                ]
 861                if len(nam) == 0:
 862                    print(f"There were not images for {k} ids")
 863                    break
 864
 865                img_list.append(self.prepared_images[a][nam[0]])
 866
 867            if len(img_list) == len(acronyms):
 868                results_img[f'{k}_{"_".join(acronyms)}'] = merge_images(
 869                    img_list, ratio_list
 870                )
 871
 872        self.prepared_images[f'merged_{"_".join(acronyms)}'] = results_img
 873
 874        print(f'Images stored in self.prepared_images["merged_{"_".join(acronyms)}"]')
 875
 876    def image_stitching(self, acronyms: list, to_results_image: bool = False):
 877        """
 878        Stitch (horizontally) previously prepared images stored in `self.prepared_images`.
 879
 880        Parameters
 881        ----------
 882        acronyme : list
 883            List of image names to be stitched.
 884
 885        to_results_image : bool
 886            Boolean value indicating whether images obtained from the
 887            `series_analysis_nuclei()` or `series_analysis_chromatinization()`
 888            methods of the `NucleiFinder` class should be stitched to the right
 889            side of the images in the `acronyme` list.
 890
 891        Returns
 892        -------
 893        dict
 894            Dictionary of processed images.
 895
 896        Notes
 897        -----
 898        To access the processed images, use the ``get_prepared_images()`` method.
 899
 900        To save the processed images to disk, use the ``save_prepared_images()`` method.
 901        """
 902
 903        for a in acronyms:
 904            if a not in list(self.prepared_images.keys()):
 905                raise ValueError(f"Incorrect {a} acronyme!")
 906
 907        results_img = {}
 908        for k in tqdm(self.images_ids):
 909            img_list = []
 910            for a in acronyms:
 911                nam = [
 912                    x
 913                    for x in self.prepared_images[a].keys()
 914                    if str(k) == re.sub("_.*", "", x)
 915                ]
 916                if len(nam) == 0:
 917                    print(f"There were not images for {k} ids")
 918                    break
 919
 920                img_list.append(self.prepared_images[a][nam[0]])
 921
 922            if to_results_image:
 923                nam = [
 924                    x
 925                    for x in self.results_images.keys()
 926                    if str(k) == re.sub("_.*", "", x)
 927                ]
 928                if len(nam) != 0:
 929                    img_list.append(self.results_images[nam[0]])
 930
 931                if len(img_list) == len(acronyms) + 1:
 932                    results_img[f'{k}_{"_".join(acronyms)}_res'] = cv2.hconcat(img_list)
 933
 934            elif to_results_image is not False:
 935                if len(img_list) == len(acronyms):
 936                    results_img[f'{k}_{"_".join(acronyms)}'] = cv2.hconcat(img_list)
 937
 938        self.prepared_images[f'stitched_{"_".join(acronyms)}'] = results_img
 939
 940        print(f'Images stored in self.prepared_images["stitched_{"_".join(acronyms)}"]')
 941
 942    def save_raw(self, path_to_save: str = ""):
 943        """
 944        Save `self.results_images` loaded by the `self.load_experimental_images()` method,
 945        obtained from the `series_analysis_nuclei()` or `series_analysis_chromatinization()`
 946        methods of the `NucleiFinder` class for later usage with cls.load_from_dict() method.
 947        The data will be saved with a `.inuc` extension.
 948
 949        Parameters
 950        ----------
 951        path_to_save : str
 952            The directory path where the images will be saved.
 953            Default is the current working directory.
 954        """
 955
 956        full_path = os.path.join(path_to_save, f"{self.experiment_name}.inuc")
 957
 958        np.savez(full_path, **self.results_images)
 959
 960
 961class NucleiFinder(ImageTools, RepTools):
 962    """
 963    Implements a comprehensive pipeline for automated segmentation,
 964    selection, and analysis of cell nuclei and their internal chromatin structure
 965    in microscopy images.
 966
 967    It utilizes a pre-trained deep learning model (StarDist2D) for initial
 968    nuclear identification, followed by the application of advanced morphological
 969    and intensity filters, and a dedicated algorithm for quantifying chromatinization.
 970    The class provides detailed control over the hyperparameters for both the
 971    segmentation process and image preprocessing stages.
 972
 973    Parameters
 974    ----------
 975    image : np.ndarray, optional
 976        The input image (typically 16-bit) for analysis.
 977
 978    test_results : list, optional
 979        Plots resulting from parameter testing (e.g., NMS/Prob combinations).
 980
 981    hyperparameter_nuclei : dict, optional
 982        Parameters for nuclei segmentation and filtering (e.g., 'nms', 'prob', 'min_size', 'circularity').
 983
 984    hyperparameter_chromatinization : dict, optional
 985        Parameters for segmenting and filtering chromatin spots (e.g., 'cut_point', 'ratio').
 986
 987    img_adj_par_chrom : dict, optional
 988        Image adjustment parameters (gamma, contrast) specifically for chromatin analysis.
 989
 990    img_adj_par : dict, optional
 991        Image adjustment parameters for nuclei segmentation.
 992
 993    show_plots : bool, optional
 994        Flag controlling the automatic display of visual results.
 995
 996    nuclei_results : dict, optional
 997        A dictionary storing numerical data (features) extracted from the nuclei.
 998
 999    images : dict, optional
1000        A dictionary storing output images and masks.
1001
1002    Attributes
1003    ----------
1004    image : np.ndarray
1005        The currently loaded image for analysis.
1006
1007    test_results : list
1008        The visual outcomes of NMS/Prob parameter tests.
1009
1010    hyperparameter_nuclei : dict
1011        A dictionary of active parameters used by the `find_nuclei()` and `select_nuclei()` methods.
1012
1013    hyperparameter_chromatinization : dict
1014        A dictionary of active parameters used by the `nuclei_chromatinization()` method.
1015
1016    img_adj_par : dict
1017        Image correction parameters for nuclei segmentation.
1018
1019    img_adj_par_chrom : dict
1020        Image correction parameters for chromatin analysis.
1021
1022    show_plots : bool
1023        The state of the plot display flag.
1024
1025    nuclei_results : dict
1026        Stores feature dictionaries for: all detected ('nuclei'), selected ('nuclei_reduced'),
1027        and chromatinization data ('nuclei_chromatinization').
1028
1029    images : dict
1030        Stores masks and images visualizing the results.
1031
1032    series_im : bool
1033        Flag indicating if the class is operating in a batch or series processing mode.
1034
1035    Methods
1036    -------
1037    set_nms(nms)
1038        Sets the Non-Maximum Suppression (NMS) threshold.
1039
1040    set_prob(prob)
1041        Sets the segmentation probability threshold.
1042
1043    set_nuclei_circularity(circ)
1044        Sets the minimum required circularity for a nucleus.
1045
1046    set_nuclei_local_intenisty_FC(local_FC)
1047        Sets the factor used for removing false positives based on local intensity differences.
1048
1049    set_nuclei_global_area_FC(global_FC)
1050        Sets the factor used for removing size-based outlier false positives.
1051
1052    set_nuclei_size(size)
1053        Sets the minimum and maximum area (in pixels) for nuclei selection.
1054
1055    set_nuclei_min_mean_intensity(intensity)
1056        Sets the minimum required mean intensity value for a nucleus.
1057
1058    set_chromatinization_size(size)
1059        Sets the minimum and maximum area (in pixels) for chromatin spot selection.
1060
1061    set_chromatinization_cut_point(cut_point)
1062        Sets the factor used to adjust the chromatin segmentation threshold (Otsu's method).
1063
1064    set_adj_image_gamma(gamma)
1065        Sets the gamma correction for the nuclei image.
1066
1067    set_adj_chrom_contrast(contrast)
1068        Sets the contrast adjustment for the chromatinization image.
1069
1070    current_parameters_nuclei (property)
1071        Returns the active nuclei segmentation and filtering parameters.
1072
1073    find_nuclei()
1074        Performs nuclei segmentation using StarDist and extracts initial features.
1075
1076    select_nuclei()
1077        Filters the detected nuclei based on set morphological and intensity criteria.
1078
1079    nuclei_chromatinization()
1080        Performs quantitative and morphological analysis of chromatin spots in selected nuclei.
1081
1082    get_features(model_out, image)
1083        Calculates geometric and intensity features from a segmented mask (label image).
1084
1085    Notes
1086    -----
1087    The typical analysis workflow follows this order:
1088    1. `input_image()`
1089    2. `find_nuclei()`
1090    3. `select_nuclei()` (Optional)
1091    4. `nuclei_chromatinization()` (Optional)
1092    """
1093
1094    def __init__(
1095        self,
1096        image=None,
1097        test_results=None,
1098        hyperparameter_nuclei=None,
1099        hyperparameter_chromatinization=None,
1100        img_adj_par_chrom=None,
1101        img_adj_par=None,
1102        show_plots=None,
1103        nuclei_results=None,
1104        images=None,
1105    ):
1106        """
1107        The main class for the detection and analysis of cell nuclei and their chromatinization
1108        in microscopy or flow cytometry images, utilizing the StarDist segmentation model.
1109
1110        This class inherits functionality for image processing (ImageTools) and
1111        results handling (RepTools).
1112
1113        Parameters
1114        ----------
1115        image : np.ndarray, optional
1116            The input image for analysis.
1117            Default: None.
1118
1119        test_results : list, optional
1120            A list of plots or images resulting from parameter testing.
1121            Default: None.
1122
1123        hyperparameter_nuclei : dict, optional
1124            The segmentation parameters for nuclei detection.
1125            Default:
1126            {'nms': 0.8, 'prob': 0.4, 'max_size': 1000, 'min_size': 20,
1127             'circularity': 0.6, 'intensity_mean': 6553.5, 'nn_min': 10,
1128             'FC_diff_global': 1.5, 'FC_diff_local_intensity': 0.6}
1129
1130        hyperparameter_chromatinization : dict, optional
1131            The analysis parameters for chromatin spots within the nuclei.
1132            Default:
1133            {'max_size': 800, 'min_size': 2, 'ratio': 0.1, 'cut_point': 0.95}
1134
1135        img_adj_par_chrom : dict, optional
1136            Image adjustment parameters (gamma, contrast, brightness) for chromatin analysis.
1137            Default: {'gamma': 0.25, 'contrast': 5, 'brightness': 950}
1138
1139        img_adj_par : dict, optional
1140            Image adjustment parameters (gamma, contrast, brightness) for nuclei segmentation.
1141            Default: {'gamma': 0.9, 'contrast': 2, 'brightness': 1000}
1142
1143        show_plots : bool, optional
1144            Flag to determine whether results and plots should be displayed automatically.
1145            Default: True.
1146
1147        nuclei_results : dict, optional
1148            A dictionary storing the numerical results of the analysis.
1149            Default: {'nuclei': None, 'nuclei_reduced': None, 'nuclei_chromatinization': None}
1150
1151        images : dict, optional
1152            A dictionary storing the output images (e.g., masks).
1153            Default: {'nuclei': None, 'nuclei_reduced': None, 'nuclei_chromatinization': None}
1154
1155        Attributes
1156        ----------
1157        image : np.ndarray
1158            The currently loaded image for analysis.
1159
1160        hyperparameter_nuclei : dict
1161            Active nuclei segmentation parameters.
1162
1163        hyperparameter_chromatinization : dict
1164            Active chromatinization analysis parameters.
1165
1166        img_adj_par : dict
1167            Active image correction parameters for nuclei segmentation.
1168
1169        img_adj_par_chrom : dict
1170            Active image correction parameters for chromatin analysis.
1171
1172        show_plots : bool
1173            The current state of the plot display flag.
1174
1175        series_im : bool
1176            Flag indicating if a series of images is being processed.
1177
1178        Notes
1179        -----
1180        The default value for 'intensity_mean' in hyperparameter_nuclei is calculated
1181        as $(2^{16} - 1) / 10$, which represents 10% of the maximum 16-bit value (65535 / 10 = 6553.5).
1182
1183        The image adjustment parameters are crucial for optimizing contrast and brightness
1184        to improve the performance of both the StarDist model and the subsequent
1185        chromatin thresholding.
1186        """
1187
1188        # Use default values if parameters are None
1189        self.image = image or None
1190        """Loaded input image."""
1191        self.test_results = test_results or None
1192        """Results of parameter tests.
1193
1194            This attribute or method stores the outcomes of parameter testing procedures.
1195            For interactive browsing and inspection of the results, use the 
1196            `browser_test(self)` method."""
1197
1198        self.hyperparameter_nuclei = hyperparameter_nuclei or {
1199            "nms": 0.8,
1200            "prob": 0.4,
1201            "max_size": 1000,
1202            "min_size": 20,
1203            "circularity": 0.6,
1204            "intensity_mean": (2**16 - 1) / 10,
1205            "nn_min": 10,
1206            "FC_diff_global": 1.5,
1207            "FC_diff_local_intensity": 0.6,
1208        }
1209        """Active nuclei segmentation/filter parameters."""
1210
1211        self.hyperparameter_chromatinization = hyperparameter_chromatinization or {
1212            "max_size": 800,
1213            "min_size": 2,
1214            "ratio": 0.1,
1215            "cut_point": 0.95,
1216        }
1217        """Active chromatin analysis parameters."""
1218
1219        self.img_adj_par_chrom = img_adj_par_chrom or {
1220            "gamma": 0.25,
1221            "contrast": 5,
1222            "brightness": 950,
1223        }
1224        """Image adjustment for chromatin analysis."""
1225
1226        self.img_adj_par = img_adj_par or {
1227            "gamma": 0.9,
1228            "contrast": 2,
1229            "brightness": 1000,
1230        }
1231        """Image adjustment for nuclei segmentation."""
1232
1233        self.show_plots = show_plots or True
1234        """Flag controlling plot display."""
1235
1236        self.nuclei_results = nuclei_results or {
1237            "nuclei": None,
1238            "nuclei_reduced": None,
1239            "nuclei_chromatinization": None,
1240        }
1241        """Stored dictionary of nuclei analysis results."""
1242
1243        self.images = images or {
1244            "nuclei": None,
1245            "nuclei_reduced": None,
1246            "nuclei_chromatinization": None,
1247        }
1248        """Stored dictionary of images from nuclei analysis."""
1249
1250        # sereies images
1251        self.series_im = False
1252        """Flag for batch/series image processing."""
1253
1254    def set_nms(self, nms: float):
1255        """
1256        Set the Non-Maximum Suppression (NMS) threshold.
1257
1258        The NMS threshold controls how aggressively overlapping detections are suppressed.
1259        A lower value reduces the probability of overlapping nuclei being kept.
1260
1261        Parameters
1262        ----------
1263        nms : float
1264            The NMS IoU threshold value.
1265        """
1266
1267        self.hyperparameter_nuclei["nms"] = nms
1268
1269    def set_prob(self, prob: float):
1270        """
1271        Set the probability threshold used in segmentation.
1272
1273        The probability threshold determines the minimum confidence required for an object
1274        (e.g., a nucleus) to be classified as a segmented entity. Higher values result in
1275        fewer segmented objects, as only detections with strong confidence scores are kept.
1276        This may lead to omission of weaker or less distinct structures.
1277
1278        Because optimal values depend on image characteristics, it is important to visually
1279        inspect segmentation results produced with different thresholds to determine the
1280        most suitable setting.
1281
1282        Parameters
1283        ----------
1284        prob : float
1285            The probability threshold value.
1286        """
1287
1288        self.hyperparameter_nuclei["prob"] = prob
1289
1290    def set_nuclei_circularity(self, circ: float):
1291        """
1292        This method sets 'circ' parameter. The circ is a parameter used for adjust minimal nucleus circularity.
1293
1294        Parameters
1295        ----------
1296        circ : float
1297            Nuclei circularity value.
1298        """
1299
1300        self.hyperparameter_nuclei["circularity"] = circ
1301
1302    def set_nuclei_local_intenisty_FC(self, local_FC: float):
1303        """
1304        This method sets the 'FC_diff_local_intensity' parameter. The 'local_FC' is used to remove false positive multiple nuclei that were detected in single image.
1305
1306        Parameters
1307        ----------
1308        local_FC : float
1309            local_FC value.
1310        """
1311
1312        self.hyperparameter_nuclei["FC_diff_local_intensity"] = local_FC
1313
1314    # change
1315    def set_nuclei_global_area_FC(self, global_FC: float):
1316        """
1317        This method sets the 'FC_diff_global' parameter. The 'global_FC' is used to remove false positive multiple nuclei that were detected in a single image and are outliers from the global mean area size.
1318
1319        Parameters
1320        ----------
1321        FC_diff_global : float
1322            global_FC value.
1323        """
1324
1325        self.hyperparameter_nuclei["FC_diff_global"] = global_FC
1326
1327    def set_nuclei_size(self, size: tuple):
1328        """
1329        This method sets 'size' parameter. The size is a parameter used for adjust minimal and maximal nucleus area (px).
1330
1331        Parameters
1332        ----------
1333        size : tuple
1334            (min_value, max_value)
1335        """
1336
1337        self.hyperparameter_nuclei["min_size"] = size[0]
1338        self.hyperparameter_nuclei["max_size"] = size[1]
1339
1340    def set_nuclei_min_mean_intensity(self, intensity: int):
1341        """
1342        This method sets 'intensity' parameter. The 'intensity' parameter is used to adjust the minimum mean intensity of all pixel intensities within the nucleus.
1343
1344        Parameters
1345        ----------
1346        intensity : int
1347            intensity value.
1348        """
1349
1350        self.hyperparameter_nuclei["intensity_mean"] = intensity
1351
1352    def set_chromatinization_size(self, size: tuple):
1353        """
1354        This method sets 'size' parameter. The size is a parameter used for adjust minimal and maximal chromanitization spot area (px) within the nucleus.
1355
1356        Parameters
1357        ----------
1358        size : tuple
1359            (min_value, max_value)
1360        """
1361
1362        self.hyperparameter_chromatinization["min_size"] = size[0]
1363        self.hyperparameter_chromatinization["max_size"] = size[1]
1364
1365    def set_chromatinization_ratio(self, ratio: int):
1366        """
1367        This method sets the 'ratio' parameter. In this case, the 'ratio' parameter is similar to 'circularity' as it describes the ratio between the maximum lengths in the x and y dimensions of the nucleus chromatinization.
1368
1369        Parameters
1370        ----------
1371        ratio : float
1372            ratio value.
1373        """
1374
1375        self.hyperparameter_chromatinization["ratio"] = ratio
1376
1377    def set_chromatinization_cut_point(self, cut_point: int):
1378        """
1379        This method sets the 'cut_point' parameter. The 'cut_point' parameter is a factor used to adjust the threshold for separating the background from chromatin spots.
1380
1381        Parameters
1382        ----------
1383        cut_point : int
1384            cut_point value.
1385        """
1386
1387        self.hyperparameter_chromatinization["cut_point"] = cut_point
1388
1389    #
1390
1391    def set_adj_image_gamma(self, gamma: float):
1392        """
1393        This method sets 'gamma' parameter. The gamma is a parameter used for adjust gamma of the nucleus image.
1394
1395        Parameters
1396        ----------
1397        gamma : float
1398            gamma value.
1399        """
1400
1401        self.img_adj_par["gamma"] = gamma
1402
1403    def set_adj_image_contrast(self, contrast: float):
1404        """
1405        This method sets 'contrast' parameter. The contrast is a parameter used for adjust contrast of the nucleus image.
1406
1407        Parameters
1408        ----------
1409        contrast : float
1410            contrast value.
1411        """
1412
1413        self.img_adj_par["contrast"] = contrast
1414
1415    def set_adj_image_brightness(self, brightness: float):
1416        """
1417        This method sets 'brightness' parameter. The brightness is a parameter used for adjust brightness of the nucleus image.
1418
1419        Parameters
1420        ----------
1421        brightness : float
1422            brightness value.
1423        """
1424
1425        self.img_adj_par["brightness"] = brightness
1426
1427    #
1428
1429    def set_adj_chrom_gamma(self, gamma: float):
1430        """
1431        This method sets 'gamma' parameter. The gamma is a parameter used for adjust gamma of the nucleus chromatinization image.
1432
1433        Parameters
1434        ----------
1435        gamma : float
1436            gamma value.
1437        """
1438
1439        self.img_adj_par_chrom["gamma"] = gamma
1440
1441    def set_adj_chrom_contrast(self, contrast: float):
1442        """
1443        This method sets 'contrast' parameter. The contrast is a parameter used for adjust contrast of the nucleus chromatinization image.
1444
1445        Parameters
1446        ----------
1447        contrast : float
1448            contrast value.
1449        """
1450
1451        self.img_adj_par_chrom["contrast"] = contrast
1452
1453    def set_adj_chrom_brightness(self, brightness: float):
1454        """
1455        This method sets 'brightness' parameter. The brightness is a parameter used for adjust brightness of the nucleus chromatinization image.
1456
1457        Parameters
1458        ----------
1459        brightness : float
1460            brightness value.
1461        """
1462
1463        self.img_adj_par_chrom["brightness"] = brightness
1464
1465    @property
1466    def current_parameters_nuclei(self):
1467        """
1468        This method returns current nuclei analysis parameters.
1469
1470        Returns
1471        -------
1472        dict
1473            Nuclei analysis parameters.
1474        """
1475        print(self.hyperparameter_nuclei)
1476        return self.hyperparameter_nuclei
1477
1478    @property
1479    def current_parameters_chromatinization(self):
1480        """
1481        This method returns current nuclei chromatinization analysis parameters.
1482
1483        Returns
1484        -------
1485        dict
1486            Nuclei chromatinization analysis parameters.
1487        """
1488
1489        print(self.hyperparameter_chromatinization)
1490        return self.hyperparameter_chromatinization
1491
1492    @property
1493    def current_parameters_img_adj(self):
1494        """
1495        This method returns current nuclei image setup.
1496
1497        Returns
1498        -------
1499        dict
1500            Nuclei image setup.
1501        """
1502
1503        print(self.img_adj_par)
1504        return self.img_adj_par
1505
1506    @property
1507    def current_parameters_img_adj_chro(self):
1508        """
1509        This method returns current nuclei chromatinization image setup.
1510
1511        Returns
1512        -------
1513        dict
1514            Nuclei chromatinization image setup.
1515        """
1516
1517        print(self.img_adj_par_chrom)
1518        return self.img_adj_par_chrom
1519
1520    def get_results_nuclei(self):
1521        """
1522        This function returns nuclei analysis results.
1523
1524        Returns
1525        -------
1526        dict
1527            Nuclei results in the dictionary format.
1528        """
1529
1530        if self.images["nuclei"] is None:
1531            print("No results to return!")
1532            return None
1533        else:
1534            if cfg._DISPLAY_MODE:
1535                if self.show_plots:
1536                    display_preview(self.resize_to_screen_img(self.images["nuclei"]))
1537            return self.nuclei_results["nuclei"], self.images["nuclei"]
1538
1539    def get_results_nuclei_selected(self):
1540        """
1541        This function returns the results of the nuclei analysis following adjustments to the data selection thresholds.
1542
1543        Returns
1544        -------
1545        dict
1546            Nuclei results in the dictionary format.
1547        """
1548
1549        if self.images["nuclei_reduced"] is None:
1550            print("No results to return!")
1551            return None
1552        else:
1553            if cfg._DISPLAY_MODE:
1554                if self.show_plots:
1555                    display_preview(
1556                        self.resize_to_screen_img(self.images["nuclei_reduced"])
1557                    )
1558            return self.nuclei_results["nuclei_reduced"], self.images["nuclei_reduced"]
1559
1560    def get_results_nuclei_chromatinization(self):
1561        """
1562        This function returns the results of the nuclei chromatinization analysis.
1563
1564        Returns
1565        -------
1566        dict
1567            Nuclei chromatinization results in the dictionary format.
1568        """
1569
1570        if self.images["nuclei_chromatinization"] is None:
1571            print("No results to return!")
1572            return None
1573        else:
1574            if cfg._DISPLAY_MODE:
1575                if self.show_plots:
1576                    display_preview(self.images["nuclei_chromatinization"])
1577            return (
1578                self.nuclei_results["nuclei_chromatinization"],
1579                self.images["nuclei_chromatinization"],
1580            )
1581
1582    def add_test(self, plots):
1583        self.test_results = plots
1584
1585        """
1586        Helper method.
1587        """
1588
1589    def input_image(self, img):
1590        """
1591        This method adds the image to the class for nuclei and/or chromatinization analysis.
1592
1593        Parameters
1594        ----------
1595        img : np.ndarray
1596            Input image.
1597        """
1598
1599        self.image = img
1600        self.add_test(None)
1601
1602    def get_features(self, model_out, image):
1603        """
1604        Extracts numerical feature descriptors from model output for a given image.
1605
1606        This method processes the output returned by a feature-extraction model
1607        (e.g., CNN, encoder network, statistical model) and converts it into a
1608        structured feature vector associated with the provided image.
1609        Typically used for downstream analysis, classification, or clustering.
1610
1611        Parameters
1612        ----------
1613        model_out : any
1614            Output returned by the feature-extraction model.
1615            The expected format depends on the model (e.g., tensor, dict, list of arrays).
1616
1617        image : ndarray
1618            The input image (2D or 3D array) for which features are being extracted.
1619            Provided for reference or for combining raw image metrics with model features.
1620
1621        Returns
1622        -------
1623        features : dict
1624            Dictionary containing extracted features.
1625            Keys correspond to feature names, and values are numerical descriptors.
1626        """
1627
1628        features = {
1629            "label": [],
1630            "area": [],
1631            "area_bbox": [],
1632            "area_convex": [],
1633            "area_filled": [],
1634            "axis_major_length": [],
1635            "axis_minor_length": [],
1636            "eccentricity": [],
1637            "equivalent_diameter_area": [],
1638            "feret_diameter_max": [],
1639            "solidity": [],
1640            "perimeter": [],
1641            "perimeter_crofton": [],
1642            "circularity": [],
1643            "intensity_max": [],
1644            "intensity_mean": [],
1645            "intensity_min": [],
1646            "ratio": [],
1647            "coords": [],
1648        }
1649
1650        for region in skimage.measure.regionprops(model_out, intensity_image=image):
1651
1652            # Compute circularity
1653            if region.perimeter > 0:
1654                circularity = 4 * np.pi * region.area / (region.perimeter**2)
1655            else:
1656                circularity = 0
1657
1658            features["area"].append(region.area)
1659            features["area_bbox"].append(region.area_bbox)
1660            features["area_convex"].append(region.area_convex)
1661            features["area_filled"].append(region.area_filled)
1662            features["axis_major_length"].append(region.axis_major_length)
1663            features["axis_minor_length"].append(region.axis_minor_length)
1664            features["eccentricity"].append(region.eccentricity)
1665            features["equivalent_diameter_area"].append(region.equivalent_diameter_area)
1666            features["feret_diameter_max"].append(region.feret_diameter_max)
1667            features["solidity"].append(region.solidity)
1668            features["perimeter"].append(region.perimeter)
1669            features["perimeter_crofton"].append(region.perimeter_crofton)
1670            features["label"].append(region.label)
1671            features["coords"].append(region.coords)
1672            features["circularity"].append(circularity)
1673            features["intensity_max"].append(np.max(region.intensity_max))
1674            features["intensity_min"].append(np.max(region.intensity_min))
1675            features["intensity_mean"].append(np.max(region.intensity_mean))
1676
1677        ratios = []
1678
1679        # Calculate the ratio for each pair of values
1680        for min_len, max_len in zip(
1681            features["axis_minor_length"], features["axis_major_length"]
1682        ):
1683            if max_len != 0:
1684                ratio = min_len / max_len
1685                ratios.append(ratio)
1686            else:
1687                ratios.append(float(0.0))
1688
1689        features["ratio"] = ratios
1690
1691        return features
1692
1693    # repaired stat
1694    def nuclei_finder_test(self):
1695        """
1696        This method performs testing analysis of parameters (specified 'nms' and 'prob' parameters)
1697        for the image provided by the input_image() method.
1698
1699        This method evaluates the performance of the internal NucleiFinder
1700        configuration using the currently loaded images, parameters, or model
1701        settings. It is typically used to check whether the detection, segmentation
1702        or preprocessing stages run correctly on sample data.
1703
1704        Examples
1705        --------
1706        >>> nf.nuclei_finder_test()
1707        >>> nf.browser_test()
1708        """
1709
1710        StarDist2D.from_pretrained()
1711        model = StarDist2D.from_pretrained("2D_versatile_fluo")
1712
1713        nmst = [0.1, 0.2, 0.6]
1714        probt = [0.1, 0.5, 0.9]
1715
1716        try:
1717            img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
1718        except:
1719            img = self.image
1720
1721        plot = []
1722
1723        # adj img
1724        img = adjust_img_16bit(
1725            img,
1726            brightness=self.img_adj_par["brightness"],
1727            contrast=self.img_adj_par["contrast"],
1728            gamma=self.img_adj_par["gamma"],
1729        )
1730        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
1731
1732        fig = plt.figure(dpi=300)
1733        plt.imshow(img)
1734        plt.axis("off")
1735        plt.title("Original", fontsize=25)
1736
1737        if cfg._DISPLAY_MODE:
1738            if self.show_plots:
1739                plt.show()
1740
1741        plot.append(fig)
1742
1743        for n in tqdm(nmst, desc="Loop 1: nmst"):
1744            print(f"\n➡️ Starting outer loop for n = {n}")
1745
1746            for t in tqdm(probt, desc=f"   ↳ Loop 2 for n={n}", leave=False):
1747                print(f"   → Starting inner loop for t = {t}")
1748
1749                labels, _ = model.predict_instances(
1750                    normalize(img.copy()), nms_thresh=n, prob_thresh=t
1751                )
1752
1753                tmp = self.get_features(model_out=labels, image=img)
1754
1755                fig = plt.figure(dpi=300)
1756                plt.imshow(render_label(labels, img=img))
1757                plt.axis("off")
1758                plt.title(
1759                    f"nms {n} & prob {t} \n detected nuc: {len(tmp['area'])}",
1760                    fontsize=25,
1761                )
1762
1763                if cfg._DISPLAY_MODE:
1764                    if self.show_plots:
1765                        plt.show()
1766
1767                plot.append(fig)
1768
1769        self.add_test(plot)
1770
1771    def find_nuclei(self):
1772        """
1773        Performs analysis on the image provided by the ``input_image()`` method
1774        using default or user-defined parameters.
1775
1776        To show current parameters, use:
1777            - ``current_parameters_nuclei``
1778            - ``current_parameters_img_adj``
1779
1780        To set new parameters, use:
1781            - ``set_nms()``
1782            - ``set_prob()``
1783            - ``set_adj_image_gamma()``
1784            - ``set_adj_image_contrast()``
1785            - ``set_adj_image_brightness()``
1786
1787        To get analysis results, use:
1788            - ``get_results_nuclei()``
1789        """
1790
1791        if isinstance(self.image, np.ndarray):
1792
1793            model = StarDist2D.from_pretrained("2D_versatile_fluo")
1794
1795            try:
1796                img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
1797            except:
1798                img = self.image
1799
1800            img = adjust_img_16bit(
1801                img,
1802                brightness=self.img_adj_par["brightness"],
1803                contrast=self.img_adj_par["contrast"],
1804                gamma=self.img_adj_par["gamma"],
1805            )
1806            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
1807            labels, _ = model.predict_instances(
1808                normalize(img),
1809                nms_thresh=self.hyperparameter_nuclei["nms"],
1810                prob_thresh=self.hyperparameter_nuclei["prob"],
1811            )
1812
1813            self.nuclei_results["nuclei"] = self.get_features(
1814                model_out=labels, image=img
1815            )
1816
1817            if len(self.nuclei_results["nuclei"]["coords"]) > 0:
1818
1819                oryginal = adjust_img_16bit(img, color="gray")
1820
1821                # series repaired nuclesu
1822                if self.series_im is True:
1823                    self.images["nuclei"] = oryginal
1824                else:
1825                    nuclei_mask = adjust_img_16bit(
1826                        cv2.cvtColor(
1827                            self.create_mask(self.nuclei_results["nuclei"], oryginal),
1828                            cv2.COLOR_BGR2GRAY,
1829                        ),
1830                        color="blue",
1831                    )
1832                    concatenated_image = cv2.hconcat([oryginal, nuclei_mask])
1833                    self.images["nuclei"] = concatenated_image
1834
1835                if cfg._DISPLAY_MODE:
1836                    if self.show_plots:
1837                        display_preview(
1838                            self.resize_to_screen_img(self.images["nuclei"])
1839                        )
1840
1841            else:
1842
1843                self.nuclei_results["nuclei"] = None
1844                self.nuclei_results["nuclei_reduced"] = None
1845                self.nuclei_results["nuclei_chromatinization"] = None
1846
1847                print("Nuclei not detected!")
1848
1849        else:
1850            print("\nAdd image firstly!")
1851
1852    def select_nuclei(self):
1853        """
1854        Selects data obtained from ``find_nuclei()`` based on the set threshold parameters.
1855
1856        To show current parameters, use:
1857            - ``current_parameters_nuclei``
1858
1859        To set new parameters, use:
1860            - ``set_nuclei_circularity()``
1861            - ``set_nuclei_size()``
1862            - ``set_nuclei_min_mean_intensity()``
1863
1864        To get analysis results, use:
1865            - ``get_results_nuclei_selected()``
1866        """
1867
1868        if self.nuclei_results["nuclei"] is not None:
1869            input_in = copy.deepcopy(self.nuclei_results["nuclei"])
1870
1871            nuclei_dictionary = self.drop_dict(
1872                input_in,
1873                key="area",
1874                var=self.hyperparameter_nuclei["min_size"],
1875                action=">",
1876            )
1877            nuclei_dictionary = self.drop_dict(
1878                nuclei_dictionary,
1879                key="area",
1880                var=self.hyperparameter_nuclei["max_size"],
1881                action="<",
1882            )
1883            nuclei_dictionary = self.drop_dict(
1884                nuclei_dictionary,
1885                key="intensity_mean",
1886                var=self.hyperparameter_nuclei["intensity_mean"],
1887                action=">",
1888            )
1889
1890            if len(nuclei_dictionary["coords"]) > 0:
1891
1892                self.nuclei_results["nuclei_reduced"] = nuclei_dictionary
1893
1894                try:
1895                    img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
1896                except:
1897                    img = self.image
1898
1899                oryginal = adjust_img_16bit(img, color="gray")
1900
1901                # series repaired nuclesu
1902                if self.series_im is True:
1903                    self.images["nuclei_reduced"] = oryginal
1904                else:
1905                    nuclei_mask = adjust_img_16bit(
1906                        cv2.cvtColor(
1907                            self.create_mask(
1908                                self.nuclei_results["nuclei_reduced"], oryginal
1909                            ),
1910                            cv2.COLOR_BGR2GRAY,
1911                        ),
1912                        color="blue",
1913                    )
1914                    concatenated_image = cv2.hconcat([oryginal, nuclei_mask])
1915
1916                    self.images["nuclei_reduced"] = concatenated_image
1917
1918                if cfg._DISPLAY_MODE:
1919                    if self.show_plots:
1920                        display_preview(
1921                            self.resize_to_screen_img(self.images["nuclei_reduced"])
1922                        )
1923
1924            else:
1925                self.nuclei_results["nuclei"] = None
1926                self.nuclei_results["nuclei_reduced"] = None
1927                self.nuclei_results["nuclei_chromatinization"] = None
1928
1929                print("Selected zero nuclei! Analysis stop!")
1930
1931        else:
1932            print("Lack of nuclei data to select!")
1933
1934    def nuclei_chromatinization(self):
1935        """
1936        Performs chromatinization analysis of nuclei using data obtained from
1937        ``find_nuclei()`` and/or ``select_nuclei()``.
1938
1939        To show current parameters, use:
1940            - ``current_parameters_chromatinization``
1941            - ``current_parameters_img_adj_chro``
1942
1943        To set new parameters, use:
1944            - ``set_chromatinization_size()``
1945            - ``set_chromatinization_ratio()``
1946            - ``set_chromatinization_cut_point()``
1947            - ``set_adj_chrom_gamma()``
1948            - ``set_adj_chrom_contrast()``
1949            - ``set_adj_chrom_brightness()``
1950
1951        To get analysis results, use:
1952            - ``get_results_nuclei_chromatinization()``
1953        """
1954
1955        def add_lists(f, g):
1956
1957            result = []
1958            max_length = max(len(f), len(g))
1959
1960            for i in range(max_length):
1961                f_elem = f[i] if i < len(f) else ""
1962                g_elem = g[i] if i < len(g) else ""
1963                result.append(f_elem + g_elem)
1964
1965            return result
1966
1967        def reverse_coords(image, x, y):
1968
1969            zero = np.zeros(image.shape)
1970
1971            zero[x, y] = 2**16
1972
1973            zero_indices = np.where(zero == 0)
1974
1975            return zero_indices[0], zero_indices[1]
1976
1977        if isinstance(self.nuclei_results["nuclei_reduced"], dict):
1978            nuclei_dictionary = self.nuclei_results["nuclei_reduced"]
1979        else:
1980            nuclei_dictionary = self.nuclei_results["nuclei"]
1981
1982        if nuclei_dictionary is not None:
1983            arrays_list = copy.deepcopy(nuclei_dictionary["coords"])
1984
1985            chromatione_info = {
1986                "area": [],
1987                "area_bbox": [],
1988                "area_convex": [],
1989                "area_filled": [],
1990                "axis_major_length": [],
1991                "axis_minor_length": [],
1992                "eccentricity": [],
1993                "equivalent_diameter_area": [],
1994                "feret_diameter_max": [],
1995                "solidity": [],
1996                "perimeter": [],
1997                "perimeter_crofton": [],
1998                "coords": [],
1999            }
2000
2001            full_im = np.zeros(self.image.shape[0:2], dtype=np.uint16)
2002            full_im = adjust_img_16bit(full_im)
2003
2004            for arr in arrays_list:
2005                x = list(arr[:, 0])
2006                y = list(arr[:, 1])
2007
2008                x1, y1 = reverse_coords(self.image, x, y)
2009
2010                regions_chro2 = self.image.copy()
2011
2012                regions_chro2[x1, y1] = 0
2013
2014                regions_chro2 = regions_chro2.astype("uint16")
2015
2016                try:
2017                    regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY)
2018                except:
2019                    pass
2020
2021                regions_chro2 = adjust_img_16bit(
2022                    regions_chro2,
2023                    brightness=self.img_adj_par_chrom["brightness"],
2024                    contrast=self.img_adj_par_chrom["contrast"],
2025                    gamma=self.img_adj_par_chrom["gamma"],
2026                )
2027
2028                full_im = merge_images(
2029                    image_list=[full_im, regions_chro2], intensity_factors=[1, 1]
2030                )
2031
2032                ret, thresh = cv2.threshold(
2033                    regions_chro2[x, y],
2034                    0,
2035                    2**16 - 1,
2036                    cv2.THRESH_BINARY + cv2.THRESH_OTSU,
2037                )
2038
2039                regions_chro2[
2040                    regions_chro2
2041                    <= ret * self.hyperparameter_chromatinization["cut_point"]
2042                ] = 0
2043
2044                regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY)
2045
2046                chromatione = regions_chro2 > 0
2047
2048                labeled_cells = measure.label(chromatione)
2049                regions = measure.regionprops(labeled_cells)
2050                regions = measure.regionprops(
2051                    labeled_cells, intensity_image=regions_chro2
2052                )
2053
2054                for region in regions:
2055
2056                    chromatione_info["area"].append(region.area)
2057                    chromatione_info["area_bbox"].append(region.area_bbox)
2058                    chromatione_info["area_convex"].append(region.area_convex)
2059                    chromatione_info["area_filled"].append(region.area_filled)
2060                    chromatione_info["axis_major_length"].append(
2061                        region.axis_major_length
2062                    )
2063                    chromatione_info["axis_minor_length"].append(
2064                        region.axis_minor_length
2065                    )
2066                    chromatione_info["eccentricity"].append(region.eccentricity)
2067                    chromatione_info["equivalent_diameter_area"].append(
2068                        region.equivalent_diameter_area
2069                    )
2070                    chromatione_info["feret_diameter_max"].append(
2071                        region.feret_diameter_max
2072                    )
2073                    chromatione_info["solidity"].append(region.solidity)
2074                    chromatione_info["perimeter"].append(region.perimeter)
2075                    chromatione_info["perimeter_crofton"].append(
2076                        region.perimeter_crofton
2077                    )
2078                    chromatione_info["coords"].append(region.coords)
2079
2080            ratios = []
2081
2082            for min_len, max_len in zip(
2083                chromatione_info["axis_minor_length"],
2084                chromatione_info["axis_major_length"],
2085            ):
2086                if max_len != 0:
2087                    ratio = min_len / max_len
2088                    ratios.append(ratio)
2089                else:
2090                    ratios.append(float(0.0))
2091
2092            chromatione_info["ratio"] = ratios
2093
2094            chromation_dic = self.drop_dict(
2095                chromatione_info,
2096                key="area",
2097                var=self.hyperparameter_chromatinization["min_size"],
2098                action=">",
2099            )
2100            chromation_dic = self.drop_dict(
2101                chromation_dic,
2102                key="area",
2103                var=self.hyperparameter_chromatinization["max_size"],
2104                action="<",
2105            )
2106            chromation_dic = self.drop_dict(
2107                chromation_dic,
2108                key="ratio",
2109                var=self.hyperparameter_chromatinization["ratio"],
2110                action=">",
2111            )
2112
2113            arrays_list2 = copy.deepcopy(chromation_dic["coords"])
2114
2115            nuclei_dictionary["spot_size_area"] = []
2116            nuclei_dictionary["spot_size_area_bbox"] = []
2117            nuclei_dictionary["spot_size_area_convex"] = []
2118            nuclei_dictionary["spot_size_area_filled"] = []
2119            nuclei_dictionary["spot_axis_major_length"] = []
2120            nuclei_dictionary["spot_axis_minor_length"] = []
2121            nuclei_dictionary["spot_eccentricity"] = []
2122            nuclei_dictionary["spot_size_equivalent_diameter_area"] = []
2123            nuclei_dictionary["spot_feret_diameter_max"] = []
2124            nuclei_dictionary["spot_perimeter"] = []
2125            nuclei_dictionary["spot_perimeter_crofton"] = []
2126
2127            for i, arr in enumerate(arrays_list):
2128
2129                spot_size_area = []
2130                spot_size_area_bbox = []
2131                spot_size_area_convex = []
2132                spot_size_area_convex = []
2133                spot_size_area_filled = []
2134                spot_axis_major_length = []
2135                spot_axis_minor_length = []
2136                spot_eccentricity = []
2137                spot_size_equivalent_diameter_area = []
2138                spot_feret_diameter_max = []
2139                spot_perimeter = []
2140                spot_perimeter_crofton = []
2141
2142                # Flatten the array,
2143                df_tmp = pd.DataFrame(arr)
2144                df_tmp["duplicates"] = add_lists(
2145                    [str(x) for x in df_tmp[0]], [str(y) for y in df_tmp[1]]
2146                )
2147
2148                counter_tmp = Counter(df_tmp["duplicates"])
2149
2150                for j, arr2 in enumerate(arrays_list2):
2151                    df_tmp2 = pd.DataFrame(arr2)
2152                    df_tmp2["duplicates"] = add_lists(
2153                        [str(x) for x in df_tmp2[0]], [str(y) for y in df_tmp2[1]]
2154                    )
2155
2156                    counter_tmp2 = Counter(df_tmp2["duplicates"])
2157                    intersection_length = len(counter_tmp.keys() & counter_tmp2.keys())
2158                    min_length = min(len(counter_tmp), len(counter_tmp2))
2159
2160                    if intersection_length >= 0.8 * min_length:
2161
2162                        if (
2163                            len(list(df_tmp2["duplicates"]))
2164                            / len(list(df_tmp["duplicates"]))
2165                        ) >= 0.025 and (
2166                            len(list(df_tmp2["duplicates"]))
2167                            / len(list(df_tmp["duplicates"]))
2168                        ) <= 0.5:
2169                            spot_size_area.append(chromation_dic["area"][j])
2170                            spot_size_area_bbox.append(chromation_dic["area_bbox"][j])
2171                            spot_size_area_convex.append(
2172                                chromation_dic["area_convex"][j]
2173                            )
2174                            spot_size_area_filled.append(
2175                                chromation_dic["area_filled"][j]
2176                            )
2177                            spot_axis_major_length.append(
2178                                chromation_dic["axis_major_length"][j]
2179                            )
2180                            spot_axis_minor_length.append(
2181                                chromation_dic["axis_minor_length"][j]
2182                            )
2183                            spot_eccentricity.append(chromation_dic["eccentricity"][j])
2184                            spot_size_equivalent_diameter_area.append(
2185                                chromation_dic["equivalent_diameter_area"][j]
2186                            )
2187                            spot_feret_diameter_max.append(
2188                                chromation_dic["feret_diameter_max"][j]
2189                            )
2190                            spot_perimeter.append(chromation_dic["perimeter"][j])
2191                            spot_perimeter_crofton.append(
2192                                chromation_dic["perimeter_crofton"][j]
2193                            )
2194
2195                nuclei_dictionary["spot_size_area"].append(spot_size_area)
2196                nuclei_dictionary["spot_size_area_bbox"].append(spot_size_area_bbox)
2197                nuclei_dictionary["spot_size_area_convex"].append(spot_size_area_convex)
2198                nuclei_dictionary["spot_size_area_filled"].append(spot_size_area_filled)
2199                nuclei_dictionary["spot_axis_major_length"].append(
2200                    spot_axis_major_length
2201                )
2202                nuclei_dictionary["spot_axis_minor_length"].append(
2203                    spot_axis_minor_length
2204                )
2205                nuclei_dictionary["spot_eccentricity"].append(spot_eccentricity)
2206                nuclei_dictionary["spot_size_equivalent_diameter_area"].append(
2207                    spot_size_equivalent_diameter_area
2208                )
2209                nuclei_dictionary["spot_feret_diameter_max"].append(
2210                    spot_feret_diameter_max
2211                )
2212                nuclei_dictionary["spot_perimeter"].append(spot_perimeter)
2213                nuclei_dictionary["spot_perimeter_crofton"].append(
2214                    spot_perimeter_crofton
2215                )
2216
2217            self.nuclei_results["chromatinization"] = chromation_dic
2218            self.nuclei_results["nuclei_chromatinization"] = nuclei_dictionary
2219
2220            self.images["nuclei_chromatinization"] = self.create_mask(
2221                chromation_dic, self.image
2222            )
2223
2224            img_chrom = adjust_img_16bit(
2225                cv2.cvtColor(
2226                    self.create_mask(
2227                        self.nuclei_results["chromatinization"], self.image
2228                    ),
2229                    cv2.COLOR_BGR2GRAY,
2230                ),
2231                color="yellow",
2232            )
2233
2234            if isinstance(self.nuclei_results["nuclei_reduced"], dict):
2235                nuclei_mask = adjust_img_16bit(
2236                    cv2.cvtColor(
2237                        self.create_mask(
2238                            self.nuclei_results["nuclei_reduced"], self.image
2239                        ),
2240                        cv2.COLOR_BGR2GRAY,
2241                    ),
2242                    color="blue",
2243                )
2244            else:
2245                nuclei_mask = adjust_img_16bit(
2246                    cv2.cvtColor(
2247                        self.create_mask(self.nuclei_results["nuclei"], self.image),
2248                        cv2.COLOR_BGR2GRAY,
2249                    ),
2250                    color="blue",
2251                )
2252
2253            nuclei_mask = merge_images([nuclei_mask, img_chrom], [1, 1])
2254
2255            try:
2256                img = cv2.cvtColor(full_im, cv2.COLOR_BGR2GRAY)
2257            except:
2258                img = full_im
2259
2260            oryginal = adjust_img_16bit(img, color="gray")
2261
2262            concatenated_image = cv2.hconcat([oryginal, nuclei_mask])
2263
2264            self.images["nuclei_chromatinization"] = concatenated_image
2265
2266            if cfg._DISPLAY_MODE:
2267                if self.show_plots:
2268                    display_preview(
2269                        self.resize_to_screen_img(
2270                            self.images["nuclei_chromatinization"]
2271                        )
2272                    )
2273
2274        else:
2275            print("Lack of nuclei data to select!")
2276
2277    # separate function for chromatinization
2278
2279    def _nuclei_chromatinization_series(self, image, nuclei_data):
2280        """
2281        Helper method for performing chromatinization analysis on nuclei detected in the provided image.
2282        """
2283
2284        def add_lists(f, g):
2285            result = []
2286            max_length = max(len(f), len(g))
2287
2288            for i in range(max_length):
2289                f_elem = f[i] if i < len(f) else ""
2290                g_elem = g[i] if i < len(g) else ""
2291                result.append(f_elem + g_elem)
2292
2293            return result
2294
2295        def reverse_coords(image, x, y):
2296
2297            zero = np.zeros(image.shape)
2298
2299            zero[x, y] = 2**16
2300
2301            zero_indices = np.where(zero == 0)
2302
2303            return zero_indices[0], zero_indices[1]
2304
2305        nuclei_dictionary = nuclei_data.copy()
2306
2307        if nuclei_dictionary is not None:
2308            arrays_list = copy.deepcopy(nuclei_dictionary["coords"])
2309
2310            chromatione_info = {
2311                "area": [],
2312                "area_bbox": [],
2313                "area_convex": [],
2314                "area_filled": [],
2315                "axis_major_length": [],
2316                "axis_minor_length": [],
2317                "eccentricity": [],
2318                "equivalent_diameter_area": [],
2319                "feret_diameter_max": [],
2320                "solidity": [],
2321                "perimeter": [],
2322                "perimeter_crofton": [],
2323                "coords": [],
2324            }
2325
2326            full_im = np.zeros(image.shape[0:2], dtype=np.uint16)
2327            full_im = adjust_img_16bit(full_im)
2328
2329            for arr in arrays_list:
2330                x = list(arr[:, 0])
2331                y = list(arr[:, 1])
2332
2333                x1, y1 = reverse_coords(image, x, y)
2334
2335                regions_chro2 = image.copy()
2336
2337                regions_chro2[x1, y1] = 0
2338
2339                regions_chro2 = regions_chro2.astype("uint16")
2340
2341                try:
2342                    regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY)
2343                except:
2344                    pass
2345
2346                regions_chro2 = adjust_img_16bit(
2347                    regions_chro2,
2348                    brightness=self.img_adj_par_chrom["brightness"],
2349                    contrast=self.img_adj_par_chrom["contrast"],
2350                    gamma=self.img_adj_par_chrom["gamma"],
2351                )
2352
2353                full_im = merge_images(
2354                    image_list=[full_im, regions_chro2], intensity_factors=[1, 1]
2355                )
2356
2357                ret, _ = cv2.threshold(
2358                    regions_chro2[x, y],
2359                    0,
2360                    2**16 - 1,
2361                    cv2.THRESH_BINARY + cv2.THRESH_OTSU,
2362                )
2363
2364                regions_chro2[
2365                    regions_chro2
2366                    <= ret * self.hyperparameter_chromatinization["cut_point"]
2367                ] = 0
2368
2369                regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY)
2370
2371                chromatione = regions_chro2 > 0
2372
2373                labeled_cells = measure.label(chromatione)
2374                regions = measure.regionprops(labeled_cells)
2375                regions = measure.regionprops(
2376                    labeled_cells, intensity_image=regions_chro2
2377                )
2378
2379                for region in regions:
2380
2381                    chromatione_info["area"].append(region.area)
2382                    chromatione_info["area_bbox"].append(region.area_bbox)
2383                    chromatione_info["area_convex"].append(region.area_convex)
2384                    chromatione_info["area_filled"].append(region.area_filled)
2385                    chromatione_info["axis_major_length"].append(
2386                        region.axis_major_length
2387                    )
2388                    chromatione_info["axis_minor_length"].append(
2389                        region.axis_minor_length
2390                    )
2391                    chromatione_info["eccentricity"].append(region.eccentricity)
2392                    chromatione_info["equivalent_diameter_area"].append(
2393                        region.equivalent_diameter_area
2394                    )
2395                    chromatione_info["feret_diameter_max"].append(
2396                        region.feret_diameter_max
2397                    )
2398                    chromatione_info["solidity"].append(region.solidity)
2399                    chromatione_info["perimeter"].append(region.perimeter)
2400                    chromatione_info["perimeter_crofton"].append(
2401                        region.perimeter_crofton
2402                    )
2403                    chromatione_info["coords"].append(region.coords)
2404
2405            ratios = []
2406
2407            for min_len, max_len in zip(
2408                chromatione_info["axis_minor_length"],
2409                chromatione_info["axis_major_length"],
2410            ):
2411                if max_len != 0:
2412                    ratio = min_len / max_len
2413                    ratios.append(ratio)
2414                else:
2415                    ratios.append(float(0.0))
2416
2417            chromatione_info["ratio"] = ratios
2418
2419            chromation_dic = self.drop_dict(
2420                chromatione_info,
2421                key="area",
2422                var=self.hyperparameter_chromatinization["min_size"],
2423                action=">",
2424            )
2425            chromation_dic = self.drop_dict(
2426                chromation_dic,
2427                key="area",
2428                var=self.hyperparameter_chromatinization["max_size"],
2429                action="<",
2430            )
2431            chromation_dic = self.drop_dict(
2432                chromation_dic,
2433                key="ratio",
2434                var=self.hyperparameter_chromatinization["ratio"],
2435                action=">",
2436            )
2437
2438            arrays_list2 = copy.deepcopy(chromation_dic["coords"])
2439
2440            nuclei_dictionary["spot_size_area"] = []
2441            nuclei_dictionary["spot_size_area_bbox"] = []
2442            nuclei_dictionary["spot_size_area_convex"] = []
2443            nuclei_dictionary["spot_size_area_filled"] = []
2444            nuclei_dictionary["spot_axis_major_length"] = []
2445            nuclei_dictionary["spot_axis_minor_length"] = []
2446            nuclei_dictionary["spot_eccentricity"] = []
2447            nuclei_dictionary["spot_size_equivalent_diameter_area"] = []
2448            nuclei_dictionary["spot_feret_diameter_max"] = []
2449            nuclei_dictionary["spot_perimeter"] = []
2450            nuclei_dictionary["spot_perimeter_crofton"] = []
2451
2452            for arr in arrays_list:
2453
2454                spot_size_area = []
2455                spot_size_area_bbox = []
2456                spot_size_area_convex = []
2457                spot_size_area_convex = []
2458                spot_size_area_filled = []
2459                spot_axis_major_length = []
2460                spot_axis_minor_length = []
2461                spot_eccentricity = []
2462                spot_size_equivalent_diameter_area = []
2463                spot_feret_diameter_max = []
2464                spot_perimeter = []
2465                spot_perimeter_crofton = []
2466
2467                # Flatten the array,
2468                df_tmp = pd.DataFrame(arr)
2469                df_tmp["duplicates"] = add_lists(
2470                    [str(x) for x in df_tmp[0]], [str(y) for y in df_tmp[1]]
2471                )
2472
2473                counter_tmp = Counter(df_tmp["duplicates"])
2474
2475                for j, arr2 in enumerate(arrays_list2):
2476                    df_tmp2 = pd.DataFrame(arr2)
2477                    df_tmp2["duplicates"] = add_lists(
2478                        [str(x) for x in df_tmp2[0]], [str(y) for y in df_tmp2[1]]
2479                    )
2480
2481                    counter_tmp2 = Counter(df_tmp2["duplicates"])
2482                    intersection_length = len(counter_tmp.keys() & counter_tmp2.keys())
2483                    min_length = min(len(counter_tmp), len(counter_tmp2))
2484
2485                    if intersection_length >= 0.8 * min_length:
2486
2487                        if (
2488                            len(list(df_tmp2["duplicates"]))
2489                            / len(list(df_tmp["duplicates"]))
2490                        ) >= 0.025 and (
2491                            len(list(df_tmp2["duplicates"]))
2492                            / len(list(df_tmp["duplicates"]))
2493                        ) <= 0.5:
2494                            spot_size_area.append(chromation_dic["area"][j])
2495                            spot_size_area_bbox.append(chromation_dic["area_bbox"][j])
2496                            spot_size_area_convex.append(
2497                                chromation_dic["area_convex"][j]
2498                            )
2499                            spot_size_area_filled.append(
2500                                chromation_dic["area_filled"][j]
2501                            )
2502                            spot_axis_major_length.append(
2503                                chromation_dic["axis_major_length"][j]
2504                            )
2505                            spot_axis_minor_length.append(
2506                                chromation_dic["axis_minor_length"][j]
2507                            )
2508                            spot_eccentricity.append(chromation_dic["eccentricity"][j])
2509                            spot_size_equivalent_diameter_area.append(
2510                                chromation_dic["equivalent_diameter_area"][j]
2511                            )
2512                            spot_feret_diameter_max.append(
2513                                chromation_dic["feret_diameter_max"][j]
2514                            )
2515                            spot_perimeter.append(chromation_dic["perimeter"][j])
2516                            spot_perimeter_crofton.append(
2517                                chromation_dic["perimeter_crofton"][j]
2518                            )
2519
2520                nuclei_dictionary["spot_size_area"].append(spot_size_area)
2521                nuclei_dictionary["spot_size_area_bbox"].append(spot_size_area_bbox)
2522                nuclei_dictionary["spot_size_area_convex"].append(spot_size_area_convex)
2523                nuclei_dictionary["spot_size_area_filled"].append(spot_size_area_filled)
2524                nuclei_dictionary["spot_axis_major_length"].append(
2525                    spot_axis_major_length
2526                )
2527                nuclei_dictionary["spot_axis_minor_length"].append(
2528                    spot_axis_minor_length
2529                )
2530                nuclei_dictionary["spot_eccentricity"].append(spot_eccentricity)
2531                nuclei_dictionary["spot_size_equivalent_diameter_area"].append(
2532                    spot_size_equivalent_diameter_area
2533                )
2534                nuclei_dictionary["spot_feret_diameter_max"].append(
2535                    spot_feret_diameter_max
2536                )
2537                nuclei_dictionary["spot_perimeter"].append(spot_perimeter)
2538                nuclei_dictionary["spot_perimeter_crofton"].append(
2539                    spot_perimeter_crofton
2540                )
2541
2542            self.nuclei_results["chromatinization"] = chromation_dic
2543            self.nuclei_results["nuclei_chromatinization"] = nuclei_dictionary
2544
2545            self.images["nuclei_chromatinization"] = self.create_mask(
2546                chromation_dic, image
2547            )
2548
2549            img_chrom = adjust_img_16bit(
2550                cv2.cvtColor(
2551                    self.create_mask(self.nuclei_results["chromatinization"], image),
2552                    cv2.COLOR_BGR2GRAY,
2553                ),
2554                color="yellow",
2555            )
2556
2557            nuclei_mask = adjust_img_16bit(
2558                cv2.cvtColor(self.create_mask(nuclei_data, image), cv2.COLOR_BGR2GRAY),
2559                color="blue",
2560            )
2561
2562            nuclei_mask = merge_images([nuclei_mask, img_chrom], [1, 1])
2563
2564            try:
2565                img = cv2.cvtColor(full_im, cv2.COLOR_BGR2GRAY)
2566            except:
2567                img = full_im
2568
2569            oryginal = adjust_img_16bit(img, color="gray")
2570
2571            concatenated_image = cv2.hconcat([oryginal, nuclei_mask])
2572
2573            self.images["nuclei_chromatinization"] = concatenated_image
2574
2575            if cfg._DISPLAY_MODE:
2576                if self.show_plots:
2577                    display_preview(
2578                        self.resize_to_screen_img(
2579                            self.images["nuclei_chromatinization"]
2580                        )
2581                    )
2582
2583        else:
2584            print("Lack of nuclei data to select!")
2585
2586    def browser_test(self):
2587        """
2588        Displays test results generated by the ``nuclei_finder_test()`` method
2589        in the default web browser.
2590        """
2591
2592        html_content = ""
2593
2594        for fig in self.test_results:
2595            buf = BytesIO()
2596            fig.savefig(buf, format="png", bbox_inches="tight")
2597            buf.seek(0)
2598
2599            img_base64 = base64.b64encode(buf.read()).decode("utf-8")
2600
2601            html_content += f'<img src="data:image/png;base64,{img_base64}" style="margin:10px;"/>\n'
2602
2603        with tempfile.NamedTemporaryFile(
2604            mode="w", delete=False, suffix=".html"
2605        ) as tmp_file:
2606            tmp_file.write(html_content)
2607            tmp_filename = tmp_file.name
2608
2609        webbrowser.open_new_tab(tmp_filename)
2610
2611    def series_analysis_chromatinization(
2612        self,
2613        path_to_images: str,
2614        file_extension: str = "tiff",
2615        selected_id: list = [],
2616        fille_name_part: str = "",
2617        selection_opt: bool = True,
2618        include_img: bool = True,
2619        test_series: int = 0,
2620    ):
2621        """
2622        Performs full analysis on images provided via the ``input_image()`` method
2623        using default or user-defined parameters.
2624
2625        This method runs nuclei detection, nuclei selection, and chromatinization
2626        analysis in a single pipeline. Users can adjust parameters for each step
2627        before running the analysis.
2628
2629        To show current parameters, use:
2630            - ``current_parameters_nuclei``
2631            - ``current_parameters_img_adj``
2632            - ``current_parameters_chromatinization``
2633            - ``current_parameters_img_adj_chro``
2634
2635        To set new parameters, use:
2636            - ``set_nms()``
2637            - ``set_prob()``
2638            - ``set_adj_image_gamma()``
2639            - ``set_adj_image_contrast()``
2640            - ``set_adj_image_brightness()``
2641            - ``set_nuclei_circularity()``
2642            - ``set_nuclei_size()``
2643            - ``set_nuclei_min_mean_intensity()``
2644            - ``set_chromatinization_size()``
2645            - ``set_chromatinization_ratio()``
2646            - ``set_chromatinization_cut_point()``
2647            - ``set_adj_chrom_gamma()``
2648            - ``set_adj_chrom_contrast()``
2649            - ``set_adj_chrom_brightness()``
2650
2651        Parameters
2652        ----------
2653        path_to_images : str
2654            Path to the directory containing images for analysis.
2655
2656        file_extension : str, optional
2657            Extension of the image files. Default is 'tiff'.
2658
2659        selected_id : list, optional
2660            List of IDs that must be part of the image name to distinguish them
2661            from others. Default is an empty list, which means all images in
2662            the directory will be processed.
2663
2664        fille_name_part : str, optional
2665            Part of the file name to filter images. Default is an empty string.
2666
2667        selection_opt : bool, optional
2668            Whether to run ``select_nuclei()`` with the defined parameters. Default is True.
2669
2670        include_img : bool, optional
2671            Whether to include the images in the result dictionary. Default is True.
2672
2673        test_series : int, optional
2674            Number of images to test the parameters and return results. Default is 0,
2675            which means all images in the directory will be processed.
2676
2677        Returns
2678        -------
2679        results_dict : dict
2680            Dictionary containing results for each image in the directory.
2681            Keys correspond to image file names.
2682
2683        Notes
2684        -----
2685        This method runs the complete nuclei and chromatinization analysis pipeline.
2686
2687        Parameters must be set appropriately before calling to ensure correct results.
2688        """
2689
2690        results_dict = {}
2691        results_img = {}
2692        results_img_raw = {}
2693
2694        files = glob.glob(os.path.join(path_to_images, "*." + file_extension))
2695
2696        if len(fille_name_part) > 0:
2697            files = [x for x in files if fille_name_part.lower() in x.lower()]
2698
2699        if len(selected_id) > 0:
2700            selected_id = [str(x) for x in selected_id]
2701            files = [
2702                x
2703                for x in files
2704                if re.sub("_.*", "", os.path.basename(x)) in selected_id
2705            ]
2706
2707        if test_series > 0:
2708
2709            files = random.sample(files, test_series)
2710
2711        self.show_plots = False
2712        self.series_im = True
2713
2714        print("\nFile analysis:\n\n")
2715
2716        for file in tqdm(files):
2717
2718            print(file)
2719
2720            self.show_plots = False
2721
2722            image = self.load_image(file)
2723
2724            self.input_image(image)
2725
2726            self.find_nuclei()
2727
2728            tmp = None
2729
2730            if selection_opt is True:
2731                self.select_nuclei()
2732                tmp = self.get_results_nuclei_selected()
2733
2734            else:
2735                tmp = self.get_results_nuclei()
2736
2737            if tmp is not None:
2738
2739                if tmp[0] is not None:
2740
2741                    results_dict[str(os.path.basename(file))] = tmp[0]
2742                    results_img[str(os.path.basename(file))] = tmp[1]
2743                    results_img_raw[str(os.path.basename(file))] = image
2744                    del tmp
2745                    del image
2746
2747        results_dict_tmp = self.repairing_nuclei(results_dict)
2748
2749        results_dict = {}
2750
2751        print("\nChromatization searching:\n\n")
2752
2753        for ke in tqdm(results_dict_tmp.keys()):
2754
2755            tmp = None
2756
2757            try:
2758                self._nuclei_chromatinization_series(
2759                    results_img_raw[ke], results_dict_tmp[ke]
2760                )
2761                tmp = self.get_results_nuclei_chromatinization()
2762            except:
2763                print(f"Sample {ke} could not be processed.")
2764
2765            if tmp is not None:
2766
2767                if tmp[0] is not None:
2768
2769                    tmp[0].pop("coords")
2770
2771                    if include_img:
2772                        results_dict[str(os.path.basename(ke))] = {
2773                            "stats": tmp[0],
2774                            "img": cv2.hconcat([results_img[ke], tmp[1]]),
2775                        }
2776                        del tmp
2777                    else:
2778                        results_dict[str(os.path.basename(ke))] = tmp[0]
2779                        del tmp
2780
2781            else:
2782                print(f"Unable to obtain results for {print(ke)}")
2783
2784        self.show_plots = True
2785        self.series_im = False
2786
2787        return results_dict
2788
2789    def series_analysis_nuclei(
2790        self,
2791        path_to_images: str,
2792        file_extension: str = "tiff",
2793        selected_id: list = [],
2794        fille_name_part: str = "",
2795        selection_opt: bool = True,
2796        include_img: bool = True,
2797        test_series: int = 0,
2798    ):
2799        """
2800        Performs analysis on the image provided by the ``input_image()`` method
2801        using default or user-defined parameters.
2802
2803        This method runs nuclei detection and selection using the currently set
2804        parameters. Users can adjust image preprocessing and nuclei detection
2805        parameters before running the analysis.
2806
2807        To show current parameters, use:
2808            - ``current_parameters_nuclei``
2809            - ``current_parameters_img_adj``
2810
2811        To set new parameters, use:
2812            - ``set_nms()``
2813            - ``set_prob()``
2814            - ``set_adj_image_gamma()``
2815            - ``set_adj_image_contrast()``
2816            - ``set_adj_image_brightness()``
2817            - ``set_nuclei_circularity()``
2818            - ``set_nuclei_size()``
2819            - ``set_nuclei_min_mean_intensity()``
2820
2821        Parameters
2822        ----------
2823        path_to_images : str
2824            Path to the directory containing images for analysis.
2825
2826        file_extension : str, optional
2827            Extension of the image files. Default is 'tiff'.
2828
2829        selected_id : list, optional
2830            List of IDs that must be part of the image name to distinguish them
2831            from others. Default is an empty list, which means all images in
2832            the directory will be processed.
2833
2834        fille_name_part : str, optional
2835            Part of the file name to filter images. Default is an empty string.
2836
2837        selection_opt : bool, optional
2838            Whether to run the ``select_nuclei()`` method with the defined parameters.
2839            Default is True.
2840
2841        include_img : bool, optional
2842            Whether to include the images in the result dictionary. Default is True.
2843
2844        test_series : int, optional
2845            Number of images to test the parameters and return results. Default is 0,
2846            which means all images in the directory will be processed.
2847
2848        Returns
2849        -------
2850        results_dict : dict
2851            Dictionary containing results for each image in the directory.
2852            Keys correspond to image file names.
2853        """
2854
2855        results_dict = {}
2856        results_img = {}
2857
2858        files = glob.glob(os.path.join(path_to_images, "*." + file_extension))
2859
2860        if len(fille_name_part) > 0:
2861            files = [x for x in files if fille_name_part.lower() in x.lower()]
2862
2863        if len(selected_id) > 0:
2864            selected_id = [str(x) for x in selected_id]
2865            files = [
2866                x
2867                for x in files
2868                if re.sub("_.*", "", os.path.basename(x)) in selected_id
2869            ]
2870
2871        if test_series > 0:
2872
2873            files = random.sample(files, test_series)
2874
2875        self.show_plots = False
2876        self.series_im = True
2877
2878        print("\nFile analysis:\n\n")
2879
2880        for file in tqdm(files):
2881
2882            print(file)
2883
2884            image = self.load_image(file)
2885
2886            self.input_image(image)
2887
2888            self.find_nuclei()
2889
2890            if self.nuclei_results["nuclei"] is not None:
2891
2892                tmp = [None]
2893
2894                if selection_opt is True:
2895                    self.select_nuclei()
2896                    tmp = self.get_results_nuclei_selected()
2897
2898                else:
2899                    tmp = self.get_results_nuclei()
2900
2901                if tmp is not None:
2902
2903                    if tmp[0] is not None:
2904
2905                        if include_img:
2906                            results_dict[str(os.path.basename(file))] = tmp[0]
2907                            results_img[str(os.path.basename(file))] = tmp[1]
2908
2909                            del tmp
2910
2911                        else:
2912                            results_dict[str(os.path.basename(file))] = tmp[0]
2913                            del tmp
2914
2915                else:
2916                    print(f"Unable to obtain results for {print(file)}")
2917
2918            else:
2919
2920                print(f"Unable to obtain results for {print(file)}")
2921
2922        self.show_plots = True
2923        self.series_im = False
2924
2925        results_dict_tmp = self.repairing_nuclei(results_dict)
2926
2927        if include_img is False:
2928
2929            return results_dict_tmp
2930
2931        else:
2932
2933            results_dict = {}
2934
2935            for ke in results_dict_tmp.keys():
2936
2937                nuclei_mask = adjust_img_16bit(
2938                    cv2.cvtColor(
2939                        self.create_mask(results_dict_tmp[ke], results_img[ke]),
2940                        cv2.COLOR_BGR2GRAY,
2941                    ),
2942                    color="blue",
2943                )
2944                concatenated_image = cv2.hconcat([results_img[ke], nuclei_mask])
2945
2946                cred = results_dict_tmp[ke]
2947                # cred.pop('coords')
2948
2949                results_dict[ke] = {"stats": cred, "img": concatenated_image}
2950
2951            return results_dict
2952
2953
2954class NucleiDataManagement:
2955    """
2956    Manages nuclei analysis data obtained from the `NucleiFinder` class,
2957    including nuclei properties and optionally Image Stream (IS) data.
2958
2959    This class allows loading nuclei data from JSON files or directly from
2960    `NucleiFinder` analysis results, converting them to pandas DataFrames,
2961    adding IS data, concatenating results from multiple experiments, and
2962    saving results in JSON or CSV format. It also provides helper methods
2963    for merging, filtering, and retrieving data.
2964
2965    Attributes
2966    ----------
2967    nuceli_data : dict
2968        Dictionary storing nuclei properties for each image or experiment.
2969
2970    experiment_name : str
2971        Name of the experiment.
2972
2973    nuceli_data_df : pd.DataFrame or None
2974        DataFrame representation of nuclei properties.
2975
2976    nuclei_IS_data : pd.DataFrame or None
2977        DataFrame of nuclei data merged with IS data.
2978
2979    concat_data : list or None
2980        List of other `NucleiDataManagement` objects added for combined analysis.
2981
2982    Methods
2983    -------
    load_nuc_dict(path)
        Load nuclei data from a JSON dictionary file (*.nuc) and initialize the object.

    _convert_to_df()
        Convert nuclei dictionary data to a pandas DataFrame.
2988
2989    add_IS_data(IS_data, IS_features)
2990        Merge Image Stream (IS) data with nuclei data.
2991
2992    get_data()
2993        Retrieve the nuclei data as a pandas DataFrame.
2994
2995    get_data_with_IS()
2996        Retrieve the nuclei data merged with IS data.
2997
2998    save_nuc_project(path)
2999        Save nuclei data as a JSON file with *.nuc extension.
3000
3001    save_results_df(path)
3002        Save nuclei data as a CSV file.
3003
3004    save_results_df_with_IS(path)
3005        Save nuclei data merged with IS data as a CSV file.
3006
3007    add_experiment(data_list)
3008        Add other `NucleiDataManagement` objects for concatenated analysis.
3009
3010    get_mutual_experiments_data(inc_is)
3011        Retrieve concatenated nuclei data from multiple experiments.
3012
3013    save_mutual_experiments(path, inc_is)
3014        Save concatenated data from multiple experiments as a CSV file.
3015    """
3016
3017    def __init__(self, nuclei_data: dict, experiment_name: str):
3018        """
3019        Initialize a NucleiDataManagement object with nuclei data and experiment name.
3020
3021        Parameters
3022        ----------
3023        nuclei_data : dict
3024            Dictionary containing nuclei properties for each image or experiment.
3025            If the dictionary entries have keys 'stats' and 'img', only 'stats' are stored.
3026
3027        experiment_name : str
3028            Name of the experiment.
3029
3030        Attributes
3031        ----------
3032        nuceli_data : dict
3033            Dictionary storing nuclei properties for each image or experiment.
3034
3035        experiment_name : str
3036            Name of the experiment.
3037
3038        nuceli_data_df : pd.DataFrame or None
3039            DataFrame representation of nuclei properties (initialized as None).
3040
3041        nuclei_IS_data : pd.DataFrame or None
3042            DataFrame of nuclei data merged with Image Stream (IS) data (initialized as None).
3043
3044        concat_data : list or None
3045            List of other `NucleiDataManagement` objects added for combined analysis (initialized as None).
3046        """
3047
3048        if set(nuclei_data[list(nuclei_data.keys())[0]].keys()) == set(
3049            ["stats", "img"]
3050        ):
3051
3052            self.nuceli_data = {}
3053
3054            for k in nuclei_data.keys():
3055                self.nuceli_data[k] = nuclei_data[k]["stats"]
3056
3057            for k in self.nuceli_data.keys():
3058                if "coords" in self.nuceli_data[k].keys():
3059                    self.nuceli_data[k].pop("coords")
3060
3061        else:
3062            self.nuceli_data = nuclei_data
3063
3064            for k in self.nuceli_data.keys():
3065                if "coords" in self.nuceli_data[k].keys():
3066                    self.nuceli_data[k].pop("coords")
3067
3068        self.experiment_name = experiment_name
3069        """Name of the experiment."""
3070
3071        self.nuceli_data_df = None
3072        """Stored DataFrame representation of nuclei features"""
3073
3074        self.nuclei_IS_data = None
3075        """Stored DataFrame of data from Image Stream (IS)."""
3076
3077        self.concat_data = None
3078        """Sotored list of other `NucleiDataManagement` objects."""
3079
3080    @classmethod
3081    def load_nuc_dict(cls, path: str):
3082        """
3083        Initialize a NucleiDataManagement object from a JSON dictionary file.
3084
3085        The loaded data must be previously saved using the ``save_nuc_project()`` method.
3086
3087        Parameters
3088        ----------
3089        path : str
3090            Path to the *.nuc JSON file containing nuclei data.
3091        """
3092
3093        if ".nuc" in path:
3094
3095            if os.path.exists(path):
3096
3097                with open(path, "r") as json_file:
3098                    loaded_data = json.load(json_file)
3099
3100                return cls(loaded_data, os.path.splitext(os.path.basename(path))[0])
3101
3102            else:
3103                raise ValueError("\nInvalid path!")
3104
3105        else:
3106            raise ValueError(
3107                "\nInvalid dictionary to load. It must contain a .nuc extension!"
3108            )
3109
    def _convert_to_df(self) -> None:
        """
        Helper method that converts the internal nuclei dictionary into a pandas DataFrame.

        This method iterates over the nuclei data stored in `self.nuceli_data`,
        flattens the information for each nucleus, computes aggregate statistics
        for associated spots if present, and stores the resulting DataFrame in
        `self.nuceli_data_df`.

        Each row describes one nucleus. The sample id ('id_name') is the file
        name prefix before the first underscore. When chromatinization spot
        lists are present ('spot_size_area' key), per-nucleus spot aggregates
        are added; nuclei with no spots get zero-filled aggregate columns so
        the DataFrame stays rectangular.
        """

        nuclei_data = self.nuceli_data

        data = []

        # one flat row per nucleus across all images in the dictionary
        for i in tqdm(nuclei_data.keys()):
            for n, _ in enumerate(nuclei_data[i]["area"]):
                row = {
                    # image key is assumed to be '<id>_<rest>' — keep the id only
                    "id_name": re.sub("_.*", "", i),
                    "nuclei_area": nuclei_data[i]["area"][n],
                    "nuclei_area_bbox": nuclei_data[i]["area_bbox"][n],
                    "nuclei_equivalent_diameter_area": nuclei_data[i][
                        "equivalent_diameter_area"
                    ][n],
                    "nuclei_feret_diameter_max": nuclei_data[i]["feret_diameter_max"][
                        n
                    ],
                    "nuclei_axis_major_length": nuclei_data[i]["axis_major_length"][n],
                    "nuclei_axis_minor_length": nuclei_data[i]["axis_minor_length"][n],
                    "nuclei_circularity": nuclei_data[i]["circularity"][n],
                    "nuclei_eccentricity": nuclei_data[i]["eccentricity"][n],
                    "nuclei_perimeter": nuclei_data[i]["perimeter"][n],
                    "nuclei_ratio": nuclei_data[i]["ratio"][n],
                    "nuclei_solidity": nuclei_data[i]["solidity"][n],
                }

                # spot (chromatinization) data is optional — present only when
                # the chromatinization analysis was run on this series
                if "spot_size_area" in nuclei_data[i]:
                    if len(nuclei_data[i]["spot_size_area"][n]) > 0:
                        # aggregate the per-nucleus spot lists; the len>0 guard
                        # above keeps np.mean away from empty sequences
                        row.update(
                            {
                                "spot_n": len(nuclei_data[i]["spot_size_area"][n]),
                                "avg_spot_area": np.mean(
                                    nuclei_data[i]["spot_size_area"][n]
                                ),
                                "avg_spot_area_bbox": np.mean(
                                    nuclei_data[i]["spot_size_area_bbox"][n]
                                ),
                                "avg_spot_perimeter": np.mean(
                                    nuclei_data[i]["spot_perimeter"][n]
                                ),
                                "sum_spot_area": np.sum(
                                    nuclei_data[i]["spot_size_area"][n]
                                ),
                                "sum_spot_area_bbox": np.sum(
                                    nuclei_data[i]["spot_size_area_bbox"][n]
                                ),
                                "sum_spot_perimeter": np.sum(
                                    nuclei_data[i]["spot_perimeter"][n]
                                ),
                                "avg_spot_axis_major_length": np.mean(
                                    nuclei_data[i]["spot_axis_major_length"][n]
                                ),
                                "avg_spot_axis_minor_length": np.mean(
                                    nuclei_data[i]["spot_axis_minor_length"][n]
                                ),
                                "avg_spot_eccentricity": np.mean(
                                    nuclei_data[i]["spot_eccentricity"][n]
                                ),
                                "avg_spot_size_equivalent_diameter_area": np.mean(
                                    nuclei_data[i][
                                        "spot_size_equivalent_diameter_area"
                                    ][n]
                                ),
                                "sum_spot_size_equivalent_diameter_area": np.sum(
                                    nuclei_data[i][
                                        "spot_size_equivalent_diameter_area"
                                    ][n]
                                ),
                            }
                        )
                    else:
                        # nucleus without spots: zero-fill the same column set
                        # so every row has identical keys
                        row.update(
                            {
                                k: 0
                                for k in [
                                    "spot_n",
                                    "avg_spot_area",
                                    "avg_spot_area_bbox",
                                    "avg_spot_perimeter",
                                    "sum_spot_area",
                                    "sum_spot_area_bbox",
                                    "sum_spot_perimeter",
                                    "avg_spot_axis_major_length",
                                    "avg_spot_axis_minor_length",
                                    "avg_spot_eccentricity",
                                    "avg_spot_size_equivalent_diameter_area",
                                    "sum_spot_size_equivalent_diameter_area",
                                ]
                            }
                        )

                data.append(row)

        nuclei_df = pd.DataFrame(data)

        # per-image nucleus count, broadcast back onto every row of that image
        nuclei_df["nuclei_per_img"] = nuclei_df.groupby("id_name")["id_name"].transform(
            "count"
        )
        nuclei_df["set"] = self.experiment_name

        self.nuceli_data_df = nuclei_df
3220
3221    def add_IS_data(self, IS_data: pd.DataFrame, IS_features: list = []):
3222        """
3223        Merge Image Stream (IS) data with nuclei analysis data.
3224
3225        This method concatenates IS (Image Stream, https://cytekbio.com/pages/imagestream)
3226        results with the nuclei data stored in the object. The merge is performed based
3227        on object IDs, allowing joint analysis of nuclei features and IS features.
3228
3229        Parameters
3230        ----------
3231        IS_data : pd.DataFrame
3232            DataFrame containing IS data results.
3233
3234        IS_features : list, optional
3235            List of features to extract from the IS data. Default is an empty list.
3236
3237        Notes
3238        -----
3239        The merged data will be stored in the attribute `self.nuclei_IS_data`.
3240        """
3241
3242        nuclei_data = self._get_df()
3243
3244        IS_data["set"] = self.experiment_name
3245
3246        if len(IS_features) > 0:
3247            IS_features = list(set(IS_features + ["Object Number", "set"]))
3248            IS_data = IS_data[IS_features]
3249
3250        nuclei_data["id"] = (
3251            nuclei_data["id_name"].astype(str) + "_" + nuclei_data["set"]
3252        )
3253        IS_data["id"] = IS_data["Object Number"].astype(str) + "_" + IS_data["set"]
3254
3255        merged_data = pd.merge(nuclei_data, IS_data, on="id", how="left")
3256        merged_data.pop("set_x")
3257        merged_data = merged_data.rename(columns={"set_y": "set"})
3258
3259        self.nuclei_IS_data = merged_data
3260
3261    def _get_df(self):
3262        """
3263        Helper method to retrieve the nuclei data as a pandas DataFrame.
3264
3265        If the internal DataFrame `self.nuceli_data_df` has not been created yet,
3266        this method calls `_convert_to_df()` to generate it from `self.nuceli_data`.
3267        """
3268
3269        if self.nuceli_data_df is None:
3270            self._convert_to_df()
3271
3272        return self.nuceli_data_df
3273
3274    def get_data_with_IS(self):
3275        """
3276        Retrieve nuclei results for a single project including IS data.
3277
3278        Returns
3279        -------
3280        pd.DataFrame or None
3281            DataFrame containing nuclei results merged with IS (Image Stream) data
3282            added via `self.add_IS_data()`. Returns None if no IS data has been added.
3283        """
3284
3285        if self.nuclei_IS_data is None:
3286            print("\nNothing to return!")
3287        return self.nuclei_IS_data
3288
3289    def get_data(self):
3290        """
3291        Retrieve nuclei results for a single project as a pandas DataFrame.
3292
3293        Returns
3294        -------
3295        pd.DataFrame
3296            DataFrame containing nuclei analysis results for the experiment.
3297        """
3298
3299        return self._get_df()
3300
3301    def save_nuc_project(self, path: str = ""):
3302        """
3303        Save nuclei results as a JSON file with a *.nuc extension.
3304
3305        The saved data can later be loaded using the `cls.load_nuc_dict()` method.
3306        Results must be obtained from the `NucleiFinder` class using
3307        `series_analysis_nuclei()` or `series_analysis_chromatinization()` methods.
3308
3309        Parameters
3310        ----------
3311        path : str, optional
3312            Directory where the results will be saved. Default is the current working directory.
3313        """
3314
3315        data = self.nuceli_data
3316
3317        if len(data.keys()) > 0:
3318            full_path = os.path.join(path, self.experiment_name)
3319
3320            with open(full_path + ".nuc", "w") as json_file:
3321                json.dump(data, json_file, indent=4)
3322        else:
3323            print("\nData not provided!")
3324
3325    def save_results_df(self, path: str = ""):
3326        """
3327        Save nuclei results for a single project as a CSV file.
3328
3329        Results must be obtained from the `NucleiFinder` class using
3330        `series_analysis_nuclei()` or `series_analysis_chromatinization()` methods.
3331
3332        Parameters
3333        ----------
3334        path : str, optional
3335            Directory where the CSV file will be saved. Default is the current working directory.
3336        """
3337
3338        data = self.get_data()
3339
3340        full_path = os.path.join(path, f"{self.experiment_name}.csv")
3341
3342        data.to_csv(full_path, index=False)
3343
3344    def save_results_df_with_IS(self, path: str = ""):
3345        """
3346        Save nuclei results with IS data for a single project as a CSV file.
3347
3348        Results must be obtained from the `NucleiFinder` class using
3349        `series_analysis_nuclei()` or `series_analysis_chromatinization()` methods.
3350        IS data should have been added via `self.add_IS_data()`.
3351
3352        Parameters
3353        ----------
3354        path : str, optional
3355            Directory where the CSV file will be saved. Default is the current working directory.
3356        """
3357
3358        data = self.get_data_with_IS()
3359
3360        if data is None:
3361            raise ValueError("There was nothing to save.")
3362
3363        full_path = os.path.join(path, f"{self.experiment_name}_IS.csv")
3364        data.to_csv(full_path, index=False)
3365
3366    def add_experiment(self, data_list: list):
3367        """
3368        Add additional NucleiDataManagement objects from other experiments for concatenation.
3369
3370        Parameters
3371        ----------
3372        data_list : list
3373            List of `NucleiDataManagement` objects from separate experiments to be added.
3374        """
3375
3376        valid_class = []
3377        for obj in data_list:
3378            if isinstance(obj, self.__class__):
3379                valid_class.append(obj)
3380            else:
3381                print(f"Object {obj} is invalid type.")
3382
3383        self.concat_data = valid_class
3384
3385    def get_mutual_experiments_data(self, inc_is: bool = False):
3386        """
3387        Retrieve concatenated NucleiDataManagement data from other added experiments.
3388
3389        Parameters
3390        ----------
3391        inc_is : bool, optional
3392            Whether to include IS (Image Stream) data, if it was added to each experiment. Default is False.
3393
3394        Returns
3395        -------
3396        pd.DataFrame
3397            Concatenated nuclei data (with or without IS data) from all added experiments.
3398        """
3399
3400        if self.concat_data is not None:
3401            if inc_is:
3402
3403                try:
3404                    final_df = pd.concat(
3405                        [x.get_data_with_IS() for x in self.concat_data]
3406                        + [self.get_data_with_IS()]
3407                    )
3408                except:
3409                    raise ValueError(
3410                        "Lack of IS data in some object. Check if the IS data was added to each project."
3411                    )
3412
3413            else:
3414                final_df = pd.concat(
3415                    [x.get_data() for x in self.concat_data] + [self.get_data()]
3416                )
3417
3418            return final_df
3419
3420        raise ValueError("No object to concatenate. Nothing to return!")
3421
3422    def save_mutual_experiments(self, path: str = "", inc_is: bool = False):
3423        """
3424        Save concatenated NucleiDataManagement data from added experiments as a CSV file.
3425
3426        Parameters
3427        ----------
3428        inc_is : bool, optional
3429            Whether to include IS (Image Stream) data, if it was added to each experiment. Default is False.
3430        """
3431
3432        dt = self.get_mutual_experiments_data(inc_is=inc_is)
3433
3434        experimets = [self.experiment_name] + [
3435            n.experiment_name for n in self.concat_data
3436        ]
3437
3438        experimets_names = "_".join(experimets)
3439
3440        if inc_is:
3441            full_path = os.path.join(path, f"{experimets_names}_IS.csv")
3442        else:
3443            full_path = os.path.join(path, f"{experimets_names}.csv")
3444
3445        dt.to_csv(full_path, index=False)
3446
3447
3448class GroupAnalysis:
3449    """
3450    A class for performing multivariate analysis, dimensionality reduction,
3451    clustering, and differential feature analysis (DFA) on biological or
3452    experimental datasets.
3453
3454    This class provides tools for:
3455    - Scaling and PCA of input data
3456    - UMAP embedding and DBSCAN clustering
3457    - Differential Feature Analysis across groups
3458    - Proportion analysis and plotting
3459    - Data selection and merging with metadata
3460
3461    Attributes
3462    ----------
3463    input_data : pd.DataFrame
3464        The primary dataset containing features for analysis.
3465
3466    input_metadata : pd.DataFrame
3467        Metadata corresponding to the input data, including identifiers and group labels.
3468
3469    tmp_data : pd.DataFrame
3470        Temporary copy of the input data, used for feature selection and filtering.
3471
3472    tmp_metadata : pd.DataFrame
3473        Temporary copy of metadata, used for filtered or subsetted operations.
3474
3475    scaled_data : np.ndarray or None
3476        Scaled version of the temporary dataset (`tmp_data`), updated after `data_scale()`.
3477
3478    PCA_results : np.ndarray or None
3479        Results of PCA transformation applied on scaled data.
3480
3481    var_data : np.ndarray or None
3482        Explained variance ratio from PCA.
3483
3484    knee_plot : matplotlib.figure.Figure or None
3485        Figure of cumulative explained variance for PCA components.
3486
3487    UMAP_data : np.ndarray or None
3488        Embedding results from UMAP dimensionality reduction.
3489
3490    UMAP_plot : dict
3491        Dictionary containing UMAP plots. Keys: 'static' (matplotlib) and 'html' (plotly).
3492
3493    dblabels : list or None
3494        Cluster labels assigned by DBSCAN after UMAP embedding.
3495
3496    explained_variance_ratio : np.ndarray or None
3497        Explained variance ratio of PCA components.
3498
3499    DFA_results : pd.DataFrame or None
3500        Results of Differential Feature Analysis (DFA).
3501
3502    proportion_stats : pd.DataFrame or None
3503        Statistics from proportion analysis.
3504
3505    proportion_plot : matplotlib.figure.Figure or None
3506        Figure of proportion analysis results.
3507
3508    Methods
3509    -------
3510    resest_project():
3511        Reset all temporary and analysis results to initial state.
3512
3513    load_data(data, ids_col='id_name', set_col='set'):
3514        Class method to load data and metadata and initialize the object.
3515
3516    groups:
3517        Property returning available groups in the metadata.
3518
3519    get_DFA(), get_PCA(), get_knee_plot(), get_var_data(), get_scaled_data():
3520        Methods to retrieve previously computed results.
3521
3522    UMAP(), db_scan(), UMAP_on_clusters():
3523        Methods for dimensionality reduction and clustering visualization.
3524
3525    DFA(meta_group_by='sets', sets={}, n_proc=5):
3526        Perform Differential Feature Analysis.
3527
3528    proportion_analysis(grouping_col='sets', val_col='nuclei_per_img', ...):
3529        Perform and plot proportion analysis across groups.
3530    """
3531
3532    def __init__(
3533        self,
3534        input_data,
3535        input_metadata,
3536    ):
3537        """
3538        Initialize a GroupAnalysis instance with data and metadata.
3539
3540        Parameters
3541        ----------
3542        input_data : pd.DataFrame
3543            Dataset containing features for analysis. Rows represent samples and columns represent features.
3544
3545        input_metadata : pd.DataFrame
3546            Metadata corresponding to `input_data`, including sample identifiers and group labels.
3547        """
3548
3549        self.input_data = input_data
3550        """Stored input dataset for analysis."""
3551
3552        self.input_metadata = input_metadata
3553        """Stored metadata associated with `input_data`."""
3554
3555        self.tmp_metadata = input_metadata
3556        """Temporary copy of `input_data` used for filtering, selection, or scaling."""
3557
3558        self.tmp_data = input_data
3559        """Temporary copy of `input_metadata` used for filtered operations."""
3560
3561        self.scaled_data = None
3562        """Stored scaled version of `tmp_data` after normalization or standardization."""
3563
3564        self.PCA_results = None
3565        """ Stored results of PCA transformation applied on `scaled_data`."""
3566
3567        self.var_data = None
3568        """Sotred explained variance ratio for PCA components."""
3569
3570        self.knee_plot = None
3571        """Figure showing cumulative explained variance for PCA."""
3572
3573        self.UMAP_data = None
3574        """Stored embedding coordinates from UMAP dimensionality reduction."""
3575
3576        self.UMAP_plot = {"static": {}, "html": {}}
3577        """Stored dictionary containing UMAP plots: 'static' (matplotlib) and 'html' (plotly)."""
3578
3579        self.dblabels = None
3580        """Stored cluster labels assigned by DBSCAN after UMAP embedding."""
3581
3582        self.explained_variance_ratio = None
3583        """Stored explained variance ratio of PCA components."""
3584
3585        self.DFA_results = None
3586        """Stored Differential Feature Analysis (DFA) results."""
3587
3588        self.proportion_stats = None
3589        """Stored statistics from proportion analysis of groups."""
3590
3591        self.proportion_plot = None
3592        """Figure visualizing proportion analysis results."""
3593
3594    def resest_project(self):
3595        """
3596        Resets the project state by clearing or reinitializing various attributes.
3597
3598        This method resets the following attributes to initial values:
3599        - `tmp_metadata`
3600        - `tmp_data`
3601        - `scaled_data`
3602        - `PCA_results`
3603        - `var_data`
3604        - `knee_plot`
3605        - `UMAP_data`
3606        - `UMAP_plot`
3607        - `dblabels`
3608        - `explained_variance_ratio`
3609        - `DFA_results`
3610
3611        This method is typically called to reinitialize the project data and results, preparing the system for new computations or project resets.
3612        """
3613
3614        self.tmp_metadata = self.input_metadata
3615        self.tmp_data = self.input_data
3616        self.scaled_data = None
3617        self.PCA_results = None
3618        self.var_data = None
3619        self.knee_plot = None
3620        self.UMAP_data = None
3621        self.UMAP_plot = {"static": {}, "html": {}}
3622        self.dblabels = None
3623        self.explained_variance_ratio = None
3624        self.DFA_results = None
3625        self.proportion_stats = None
3626        self.proportion_plot = None
3627
3628    @classmethod
3629    def load_data(cls, data, ids_col: str = "id_name", set_col: str = "set"):
3630        """
3631        Load data and initialize the class by storing both the feature data and metadata.
3632
3633        Parameters
3634        ----------
3635        data : pd.DataFrame
3636            Input dataset used for group analysis. Must contain both feature columns and
3637            metadata columns specified by `ids_col` and `set_col`.
3638
3639        ids_col : str, optional
3640            Name of the column containing unique object identifiers.
3641            Default is ``'id_name'``.
3642
3643        set_col : str, optional
3644            Name of the column specifying group or set assignment for each object.
3645            Default is ``'set'``.
3646
3647        Notes
3648        -----
3649        This method performs in-place initialization of the class and does not return
3650        a separate object. All loaded data and metadata become available through the
3651        class attributes for downstream analysis.
3652
3653        This method updates internal class attributes:
3654
3655        - **input_data** : pd.DataFrame
3656          Cleaned feature table with index set to object IDs.
3657
3658        - **tmp_data** : pd.DataFrame
3659          Copy of `input_data` used for temporary operations.
3660
3661        - **input_metadata** : pd.DataFrame
3662          Metadata containing object IDs and group assignments.
3663
3664        - **tmp_metadata** : pd.DataFrame
3665          Copy of `input_metadata` for temporary operations.
3666        """
3667
3668        data = data.dropna()
3669
3670        metadata = pd.DataFrame()
3671        metadata["id"] = data[ids_col]
3672        metadata["sets"] = data[set_col]
3673
3674        data.index = data[ids_col]
3675
3676        try:
3677            data.pop("id_name")
3678        except:
3679            None
3680
3681        try:
3682            data.pop("Object Number")
3683        except:
3684            None
3685
3686        return cls(data, metadata)
3687
3688    @property
3689    def groups(self):
3690        """
3691        Return information about available groups in the metadata for ``self.DFA``.
3692
3693        Returns
3694        -------
3695        dict
3696            Dictionary mapping each metadata column name to a list of unique groups
3697            available in that column.
3698        """
3699
3700        try:
3701            return {
3702                "sets": set(self.tmp_metadata["sets"]),
3703                "full_name": set(self.tmp_metadata["full_name"]),
3704            }
3705        except:
3706            return {"sets": set(self.tmp_metadata["sets"])}
3707
3708    def get_DFA(self):
3709        """
3710        Retrieve the DFA results produced by the ``DFA()`` method.
3711
3712        Returns
3713        -------
3714        pd.DataFrame
3715            The DFA results stored in ``self.DFA_results``.
3716        """
3717
3718        if None in self.DFA_results:
3719            print("\nNo results to return! Please run the DFA() method first.")
3720        else:
3721            return self.DFA_results
3722
3723    def get_PCA(self):
3724        """
3725        Retrieve the PCA results produced by the ``PCA()`` method.
3726
3727        Returns
3728        -------
3729        np.ndarray
3730            The PCA results stored in ``self.PCA_results``.
3731        """
3732
3733        if None in self.PCA_results:
3734            print("\nNo results to return! Please run the PCA() method first.")
3735        else:
3736            return self.PCA_results
3737
3738    def get_knee_plot(self, show: bool = True):
3739        """
3740        Retrieve the knee plot of cumulative explained variance generated by the ``var_plot()`` method.
3741
3742        Parameters
3743        ----------
3744        show : bool, optional
3745            If ``True`` (default), the knee plot is displayed.
3746
3747        Returns
3748        -------
3749        matplotlib.figure.Figure
3750            The figure object containing the knee plot.
3751        """
3752
3753        if self.knee_plot is None:
3754            print("\nNo results to return! Please run the var_plot() method first.")
3755        else:
3756            if cfg._DISPLAY_MODE:
3757                if show is True:
3758                    self.knee_plot
3759                    try:
3760                        display(self.knee_plot)
3761                    except:
3762                        None
3763
3764            return self.knee_plot
3765
3766    def get_var_data(self):
3767        """
3768        Retrieve the explained variance data from the ``var_plot()`` method.
3769
3770        Returns
3771        -------
3772        np.ndarray
3773            Array containing the explained variance values stored in ``self.var_data``.
3774        """
3775
3776        if None in self.var_data:
3777            print("\nNo results to return! Please run the var_plot() method first.")
3778        else:
3779            return self.var_data
3780
3781    def get_scaled_data(self):
3782        """
3783        Retrieve the scaled data produced by the ``data_scale()`` method.
3784
3785        Returns
3786        -------
3787        np.ndarray
3788            Scaled data stored in ``self.scaled_data``.
3789        """
3790
3791        if None in self.scaled_data:
3792            print("\nNo results to return! Please run the data_scale() method first.")
3793        else:
3794            return self.scaled_data
3795
3796    def get_UMAP_data(self):
3797        """
3798        Retrieve the UMAP-transformed data generated by the ``UMAP()`` method.
3799
3800        Returns
3801        -------
3802        np.ndarray
3803            UMAP-embedded data stored in ``self.UMAP_data``.
3804        """
3805
3806        if None in self.UMAP_data:
3807            print("\nNo results to return! Please run the UMAP() method first.")
3808        else:
3809            return self.UMAP_data
3810
3811    def get_UMAP_plots(self, plot_type: str = "static", show: bool = True):
3812        """
3813        Retrieve UMAP plots generated by the ``UMAP()`` and/or ``UMAP_on_clusters()`` methods.
3814
3815        Parameters
3816        ----------
3817        show : bool, optional
3818            Whether to display the UMAP plots. Default is True.
3819
3820        Returns
3821        -------
3822        dict of matplotlib.figure.Figure
3823            A dictionary containing the UMAP plots. Keys correspond to plot names, and values are the figure objects.
3824        """
3825
3826        if plot_type == "html":
3827
3828            if len(self.UMAP_plot["html"].keys()) == 0:
3829                print(
3830                    "\nNo results to return! Please run the UMAP() and / or UMAP_on_clusters() methods first."
3831                )
3832            else:
3833                if cfg._DISPLAY_MODE:
3834                    if show:
3835                        for k in self.UMAP_plot["html"].keys():
3836                            self.UMAP_plot["html"][k]
3837                            try:
3838                                display(self.UMAP_plot["html"][k])
3839                            except:
3840                                None
3841
3842                return self.UMAP_plot["html"]
3843
3844        else:
3845
3846            if len(self.UMAP_plot["static"].keys()) == 0:
3847                print(
3848                    "\nNo results to return! Please run the UMAP() and / or UMAP_on_clusters() methods first."
3849                )
3850            else:
3851                if cfg._DISPLAY_MODE:
3852                    if show:
3853                        for k in self.UMAP_plot["static"].keys():
3854                            self.UMAP_plot["static"][k]
3855                            try:
3856                                display(self.UMAP_plot["static"][k])
3857                            except:
3858                                None
3859
3860                return self.UMAP_plot["static"]
3861
3862    def select_data(self, features_list: list = []):
3863        """
3864        Select specific features (columns) from the dataset for further analysis.
3865
3866        Parameters
3867        ----------
3868        features_list : list of str, optional
3869            List of feature names (column names) to select from the dataset. Default is an empty list, which selects no features.
3870
3871        Notes
3872        -----
3873        Modifies the `self.tmp_data` attribute to contain only the selected features from `self.input_data`.
3874        """
3875
3876        dat = self.input_data.copy()
3877
3878        not_in_columns = [name for name in features_list if name not in dat.columns]
3879
3880        if not_in_columns:
3881            print("These names are not in data", not_in_columns)
3882        else:
3883            print("All names are present in data.")
3884
3885        in_columns = [name for name in features_list if name in dat.columns]
3886
3887        dat = dat[in_columns]
3888
3889        self.tmp_data = dat
3890
3891    def data_scale(self):
3892        """
3893        Scale the data using standardization (z-score normalization).
3894
3895        This method applies `StandardScaler` from scikit-learn to the temporary dataset (`self.tmp_data`) and stores the scaled data.
3896
3897        Notes
3898        -----
3899        Modifies the `self.scaled_data` attribute to contain the standardized version of `self.tmp_data`.
3900        """
3901
3902        if None not in self.tmp_data:
3903
3904            def is_id_column(name: str):
3905                name_lower = name.lower()
3906                return name_lower == "id" or "id_" in name_lower or "_id" in name_lower
3907
3908            tmp = self.tmp_data
3909
3910            cols_with_strings = [
3911                c
3912                for c in tmp.columns
3913                if tmp[c].apply(lambda x: isinstance(x, str)).any()
3914            ]
3915
3916            cols_id_pattern = [c for c in tmp.columns if is_id_column(c)]
3917
3918            cols_to_drop = list(set(cols_id_pattern + cols_with_strings))
3919
3920            tmp = tmp.drop(columns=cols_to_drop)
3921
3922            scaler = StandardScaler()
3923
3924            self.scaled_data = scaler.fit_transform(tmp)
3925
3926        else:
3927            print(
3928                "\nNo data to scale. Please use the load_data() method first, and optionally the select_data() method."
3929            )
3930
3931    def PCA(self):
3932        """
3933        Perform Principal Component Analysis (PCA) on the scaled data.
3934
3935        This method reduces the dimensionality of `self.scaled_data` while retaining the maximum variance.
3936
3937        Notes
3938        -----
3939        Modifies the `self.PCA_results` attribute with the PCA-transformed data.
3940        """
3941
3942        if None not in self.scaled_data:
3943            pca = PCA(n_components=self.scaled_data.shape[1])
3944            self.PCA_results = pca.fit_transform(self.scaled_data)
3945            self.explained_variance_ratio = pca.explained_variance_ratio_
3946        else:
3947            print("\nNo data for PCA. Please use the data_scale() method first.")
3948
3949    def var_plot(self):
3950        """
3951        Plot the cumulative explained variance of the principal components from PCA.
3952
3953        This method visualizes the cumulative explained variance to help determine how many components capture most of the variance.
3954
3955        Notes
3956        -----
3957        Stores results in the following attributes:
3958        - `self.var_data` (np.ndarray): Explained variance ratio for each principal component.
3959        - `self.knee_plot` (matplotlib.figure.Figure): Figure of the cumulative explained variance plot.
3960        """
3961
3962        if None not in self.PCA_results:
3963
3964            fig, _ = plt.subplots(figsize=(15, 7))
3965            explained_var = self.explained_variance_ratio
3966
3967            cumulative_var = np.cumsum(explained_var)
3968
3969            # Plot the cumulative explained variance as a function of the number of components
3970            plt.plot(cumulative_var)
3971            plt.xlabel("Number of Components")
3972            plt.ylabel("Cumulative Explained Variance")
3973            plt.title("Explained variance of PCs")
3974            plt.xticks(np.arange(0, len(cumulative_var) + 1, step=1))
3975
3976            self.var_data = explained_var
3977            self.knee_plot = fig
3978
3979        else:
3980
3981            print(
3982                "\nNo data for variance explanation analysis. Please use the PCA() method first."
3983            )
3984
3985    def UMAP(
3986        self,
3987        PC_num: int = 5,
3988        factorize_with_metadata: bool = False,
3989        harmonize_sets: bool = True,
3990        n_neighbors: int = 25,
3991        min_dist: float = 0.01,
3992        n_components: int = 2,
3993        width: int = 8,
3994        height: int = 6,
3995    ):
3996        """
3997         Perform UMAP (Uniform Manifold Approximation and Projection) dimensionality reduction on PCA results.
3998
3999         UMAP is applied to the top principal components, optionally using metadata labels to influence the embedding. Generates both 2D/3D embeddings and visualizations.
4000
4001         Parameters
4002         ----------
4003         PC_num : int, optional
4004             Number of top principal components to use for UMAP embedding. Default is 5.
4005
4006         factorize_with_metadata : bool, optional
4007             Whether to use metadata (e.g., 'sets') to factorize UMAP embedding. Default is False.
4008
4009        harmonize_sets : bool, optional
4010             If True, applies harmonization across data sets before computing the UMAP embedding.
4011             Default is True.
4012
4013         n_neighbors : int, optional
4014             Number of neighbors for UMAP to compute local structure. Default is 25.
4015
4016         min_dist : float, optional
4017             Minimum distance between points in the low-dimensional embedding. Default is 0.01.
4018
4019         n_components : int, optional
4020             Number of dimensions for the UMAP embedding. Default is 2.
4021
4022         width : int, optional
4023             Width of the generated matplotlib figures (in inches). Default is 8.
4024
4025         height : int, optional
4026             Height of the generated matplotlib figures (in inches). Default is 6.
4027
4028         Notes
4029         -----
4030         Stores results in the following attributes:
4031         - `self.UMAP_data` (np.ndarray): UMAP-transformed data.
4032         - `self.UMAP_plot['static']['PrimaryUMAP']` (matplotlib.figure.Figure): Static visualization of UMAP embedding.
4033         - `self.UMAP_plot['html']['PrimaryUMAP']` (plotly.graph_objs.Figure): Interactive Plotly visualization of UMAP embedding.
4034        """
4035
4036        if None not in self.PCA_results:
4037
4038            reducer = umap.UMAP(
4039                n_neighbors=n_neighbors,
4040                min_dist=min_dist,
4041                n_components=n_components,
4042                random_state=42,
4043            )
4044
4045            pca_res = self.PCA_results
4046
4047            if harmonize_sets:
4048
4049                pca_res = np.array(pca_res)
4050
4051                pca_res = np.array(
4052                    harmonize.run_harmony(
4053                        pca_res, self.input_metadata, vars_use="sets"
4054                    ).Z_corr
4055                ).T
4056
4057            if factorize_with_metadata:
4058                numeric_labels = pd.Categorical(self.tmp_metadata["sets"]).codes
4059
4060                umap_result = reducer.fit_transform(
4061                    pca_res[:, : PC_num + 1], y=numeric_labels
4062                )
4063
4064            else:
4065                umap_result = reducer.fit_transform(pca_res[:, : PC_num + 1])
4066
4067            umap_result_plot = pd.DataFrame(umap_result.copy())
4068
4069            umap_result_plot["clusters"] = list(self.tmp_metadata["sets"])
4070
4071            static_fig = umap_static(umap_result_plot, width=width, height=height)
4072
4073            html_fig = umap_html(
4074                umap_result_plot, width=width * 100, height=height * 100
4075            )
4076
4077            self.UMAP_data = umap_result
4078
4079            self.UMAP_plot["static"]["PrimaryUMAP"] = static_fig
4080            self.UMAP_plot["html"]["PrimaryUMAP"] = html_fig
4081
4082        else:
4083
4084            print("\nNo data for UMAP. Please use the PCA() method first.")
4085
4086    def db_scan(self, eps=0.5, min_samples: int = 10):
4087        """
4088        Perform DBSCAN clustering on UMAP-transformed data.
4089
4090        DBSCAN identifies clusters based on density, labeling points in dense regions as clusters and others as noise.
4091
4092        Parameters
4093        ----------
4094        eps : float, optional
4095            Maximum distance between two points to be considered neighbors. Default is 0.5.
4096
4097        min_samples : int, optional
4098            Minimum number of points required to form a dense region (cluster). Default is 10.
4099
4100        Notes
4101        -----
4102        Stores the results in the following attribute:
4103        - `self.dblabels` (list of str): Cluster labels assigned by DBSCAN for each point in the UMAP embedding.
4104        """
4105
4106        if None not in self.UMAP_data:
4107
4108            dbscan = DBSCAN(eps=eps, min_samples=min_samples)
4109            dbscan_labels = dbscan.fit_predict(self.UMAP_data)
4110            self.dblabels = [str(x) for x in dbscan_labels]
4111
4112        else:
4113
4114            print("\nNo data for DBSCAN. Please use the UMAP() method first.")
4115
4116    def UMAP_on_clusters(
4117        self,
4118        min_entities: int = 50,
4119        width: int = 8,
4120        height: int = 6,
4121        n_per_col: int = 20,
4122    ):
4123        """
4124        Generate UMAP visualizations for clusters filtered by a minimum entity threshold.
4125
4126        This method removes clusters containing fewer than `min_entities` observations
4127        and produces two UMAP visualizations:
4128
4129        1. **Cluster UMAP** – points colored by cluster assignment only.
4130        2. **Cluster × Set UMAP** – points colored by the combination of cluster and set identifier.
4131
4132        Parameters
4133        ----------
4134        min_entities : int, optional
4135            Minimum number of entities required for a cluster to be included
4136            in the visualization. Default is 50.
4137
4138        width : int, optional
4139            Width of the generated matplotlib figures (in inches). Default is 8.
4140
4141        height : int, optional
4142            Height of the generated matplotlib figures (in inches). Default is 6.
4143
4144        n_per_col : int, optional
4145            Maximum number of legend entries per column. Default is 20.
4146
4147        Notes
4148        -----
4149        This method updates the following attributes:
4150
4151        - `self.UMAP_plot['static']['ClusterUMAP']`
4152          Static matplotlib figure of the filtered cluster-only UMAP.
4153
4154        - `self.UMAP_plot['html']['ClusterUMAP']`
4155          Interactive HTML version of the cluster-only UMAP.
4156
4157        - `self.UMAP_plot['static']['ClusterXSetsUMAP']`
4158          Static matplotlib figure showing clusters combined with set identifiers.
4159
4160        - `self.UMAP_plot['html']['ClusterXSetsUMAP']`
4161          Interactive HTML version of the cluster × set visualization.
4162
4163        - `self.tmp_data`
4164          Dataset filtered to include only clusters meeting the `min_entities` threshold.
4165
4166        - `self.tmp_metadata`
4167          Metadata corresponding to the filtered dataset.
4168        """
4169
4170        if None not in self.UMAP_data:
4171
4172            if hasattr(self, "_tmp_data_old"):
4173                self.tmp_data = self._tmp_data_old
4174
4175            if hasattr(self, "_tmp_metadata_old"):
4176                self.tmp_metadata = self._tmp_metadata_old
4177
4178            umap_result = pd.DataFrame(self.UMAP_data.copy())
4179            umap_result["id"] = self.tmp_metadata.index
4180            umap_result["clusters"] = self.dblabels
4181            umap_result = umap_result[umap_result["clusters"] != "-1"]
4182            tmp_metadata = self.tmp_metadata.copy()
4183            tmp_metadata["clusters"] = self.dblabels
4184            tmp_metadata = tmp_metadata[tmp_metadata["clusters"] != "-1"]
4185            tmp_data = self.tmp_data.copy()
4186            tmp_data.index = self.dblabels
4187            tmp_data = tmp_data[tmp_data.index != "-1"]
4188
4189            label_counts_dict = Counter(self.dblabels)
4190
4191            label_counts = pd.DataFrame.from_dict(
4192                label_counts_dict, orient="index", columns=["count"]
4193            )
4194
4195            filtered_counts = label_counts[label_counts["count"] > min_entities]
4196
4197            tmp_metadata["full_id"] = list(
4198                tmp_metadata["id"].astype(str) + " # " + tmp_metadata["sets"]
4199            )
4200
4201            tmp_data.index = tmp_metadata["full_id"]
4202            umap_result["full_id"] = list(tmp_metadata["full_id"])
4203
4204            umap_result = umap_result[
4205                umap_result["clusters"].isin(np.array(filtered_counts.index))
4206            ]
4207            tmp_metadata = tmp_metadata[
4208                tmp_metadata["clusters"].isin(np.array(filtered_counts.index))
4209            ]
4210
4211            umap_result = umap_result.sort_values(
4212                by="clusters", key=lambda x: x.astype(int)
4213            )
4214
4215            tmp_data = tmp_data[tmp_data.index.isin(np.array(tmp_metadata["full_id"]))]
4216
4217            static_fig = umap_static(
4218                umap_result, width=width, height=height, n_per_col=n_per_col
4219            )
4220
4221            html_fig = umap_html(umap_result, width=width * 100, height=height * 100)
4222
4223            self.UMAP_plot["static"]["ClusterUMAP"] = static_fig
4224            self.UMAP_plot["html"]["ClusterUMAP"] = html_fig
4225
4226            tmp_metadata["full_name"] = list(
4227                tmp_metadata["clusters"] + " # " + tmp_metadata["sets"]
4228            )
4229
4230            label_counts_dict = Counter(list(tmp_metadata["full_name"]))
4231
4232            label_counts = pd.DataFrame.from_dict(
4233                label_counts_dict, orient="index", columns=["count"]
4234            )
4235
4236            filtered_counts = label_counts[label_counts["count"] > min_entities]
4237
4238            tmp_data.index = tmp_metadata["full_name"]
4239            umap_result["clusters"] = list(tmp_metadata["full_name"])
4240
4241            umap_result = umap_result[
4242                umap_result["clusters"].isin(np.array(filtered_counts.index))
4243            ]
4244
4245            tmp_metadata = tmp_metadata[
4246                tmp_metadata["full_name"].isin(np.array(filtered_counts.index))
4247            ]
4248
4249            tmp_data = tmp_data[tmp_data.index.isin(np.array(filtered_counts.index))]
4250
4251            static_fig = umap_static(
4252                umap_result, width=width, height=height, n_per_col=n_per_col
4253            )
4254
4255            html_fig = umap_html(umap_result, width=width * 100, height=height * 100)
4256
4257            self.UMAP_plot["static"]["ClusterXSetsUMAP"] = static_fig
4258
4259            self.UMAP_plot["html"]["ClusterXSetsUMAP"] = html_fig
4260
4261            self._tmp_data_old = self.tmp_data
4262            self._tmp_metadata_old = self.tmp_metadata
4263
4264            self.tmp_data = tmp_data
4265            self.tmp_metadata = tmp_metadata
4266
4267        else:
4268            print(
4269                "\nNo data for visualization. Please use the UMAP() and db_scan() methods first."
4270            )
4271
4272    ## save data
4273    def full_info(self):
4274        """
4275        Merge data with metadata based on the 'full_id' column.
4276
4277        This method combines `self.tmp_data` and `self.tmp_metadata` into a single DataFrame if the metadata contains a 'full_id' column. If 'full_id' is not present, the method prints a warning to complete the preprocessing pipeline.
4278
4279        Returns
4280        -------
4281        pd.DataFrame or None
4282            Merged DataFrame containing both data and metadata if 'full_id' exists; otherwise, None.
4283        """
4284
4285        tmp_data = self.tmp_data.copy()
4286        tmp_metadata = self.tmp_metadata.copy()
4287
4288        if "full_id" in tmp_metadata.columns:
4289            tmp_data.index = tmp_metadata["full_id"]
4290
4291            merged_df = tmp_data.merge(
4292                tmp_metadata, left_index=True, right_on="full_id", how="left"
4293            )
4294
4295            return merged_df
4296
4297        else:
4298
4299            print("\nMetadata is not completed!")
4300
4301        #################################################################################
4302
4303    def DFA(self, meta_group_by: str = "sets", sets: dict = {}, n_proc=5):
4304        """
4305        Perform Differential Feature Analysis (DFA) on specified data groups.
4306
4307        This method conducts DFA using a grouping factor from metadata and a dictionary of sets for comparison. It allows for the identification of significant differences across defined sets.
4308
4309        The analysis includes:
4310        - Mann–Whitney U test
4311        - Percentage of non-zero values
4312        - Means and standard deviations
4313        - Effect size metric (ESM)
4314        - Benjamini–Hochberg FDR correction
4315        - Fold-change and log2 fold-change
4316
4317        Parameters
4318        ----------
4319        meta_group_by : str, optional
4320            Metadata column used for grouping during the analysis.
4321            Default is ``'sets'``.
4322            To view available grouping categories, use ``self.groups``.
4323
4324        sets : dict, optional
4325            Dictionary defining groups for pairwise comparison.
4326            Keys correspond to group names, and values are lists of labels
4327            belonging to each group.
4328
4329            Example
4330            -------
4331            >>> sets = {
4332            ...     'healthy': ['21q'],
4333            ...     'disease': ['71q', '77q', '109q']
4334            ... }
4335            In this configuration, the *healthy* group is compared against the
4336            aggregated *disease* groups.
4337
4338        n_proc : int, optional
4339            Number of CPU cores used for parallel processing.
4340            Default is ``5``.
4341
4342        Returns
4343        -------
4344        pandas.DataFrame or None
4345            A DataFrame containing statistical results for each feature, including:
4346
4347            - ``feature`` : str
4348            - ``p_val`` : float
4349            - ``adj_pval`` : float
4350            - ``pct_valid`` : float
4351            - ``pct_ctrl`` : float
4352            - ``avg_valid`` : float
4353            - ``avg_ctrl`` : float
4354            - ``sd_valid`` : float
4355            - ``sd_ctrl`` : float
4356            - ``esm`` : float
4357            - ``FC`` : float
4358            - ``log(FC)`` : float
4359            - ``norm_diff`` : float
4360            - ``valid_group`` : str
4361            - ``-log(p_val)`` : float
4362
4363            If ``sets`` is ``None``, results for each group are concatenated.
4364
4365            Returns ``None`` in case of errors or invalid parameters.
4366
4367        Notes
4368        -----
4369        - Columns containing only zeros are automatically removed.
4370        - p-values equal for both groups produce ``p_val = 1``.
4371        - Benjamini–Hochberg correction is applied separately within each group comparison.
4372        - Fold-change is stabilized using a small, data-derived ``low_factor``.
4373        - Uses ``Mann–Whitney U`` test with ``alternative='two-sided'``.
4374
4375        """
4376
4377        tmp_data = self.tmp_data.copy()
4378
4379        tmp_data = tmp_data.select_dtypes(include="number")
4380
4381        tmp_metadata = self.tmp_metadata.copy()
4382
4383        if len(sets.keys()) >= 2:
4384            print("\nAnalysis strated on provided sets dictionary and meta_group_by...")
4385            tmp_data.index = list(tmp_metadata[meta_group_by])
4386            tmp_metadata["sets"] = tmp_metadata[meta_group_by]
4387            results = statistic(
4388                tmp_data.transpose(), sets=sets, metadata=tmp_metadata, n_proc=n_proc
4389            )
4390
4391        else:
4392            print(
4393                "\nAnalysis strated on for all groups to each other in meta_group_by..."
4394            )
4395            tmp_data.index = list(tmp_metadata[meta_group_by])
4396            tmp_metadata["sets"] = tmp_metadata[meta_group_by]
4397            results = statistic(
4398                tmp_data.transpose(), sets=None, metadata=tmp_metadata, n_proc=n_proc
4399            )
4400
4401        self.DFA_results = results
4402
4403    def heatmap_DFA(
4404        self,
4405        p_value: float | int = 0.05,
4406        top_n: int = 5,
4407        scale: bool = False,
4408        clustering: str | None = "ward",
4409        figsize=(10, 5),
4410    ):
4411        """
4412        Generate a heatmap of the top DFA features filtered by p-value and log fold change.
4413
4414        Parameters
4415        ----------
4416        p_value : float or int, optional
4417            Significance threshold used to filter features by their p-value.
4418            Only features with p_val < p_value are included. Default is 0.05.
4419
4420        top_n : int, optional
4421            Number of top features selected per group based on the 'esm' score.
4422            Default is 5.
4423
4424        scale : bool, optional
4425            Whether to apply Min–Max scaling to heatmap values across features.
4426            Default is False.
4427
4428        clustering : str or None, optional
4429            Hierarchical clustering method applied to rows/columns of the heatmap.
4430            If None, clustering is disabled. Default is 'ward'.
4431
4432        figsize : tuple, optional
4433            Size of the resulting matplotlib figure. Default is (10, 5).
4434
4435        Notes
4436        -----
4437        - Only features with a positive log fold change ('log(FC)' > 0) are considered.
4438        - Heatmap values represent -log10(p_value) for visualization.
4439        - If `scale=True`, values are normalized using Min–Max scaling.
4440        - The generated figure is displayed and stored in `self.DFA_plot`.
4441        """
4442
4443        df_reduced = self.DFA_results.copy()
4444
4445        df_reduced = df_reduced[df_reduced["log(FC)"] > 0]
4446
4447        df_reduced = df_reduced[df_reduced["p_val"] < p_value]
4448
4449        df_reduced = (
4450            df_reduced.sort_values(["valid_group", "esm"], ascending=[True, False])
4451            .groupby("valid_group", as_index=False)
4452            .head(top_n)
4453        )
4454
4455        heatmap_data = df_reduced.pivot(
4456            index="feature", columns="valid_group", values="-log(p_val)"
4457        ).fillna(0)
4458
4459        label = "-log10(p_value)"
4460
4461        if scale:
4462            label = f"scaled({label})"
4463            scaler = MinMaxScaler()
4464            heatmap_data = pd.DataFrame(
4465                scaler.fit_transform(heatmap_data),
4466                index=heatmap_data.index,
4467                columns=heatmap_data.columns,
4468            )
4469
4470        if clustering is not None:
4471            Z_rows = linkage(heatmap_data.values, method=clustering)
4472            row_order = leaves_list(Z_rows)
4473
4474            Z_cols = linkage(heatmap_data.values.T, method=clustering)
4475            col_order = leaves_list(Z_cols)
4476
4477            heatmap_data = heatmap_data.iloc[row_order, col_order]
4478
4479        figure = plt.figure(figsize=figsize)
4480        sns.heatmap(
4481            heatmap_data,
4482            cmap="viridis",
4483            linewidths=0.5,
4484            linecolor="gray",
4485            cbar_kws={"label": label},
4486            fmt=".2f",
4487        )
4488        plt.ylabel("Feature")
4489        plt.xlabel("Cluster")
4490        plt.xticks(rotation=30, ha="right")
4491
4492        plt.tight_layout()
4493
4494        if cfg._DISPLAY_MODE:
4495            plt.show()
4496
4497        self.DFA_plot = figure
4498
4499    def get_DFA_plot(self, show: bool = True):
4500        """
4501        Retrieve the heatmap figure generated by `heatmap_DFA()`.
4502
4503        Parameters
4504        ----------
4505        show : bool, optional
4506            Whether to display the stored heatmap figure. Default is True.
4507
4508        Returns
4509        -------
4510        matplotlib.figure.Figure
4511            The figure object containing the DFA heatmap.
4512        """
4513
4514        if self.DFA_plot is None:
4515            print("\nNo results to return! Please run the heatmap_DFA() method first.")
4516        else:
4517            if cfg._DISPLAY_MODE:
4518                if show is True:
4519                    self.DFA_plot
4520                    try:
4521                        display(self.DFA_plot)
4522                    except:
4523                        None
4524
4525            return self.DFA_plot
4526
4527    def print_avaiable_features(self):
4528        """
4529        Print the available features (columns) in the current dataset.
4530
4531        This method lists all column names in `self.tmp_data` to help identify which features are available for analysis.
4532
4533        Example
4534        -------
4535        >>> group_analysis.print_avaiable_features()
4536        """
4537
4538        print("Avaiable features:")
4539        for cl in self.tmp_data.columns:
4540            print(cl)
4541
4542    def proportion_analysis(
4543        self,
4544        grouping_col: str = "sets",
4545        val_col: str = "nuclei_per_img",
4546        grouping_dict=None,
4547        omit=None,
4548    ):
4549        """
4550        Perform proportion analysis by comparing the distribution of values across groups.
4551
4552        This method analyzes the distribution of values (e.g., nuclei counts) across different groups defined in the dataset. It can optionally group categories, omit specific values, and produces both statistical results and a visualization.
4553
4554        Parameters
4555        ----------
4556        grouping_col : str, optional
4557            Column to group by. Default is 'sets'.
4558
4559        val_col : str, optional
4560            Column containing the values to analyze. Default is 'nuclei_per_img'.
4561
4562        grouping_dict : dict or None, optional
4563            Dictionary mapping new group names to categories in `grouping_col`. If None, analysis is based on the original groups.
4564
4565        omit : str, list, or None, optional
4566            Values to exclude from the analysis. Default is None.
4567
4568        Attributes
4569        ----------
4570        proportion_stats : pd.DataFrame
4571            DataFrame containing chi-square test results for pairwise group comparisons.
4572
4573        proportion_plot : matplotlib.figure.Figure
4574            Plot visualizing the proportions across groups.
4575
4576        Example
4577        -------
4578        >>> group_analysis.proportion_analysis(
4579        ...     grouping_col='sets',
4580        ...     val_col='nuclei_per_img',
4581        ...     grouping_dict={'Group A': [1, 2], 'Group B': [3, 4]},
4582        ...     omit=5
4583        ... )
4584        """
4585
4586        andata = self.tmp_data.copy()
4587
4588        andata[grouping_col] = list(self.tmp_metadata[grouping_col])
4589
4590        andata = andata[[grouping_col, val_col]]
4591
4592        if omit is not None:
4593            if isinstance(omit, list):
4594                andata = andata[~andata[val_col].isin(omit)]
4595            else:
4596                andata = andata[andata[val_col] != omit]
4597
4598        andata = andata.reset_index(drop=True)
4599        andata["index_col"] = andata.index
4600
4601        if isinstance(grouping_dict, dict):
4602            for k in grouping_dict.keys():
4603                andata.loc[
4604                    andata[grouping_col].isin(grouping_dict[k]), grouping_col
4605                ] = k
4606
4607        df_pivot = andata.pivot_table(
4608            index=val_col,
4609            columns=grouping_col,
4610            values="index_col",
4611            aggfunc="count",
4612            fill_value=0,
4613        )
4614
4615        Z_rows = linkage(df_pivot.values, method="ward")
4616        row_order = leaves_list(Z_rows)
4617
4618        Z_cols = linkage(df_pivot.values.T, method="ward")
4619        col_order = leaves_list(Z_cols)
4620
4621        df_pivot = df_pivot.iloc[row_order, col_order]
4622
4623        chi_df = chi_pairs(df_pivot)
4624
4625        self.proportion_stats = chi_pairs(df_pivot)
4626
4627        chi_df["Significance_Label"] = chi_df["p-value"].apply(get_significance_label)
4628
4629        self.proportion_plot = prop_plot(df_pivot, chi_df)
4630
4631    def get_proportion_plot(self, show: bool = True):
4632        """
4633        Retrieve the proportion bar plot generated by the `proportion_analysis()` method.
4634
4635        Parameters
4636        ----------
4637        show : bool, optional
4638            Whether to display the proportion bar plot. Default is True.
4639
4640        Returns
4641        -------
4642        matplotlib.figure.Figure
4643            The figure object containing the proportion bar plot.
4644        """
4645
4646        if self.proportion_plot is None:
4647            print(
4648                "\nNo results to return! Please run the proportion_analysis() method first."
4649            )
4650        else:
4651            if cfg._DISPLAY_MODE:
4652                if show:
4653                    self.proportion_plot
4654                    try:
4655                        display(self.proportion_plot)
4656                    except:
4657                        None
4658
4659            return self.proportion_plot
4660
4661    def get_proportion_stats(self):
4662        """
4663        Retrieve the proportion statistics computed by the `proportion_analysis()` method.
4664
4665        Returns
4666        -------
4667        pd.DataFrame
4668            The proportion statistics stored in `self.proportion_stats`.
4669        """
4670
4671        if None in self.proportion_stats:
4672            print(
4673                "\nNo results to return! Please run the proportion_analysis() method first."
4674            )
4675        else:
4676            return self.proportion_stats
class RepTools:
 46class RepTools:
 47    """
 48    A utility class for processing and repairing nuclei data.
 49    Provides methods for extracting subsets, removing outliers, computing geometrical features,
 50    and merging/splitting nuclei based on spatial and intensity criteria.
 51    """
 52
 53    def extract_dict_by_indices(self, d, indices):
 54        """
 55        Extracts elements from all dictionary lists using provided indices.
 56
 57        Parameters
 58        ----------
 59        d : dict
 60            Dictionary with list values.
 61
 62        indices : list
 63            List of indices to extract from each dictionary entry.
 64
 65        Returns
 66        -------
 67        dict
 68            Dictionary containing only the selected elements.
 69        """
 70
 71        return {
 72            key: [values[i] for i in indices if i < len(values)]
 73            for key, values in d.items()
 74        }
 75
 76    def drop_outlires(self, row, diff_FC_intensity=0.95, circ=0.6):
 77        """
 78        Identify indices of nuclei that are considered outliers based on circularity and intensity.
 79
 80        Parameters
 81        ----------
 82        row : dict
 83            Dictionary containing nuclei properties, including 'circularity' and 'intensity_mean'.
 84
 85        diff_FC_intensity : float
 86            Fraction of mean intensity below which a nucleus is considered an outlier.
 87
 88        circ : float
 89            Minimum circularity threshold for nuclei to be considered.
 90
 91        Returns
 92        -------
 93        list
 94            List of indices to drop as outliers.
 95        """
 96
 97        cd = []
 98        for n, _ in enumerate(row["circularity"]):
 99            if row["circularity"][n] > circ:
100                cd.append(n)
101
102        row = self.extract_dict_by_indices(row, cd)
103
104        drop = []
105        is_mean = np.mean(row["intensity_mean"])
106
107        for n, _ in enumerate(row["intensity_mean"]):
108            FC_mean = row["intensity_mean"][n] / is_mean
109            if FC_mean < diff_FC_intensity:
110                drop.append(n)
111
112        return drop
113
114    def nn(self, coords):
115        """
116        Compute close neighbors between nuclei coordinates using a threshold distance.
117
118        Parameters
119        ----------
120        coords : list
121            List of numpy arrays, each containing coordinates for a nucleus.
122
123        Returns
124        -------
125        dict
126            Dictionary mapping pairs of nuclei indices to the number of close neighbors.
127        """
128
129        full_list = {}
130        for i in range(len(coords)):
131            for j in range(len(coords)):
132                if i != j:
133
134                    tree1 = cKDTree(coords[i])
135
136                    distances, indices = tree1.query(coords[j])
137
138                    threshold = 2
139                    close_neighbors = np.sum(distances < threshold)
140
141                    full_list[f"{i} --> {j}"] = close_neighbors
142
143        return full_list
144
145    def compute_axes_length(self, contour):
146        """
147        Compute major and minor axis lengths of a nucleus from its contour.
148
149        Parameters
150        ----------
151        contour : np.ndarray
152            Coordinates of nucleus contour points.
153
154        Returns
155        -------
156        tuple
157            Major and minor axis lengths.
158        """
159
160        cov = np.cov(contour.T)
161
162        eigvals, _ = np.linalg.eigh(cov)
163
164        axis_major_length = 2 * np.sqrt(eigvals.max())
165        axis_minor_length = 2 * np.sqrt(eigvals.min())
166
167        return axis_major_length, axis_minor_length
168
169    def compute_eccentricity(self, contour):
170        """
171        Compute eccentricity of a nucleus from its contour.
172
173        Parameters
174        ----------
175        contour : np.ndarray
176            Coordinates of nucleus contour points.
177
178        Returns
179        -------
180        float
181            Eccentricity of the nucleus.
182        """
183
184        cov = np.cov(contour.T)
185        eigvals, _ = np.linalg.eigh(cov)
186
187        eccentricity = np.sqrt(1 - (eigvals.min() / eigvals.max()))
188        return eccentricity
189
190    def compute_feret_diameter(self, contour):
191        """
192        Compute the Feret diameter of a given contour.
193
194        The Feret diameter is defined as the maximum pairwise Euclidean distance between points in the contour.
195
196        Parameters
197        ----------
198        contour : np.ndarray
199            Array of shape (N, 2) representing the contour coordinates.
200
201        Returns
202        -------
203        float
204            The maximum distance between any two points in the contour.
205        """
206
207        rect = cv2.minAreaRect(contour)
208        (w, h) = rect[1]
209        return max(w, h)
210
211    def compute_perimeter(self, contour):
212        """
213        Compute the perimeter of a contour.
214
215        The perimeter is calculated as the sum of Euclidean distances between consecutive points in the contour.
216
217        Parameters
218        ----------
219        contour : np.ndarray
220            Array of shape (N, 2) representing the contour coordinates.
221
222        Returns
223        -------
224        float
225            Perimeter length of the contour.
226        """
227
228        return np.sum(np.linalg.norm(np.diff(contour, axis=0), axis=1))
229
230    def compute_circularity(self, contour):
231        """
232        Compute the circularity of a contour.
233
234        Circularity is a measure of how close the shape is to a perfect circle.
235        It is calculated as 4 * pi * (area / perimeter^2).
236
237        Parameters
238        ----------
239        contour : np.ndarray
240            Array of shape (N, 2) representing the contour coordinates.
241
242        Returns
243        -------
244        float
245            Circularity of the contour. Value ranges from 0 to 1, where 1 indicates a perfect circle.
246        """
247        perimeter = self.compute_perimeter(contour)
248        hull = ConvexHull(contour)
249        area = hull.volume
250
251        return (4 * np.pi * area) / (perimeter**2)
252
253    def repairing_nuclei(self, results):
254        """
255        Repair nuclei segmentation results by merging or removing outlier nuclei.
256
257        This method adjusts nuclei detection results based on global and local thresholds, circularity, nearest neighbor relationships,
258        and merges small or fragmented nuclei when appropriate. It also recalculates key morphological properties for merged nuclei.
259
260        Parameters
261        ----------
262        results : dict
263            Dictionary where keys are image identifiers and values are dictionaries containing detected nuclei properties
264            (e.g., 'area', 'coords', 'label', 'circularity', etc.).
265
266        Returns
267        -------
268        dict
269            A dictionary in the same structure as `results`, but with repaired nuclei information after merging or removing outliers.
270        """
271
272        # repairing nuclei
273        mean_sum_area = []
274        im = []
275        n = []
276        for r in tqdm(results.keys()):
277            mean_sum_area.append(np.sum(results[r]["area"]))
278            n.append(len(results[r]["area"]))
279            im.append(r)
280
281        mean_sum_area_sum = np.mean(mean_sum_area)
282
283        results_dict = {}
284
285        print("\nImage repairing:\n\n")
286
287        for i, m in tqdm(zip(im, n), total=len(im)):
288
289            if (
290                m > 1
291                and np.sum(results[i]["area"]) / mean_sum_area_sum
292                < self.hyperparameter_nuclei["FC_diff_global"]
293            ):
294                # adjustment to global changes
295
296                temporary_dict = results[i]
297
298                check_drop = self.drop_outlires(
299                    temporary_dict,
300                    diff_FC_intensity=self.hyperparameter_nuclei[
301                        "FC_diff_local_intensity"
302                    ],
303                    circ=self.hyperparameter_nuclei["circularity"],
304                )
305
306                to_final = [
307                    x
308                    for x in list(range(len(temporary_dict["area"])))
309                    if int(x) not in check_drop
310                ]
311
312                tmp = self.extract_dict_by_indices(temporary_dict, to_final)
313
314                to_concat = []
315
316                if len(tmp["coords"]) > 1:
317
318                    results_nn = self.nn(tmp["coords"])
319
320                    for kn in results_nn.keys():
321                        if results_nn[kn] > self.hyperparameter_nuclei["nn_min"]:
322                            to_concat.append(int(re.sub(" --> .*", "", kn)))
323                            to_concat.append(int(re.sub(".* --> ", "", kn)))
324
325                    to_concat = list(set(to_concat))
326
327                    to_rest = [
328                        x for x in list(range(len(tmp["area"]))) if x not in to_concat
329                    ]
330
331                #
332                if len(to_concat) > 1:
333                    to_concat_dict = self.extract_dict_by_indices(tmp, to_concat)
334                    to_concat_dict["coords"] = [np.vstack(to_concat_dict["coords"])]
335                    to_concat_dict["label"] = [min(to_concat_dict["label"])]
336                    to_concat_dict["area"] = [np.sum(to_concat_dict["area"])]
337                    to_concat_dict["area_bbox"] = [np.sum(to_concat_dict["area_bbox"])]
338                    to_concat_dict["area_convex"] = [
339                        np.sum(to_concat_dict["area_convex"])
340                    ]
341                    to_concat_dict["area_filled"] = [
342                        np.sum(to_concat_dict["area_filled"])
343                    ]
344                    to_concat_dict["intensity_max"] = [
345                        np.max(to_concat_dict["intensity_max"])
346                    ]
347                    to_concat_dict["intensity_mean"] = [
348                        np.mean(to_concat_dict["intensity_mean"])
349                    ]
350                    to_concat_dict["intensity_min"] = [
351                        np.min(to_concat_dict["intensity_min"])
352                    ]
353                    major, minor = self.compute_axes_length(to_concat_dict["coords"][0])
354                    to_concat_dict["axis_major_length"] = [major]
355                    to_concat_dict["axis_minor_length"] = [minor]
356                    to_concat_dict["ratio"] = [minor / major]
357                    ecc = self.compute_eccentricity(to_concat_dict["coords"][0])
358                    to_concat_dict["eccentricity"] = [ecc]
359                    to_concat_dict["equivalent_diameter_area"] = [
360                        np.sum(to_concat_dict["equivalent_diameter_area"])
361                    ]
362                    feret_diameter = self.compute_feret_diameter(
363                        to_concat_dict["coords"][0]
364                    )
365                    to_concat_dict["feret_diameter_max"] = [feret_diameter]
366                    to_concat_dict["solidity"] = [np.mean(to_concat_dict["solidity"])]
367                    to_concat_dict["perimeter"] = [np.sum(to_concat_dict["perimeter"])]
368                    to_concat_dict["perimeter_crofton"] = [
369                        np.sum(to_concat_dict["perimeter_crofton"])
370                    ]
371                    to_concat_dict["circularity"] = [
372                        np.mean(to_concat_dict["circularity"])
373                    ]
374
375                    to_rest_dict = self.extract_dict_by_indices(tmp, to_rest)
376
377                    for ik in to_rest_dict.keys():
378                        to_rest_dict[ik] = to_rest_dict[ik] + to_concat_dict[ik]
379
380                    results_dict[i] = to_rest_dict
381
382                else:
383                    results_dict[i] = tmp
384
385            elif (
386                m == 1
387                and results[i]["circularity"][0]
388                > self.hyperparameter_nuclei["circularity"]
389            ):
390
391                results_dict[i] = results[i]
392
393        return results_dict

A utility class for processing and repairing nuclei data. Provides methods for extracting subsets, removing outliers, computing geometrical features, and merging/splitting nuclei based on spatial and intensity criteria.

def extract_dict_by_indices(self, d, indices):
53    def extract_dict_by_indices(self, d, indices):
54        """
55        Extracts elements from all dictionary lists using provided indices.
56
57        Parameters
58        ----------
59        d : dict
60            Dictionary with list values.
61
62        indices : list
63            List of indices to extract from each dictionary entry.
64
65        Returns
66        -------
67        dict
68            Dictionary containing only the selected elements.
69        """
70
71        return {
72            key: [values[i] for i in indices if i < len(values)]
73            for key, values in d.items()
74        }

Extracts elements from all dictionary lists using provided indices.

Parameters

d : dict Dictionary with list values.

indices : list List of indices to extract from each dictionary entry.

Returns

dict Dictionary containing only the selected elements.

def drop_outlires(self, row, diff_FC_intensity=0.95, circ=0.6):
 76    def drop_outlires(self, row, diff_FC_intensity=0.95, circ=0.6):
 77        """
 78        Identify indices of nuclei that are considered outliers based on circularity and intensity.
 79
 80        Parameters
 81        ----------
 82        row : dict
 83            Dictionary containing nuclei properties, including 'circularity' and 'intensity_mean'.
 84
 85        diff_FC_intensity : float
 86            Fraction of mean intensity below which a nucleus is considered an outlier.
 87
 88        circ : float
 89            Minimum circularity threshold for nuclei to be considered.
 90
 91        Returns
 92        -------
 93        list
 94            List of indices to drop as outliers.
 95        """
 96
 97        cd = []
 98        for n, _ in enumerate(row["circularity"]):
 99            if row["circularity"][n] > circ:
100                cd.append(n)
101
102        row = self.extract_dict_by_indices(row, cd)
103
104        drop = []
105        is_mean = np.mean(row["intensity_mean"])
106
107        for n, _ in enumerate(row["intensity_mean"]):
108            FC_mean = row["intensity_mean"][n] / is_mean
109            if FC_mean < diff_FC_intensity:
110                drop.append(n)
111
112        return drop

Identify indices of nuclei that are considered outliers based on circularity and intensity.

Parameters

row : dict Dictionary containing nuclei properties, including 'circularity' and 'intensity_mean'.

diff_FC_intensity : float Fraction of mean intensity below which a nucleus is considered an outlier.

circ : float Minimum circularity threshold for nuclei to be considered.

Returns

list List of indices to drop as outliers.

def nn(self, coords):
114    def nn(self, coords):
115        """
116        Compute close neighbors between nuclei coordinates using a threshold distance.
117
118        Parameters
119        ----------
120        coords : list
121            List of numpy arrays, each containing coordinates for a nucleus.
122
123        Returns
124        -------
125        dict
126            Dictionary mapping pairs of nuclei indices to the number of close neighbors.
127        """
128
129        full_list = {}
130        for i in range(len(coords)):
131            for j in range(len(coords)):
132                if i != j:
133
134                    tree1 = cKDTree(coords[i])
135
136                    distances, indices = tree1.query(coords[j])
137
138                    threshold = 2
139                    close_neighbors = np.sum(distances < threshold)
140
141                    full_list[f"{i} --> {j}"] = close_neighbors
142
143        return full_list

Compute close neighbors between nuclei coordinates using a threshold distance.

Parameters

coords : list List of numpy arrays, each containing coordinates for a nucleus.

Returns

dict Dictionary mapping pairs of nuclei indices to the number of close neighbors.

def compute_axes_length(self, contour):
145    def compute_axes_length(self, contour):
146        """
147        Compute major and minor axis lengths of a nucleus from its contour.
148
149        Parameters
150        ----------
151        contour : np.ndarray
152            Coordinates of nucleus contour points.
153
154        Returns
155        -------
156        tuple
157            Major and minor axis lengths.
158        """
159
160        cov = np.cov(contour.T)
161
162        eigvals, _ = np.linalg.eigh(cov)
163
164        axis_major_length = 2 * np.sqrt(eigvals.max())
165        axis_minor_length = 2 * np.sqrt(eigvals.min())
166
167        return axis_major_length, axis_minor_length

Compute major and minor axis lengths of a nucleus from its contour.

Parameters

contour : np.ndarray Coordinates of nucleus contour points.

Returns

tuple Major and minor axis lengths.

def compute_eccentricity(self, contour):
169    def compute_eccentricity(self, contour):
170        """
171        Compute eccentricity of a nucleus from its contour.
172
173        Parameters
174        ----------
175        contour : np.ndarray
176            Coordinates of nucleus contour points.
177
178        Returns
179        -------
180        float
181            Eccentricity of the nucleus.
182        """
183
184        cov = np.cov(contour.T)
185        eigvals, _ = np.linalg.eigh(cov)
186
187        eccentricity = np.sqrt(1 - (eigvals.min() / eigvals.max()))
188        return eccentricity

Compute eccentricity of a nucleus from its contour.

Parameters

contour : np.ndarray Coordinates of nucleus contour points.

Returns

float Eccentricity of the nucleus.

def compute_feret_diameter(self, contour):
190    def compute_feret_diameter(self, contour):
191        """
192        Compute the Feret diameter of a given contour.
193
194        The Feret diameter is defined as the maximum pairwise Euclidean distance between points in the contour.
195
196        Parameters
197        ----------
198        contour : np.ndarray
199            Array of shape (N, 2) representing the contour coordinates.
200
201        Returns
202        -------
203        float
204            The maximum distance between any two points in the contour.
205        """
206
207        rect = cv2.minAreaRect(contour)
208        (w, h) = rect[1]
209        return max(w, h)

Compute the Feret diameter of a given contour.

The Feret diameter is defined as the maximum pairwise Euclidean distance between points in the contour.

Parameters

contour : np.ndarray Array of shape (N, 2) representing the contour coordinates.

Returns

float The maximum distance between any two points in the contour.

def compute_perimeter(self, contour):
211    def compute_perimeter(self, contour):
212        """
213        Compute the perimeter of a contour.
214
215        The perimeter is calculated as the sum of Euclidean distances between consecutive points in the contour.
216
217        Parameters
218        ----------
219        contour : np.ndarray
220            Array of shape (N, 2) representing the contour coordinates.
221
222        Returns
223        -------
224        float
225            Perimeter length of the contour.
226        """
227
228        return np.sum(np.linalg.norm(np.diff(contour, axis=0), axis=1))

Compute the perimeter of a contour.

The perimeter is calculated as the sum of Euclidean distances between consecutive points in the contour.

Parameters

contour : np.ndarray Array of shape (N, 2) representing the contour coordinates.

Returns

float Perimeter length of the contour.

def compute_circularity(self, contour):
230    def compute_circularity(self, contour):
231        """
232        Compute the circularity of a contour.
233
234        Circularity is a measure of how close the shape is to a perfect circle.
235        It is calculated as 4 * pi * (area / perimeter^2).
236
237        Parameters
238        ----------
239        contour : np.ndarray
240            Array of shape (N, 2) representing the contour coordinates.
241
242        Returns
243        -------
244        float
245            Circularity of the contour. Value ranges from 0 to 1, where 1 indicates a perfect circle.
246        """
247        perimeter = self.compute_perimeter(contour)
248        hull = ConvexHull(contour)
249        area = hull.volume
250
251        return (4 * np.pi * area) / (perimeter**2)

Compute the circularity of a contour.

Circularity is a measure of how close the shape is to a perfect circle. It is calculated as 4 * pi * (area / perimeter^2).

Parameters

contour : np.ndarray Array of shape (N, 2) representing the contour coordinates.

Returns

float Circularity of the contour. Value ranges from 0 to 1, where 1 indicates a perfect circle.

def repairing_nuclei(self, results):
    def repairing_nuclei(self, results):
        """
        Repair nuclei segmentation results by merging or removing outlier nuclei.

        This method adjusts nuclei detection results based on global and local thresholds, circularity, nearest neighbor relationships,
        and merges small or fragmented nuclei when appropriate. It also recalculates key morphological properties for merged nuclei.

        Parameters
        ----------
        results : dict
            Dictionary where keys are image identifiers and values are dictionaries containing detected nuclei properties
            (e.g., 'area', 'coords', 'label', 'circularity', etc.).

        Returns
        -------
        dict
            A dictionary in the same structure as `results`, but with repaired nuclei information after merging or removing outliers.
        """

        # repairing nuclei
        # Collect per-image totals: summed nucleus area, nucleus count and the
        # image key, kept in parallel lists so they can be zipped below.
        mean_sum_area = []
        im = []
        n = []
        for r in tqdm(results.keys()):
            mean_sum_area.append(np.sum(results[r]["area"]))
            n.append(len(results[r]["area"]))
            im.append(r)

        # Global reference: mean of the per-image total nucleus areas.
        mean_sum_area_sum = np.mean(mean_sum_area)

        results_dict = {}

        print("\nImage repairing:\n\n")

        for i, m in tqdm(zip(im, n), total=len(im)):

            # Repair only images with more than one nucleus whose total area
            # is small relative to the global mean (fragmented detections).
            if (
                m > 1
                and np.sum(results[i]["area"]) / mean_sum_area_sum
                < self.hyperparameter_nuclei["FC_diff_global"]
            ):
                # adjustment to global changes

                temporary_dict = results[i]

                # Flag low-intensity / low-circularity nuclei for removal.
                # NOTE(review): drop_outlires filters by circularity before
                # computing its indices -- confirm the returned indices refer
                # to the same index space as `temporary_dict` used below.
                check_drop = self.drop_outlires(
                    temporary_dict,
                    diff_FC_intensity=self.hyperparameter_nuclei[
                        "FC_diff_local_intensity"
                    ],
                    circ=self.hyperparameter_nuclei["circularity"],
                )

                # Keep every nucleus index that was not flagged for dropping.
                to_final = [
                    x
                    for x in list(range(len(temporary_dict["area"])))
                    if int(x) not in check_drop
                ]

                tmp = self.extract_dict_by_indices(temporary_dict, to_final)

                to_concat = []

                # With at least two surviving nuclei, look for pairs whose
                # contours nearly touch -- candidates for merging.
                if len(tmp["coords"]) > 1:

                    results_nn = self.nn(tmp["coords"])

                    # Keys look like "i --> j"; collect both endpoints of any
                    # pair with enough close contour points.
                    for kn in results_nn.keys():
                        if results_nn[kn] > self.hyperparameter_nuclei["nn_min"]:
                            to_concat.append(int(re.sub(" --> .*", "", kn)))
                            to_concat.append(int(re.sub(".* --> ", "", kn)))

                    to_concat = list(set(to_concat))

                    # Nuclei not involved in any merge are passed through.
                    to_rest = [
                        x for x in list(range(len(tmp["area"]))) if x not in to_concat
                    ]

                #
                # Merge all touching nuclei into a single entry and recompute
                # its morphology from the stacked coordinates.
                if len(to_concat) > 1:
                    to_concat_dict = self.extract_dict_by_indices(tmp, to_concat)
                    to_concat_dict["coords"] = [np.vstack(to_concat_dict["coords"])]
                    # Smallest label represents the merged nucleus.
                    to_concat_dict["label"] = [min(to_concat_dict["label"])]
                    # Additive properties are summed across merged parts.
                    to_concat_dict["area"] = [np.sum(to_concat_dict["area"])]
                    to_concat_dict["area_bbox"] = [np.sum(to_concat_dict["area_bbox"])]
                    to_concat_dict["area_convex"] = [
                        np.sum(to_concat_dict["area_convex"])
                    ]
                    to_concat_dict["area_filled"] = [
                        np.sum(to_concat_dict["area_filled"])
                    ]
                    # Intensity extrema/average are taken over the parts.
                    to_concat_dict["intensity_max"] = [
                        np.max(to_concat_dict["intensity_max"])
                    ]
                    to_concat_dict["intensity_mean"] = [
                        np.mean(to_concat_dict["intensity_mean"])
                    ]
                    to_concat_dict["intensity_min"] = [
                        np.min(to_concat_dict["intensity_min"])
                    ]
                    # Shape descriptors are recomputed from the merged coords.
                    major, minor = self.compute_axes_length(to_concat_dict["coords"][0])
                    to_concat_dict["axis_major_length"] = [major]
                    to_concat_dict["axis_minor_length"] = [minor]
                    # NOTE(review): divides by `major` -- presumably nonzero
                    # for any real contour; verify for degenerate coords.
                    to_concat_dict["ratio"] = [minor / major]
                    ecc = self.compute_eccentricity(to_concat_dict["coords"][0])
                    to_concat_dict["eccentricity"] = [ecc]
                    to_concat_dict["equivalent_diameter_area"] = [
                        np.sum(to_concat_dict["equivalent_diameter_area"])
                    ]
                    feret_diameter = self.compute_feret_diameter(
                        to_concat_dict["coords"][0]
                    )
                    to_concat_dict["feret_diameter_max"] = [feret_diameter]
                    to_concat_dict["solidity"] = [np.mean(to_concat_dict["solidity"])]
                    to_concat_dict["perimeter"] = [np.sum(to_concat_dict["perimeter"])]
                    to_concat_dict["perimeter_crofton"] = [
                        np.sum(to_concat_dict["perimeter_crofton"])
                    ]
                    to_concat_dict["circularity"] = [
                        np.mean(to_concat_dict["circularity"])
                    ]

                    # Append the merged nucleus after the untouched ones.
                    to_rest_dict = self.extract_dict_by_indices(tmp, to_rest)

                    for ik in to_rest_dict.keys():
                        to_rest_dict[ik] = to_rest_dict[ik] + to_concat_dict[ik]

                    results_dict[i] = to_rest_dict

                else:
                    # Nothing to merge: keep the outlier-filtered nuclei.
                    results_dict[i] = tmp

            # Single-nucleus images are kept only if sufficiently circular.
            elif (
                m == 1
                and results[i]["circularity"][0]
                > self.hyperparameter_nuclei["circularity"]
            ):

                results_dict[i] = results[i]

        return results_dict

Repair nuclei segmentation results by merging or removing outlier nuclei.

This method adjusts nuclei detection results based on global and local thresholds, circularity, nearest neighbor relationships, and merges small or fragmented nuclei when appropriate. It also recalculates key morphological properties for merged nuclei.

Parameters

results : dict Dictionary where keys are image identifiers and values are dictionaries containing detected nuclei properties (e.g., 'area', 'coords', 'label', 'circularity', etc.).

Returns

dict A dictionary in the same structure as results, but with repaired nuclei information after merging or removing outliers.

class ImagesManagement:
class ImagesManagement:
    """
    A class for managing, preprocessing, merging, stitching, saving, and loading
    microscopy or flow cytometry images used in NucleiFinder-based analyses.

    This class provides a unified interface for:

    - loading image data,
    - selecting images by IDs,
    - preprocessing images (equalization, CLAHE, gamma/contrast/brightness adjustment),
    - merging images with user-defined intensity ratios,
    - stitching images horizontally,
    - retrieving and saving processed image sets.

    The class stores original or loaded data in the ``results_images`` attribute,
    and all processed images in ``prepared_images`` under user-defined acronyms.
    These acronyms allow flexible retrieval with ``get_prepared_images()``
    and exporting via ``save_prepared_images()``.

    Parameters
    ----------
    images_ids : list[int]
        List of selected image identifiers.

    result_dict : dict or None
        Dictionary containing raw or preprocessed images.
        If ``None``, images may later be loaded or processed from file.

    experiment_name : str
        Name of the experiment. Used for saving and structuring output.

    Attributes
    ----------
    images_ids : list[int]
        IDs of images managed by the class.

    results_images : dict or None
        Dictionary containing raw or analysis-derived images.

    experiment_name : str
        Name of the experiment. Used in saved filenames.

    prepared_images : dict
        Container for processed/adjusted/merged/stitched images,
        indexed by user-defined acronyms.

    Notes
    -----
    Processed images are stored only in memory until saved explicitly with
    ``save_prepared_images()``.

    Raw images loaded from NucleiFinder analyses can be saved for later reuse
    in a serialized `.inuc` format using ``save_raw()``.

    Examples
    --------
    Load image results from an analysis:

    >>> manager = ImagesManagement.load_experimental_images(results, "experiment1")

    Adjust selected images:

    >>> manager.adjust_images(
    ...     acronyme="adj",
    ...     path_to_images="path/to/imgs",
    ...     eq=True,
    ...     clahe=True
    ... )

    Merge multiple prepared sets:

    >>> manager.image_merging(["adj", "other"], ratio_list=[0.7, 0.3])

    Retrieve processed images:

    >>> imgs = manager.get_prepared_images("adj")

    Save stitched images to disk:

    >>> manager.save_prepared_images("stitched_adj_other", "./output/")
    """

    def __init__(self, images_ids, result_dict, experiment_name):
        """
        Initialize the ImagesManagement object.

        Parameters
        ----------
        images_ids : list[int]
            List of image identifiers.

        result_dict : dict or None
            Dictionary containing processed images.

        experiment_name : str
            Name of the experiment.
        """

        self.images_ids = images_ids
        """Stores the list of image IDs managed by this instance."""
        self.results_images = result_dict
        """Stores dictionary containing processed images."""
        self.experiment_name = experiment_name
        """Stores the experiment name for file naming and organizational purposes."""
        self.prepared_images = {}
        """Dictionary for storing processed images (adjusted, merged, stitched),
        indexed by user-defined acronyms for flexible retrieval."""

    @classmethod
    def load_from_dict(cls, path: str, experiment_name: str):
        """
        Load an ImagesManagement instance from a `.inuc` serialized dictionary.

        Parameters
        ----------
        path : str
            Path to the `.inuc` file exported with `save_raw()`.

        experiment_name : str
            Name of the experiment.

        Returns
        -------
        ImagesManagement
            A reconstructed ImagesManagement object.

        Raises
        ------
        ValueError
            If the path does not exist or does not contain '.inuc'.
        """

        if ".inuc" in path:

            if os.path.exists(path):

                # np.load on an archive returns a lazy NpzFile; materialize it
                # into a plain dict so arrays outlive the file handle.
                loaded_data = np.load(path)
                data_dict = {key: loaded_data[key] for key in loaded_data}

                # Image IDs are the prefix of each key before the first '_'.
                id_list = []

                for k in data_dict.keys():
                    id_list.append(re.sub("_.*", "", k))

                return cls(id_list, data_dict, experiment_name)

            else:
                raise ValueError("\nInvalid path!")

        else:
            raise ValueError(
                "\nInvalid dictionary to load. It must contain a .inuc extension!"
            )

    @classmethod
    def load_experimental_images(cls, results_dict: dict, experiment_name: str):
        """
        Load results exported from NucleiFinder series analysis.

        Initialize the object with results from series_analysis_nuclei()
        or series_analysis_chromatinization() of the NucleiFinder class.


        Parameters
        ----------
        results_dict : dict
            Dictionary returned by `series_analysis_nuclei()` or
            `series_analysis_chromatinization()`.

        experiment_name : str
            Name of the experiment.

        Returns
        -------
        ImagesManagement

        Raises
        ------
        ValueError
            If the entries do not have exactly the 'stats' and 'img' keys.
        """

        res_dict = {}
        id_list = []

        # Validate the expected NucleiFinder schema on the first entry.
        if set(results_dict[list(results_dict.keys())[0]].keys()) != set(
            ["stats", "img"]
        ):
            raise ValueError(
                "Incorrect data provided. The data must come from series_analysis_nuclei() "
                "or series_analysis_chromatinization() of the NucleiFinder class."
            )

        for k in results_dict.keys():
            res_dict[k] = results_dict[k]["img"]
            # Image IDs are the prefix of each key before the first '_'.
            id_list.append(re.sub("_.*", "", k))

        return cls(id_list, res_dict, experiment_name)

    @classmethod
    def load_images_ids(cls, images_ids: list, experiment_name: str):
        """
        Initialize the object with a list of image IDs for processing.

        Parameters
        ----------
        images_ids : list[int]
            List of selected image IDs.

        experiment_name : str
            Name of the experiment.

        Returns
        -------
        ImagesManagement

        Raises
        ------
        ValueError
            If `images_ids` is empty.
        """

        if len(images_ids) == 0:
            raise ValueError(
                "Incorrect data provided. There must be a list of image IDs."
            )

        return cls(images_ids, None, experiment_name)

    def get_included_acronyms(self):
        """
        Print the data acronyms for adjusted images, processed using the
        self.adjust_images(), self.image_merging(), and self.image_stitching() methods.

        Acronym information is essential for retrieving and saving data using
        the self.get_prepared_images() and self.save_prepared_images() methods.

        Notes
        -----
        This method prints the list of available acronyms but does not return it.

        """

        if len(self.prepared_images.keys()) > 0:
            print("\nAvaiable stored images:\n")
            for kd in self.prepared_images.keys():
                print(kd)

        else:
            print("Nothing to return!")

    def get_prepared_images(self, acronyme=None):
        """
        Retrieves the prepared images (returned from adjust_images()) stored in the object.


        Parameters
        ----------
        acronyme : str or None
            Acronym identifying a processed image set. If None, prints available keys
            and returns None.


        Returns
        -------
        dict
            Dictionary of prepared images.

        Raises
        ------
        ValueError
            If `acronyme` is not a stored acronym.
        """

        if acronyme is None:

            self.get_included_acronyms()

        else:

            if acronyme in list(self.prepared_images.keys()):
                return self.prepared_images[acronyme]

            raise ValueError("Incorrect acronyme!")

    def save_prepared_images(self, acronyme: str, path_to_save: str = ""):
        """
        Saves prepared images (returned from adjust_images() method) to the specified directory.

        Parameters
        ----------
        acronyme : str
            Acronym identifying the processed image set to save.
            If None, available acronyms are printed instead.

        path_to_save : str
            Directory path where the images will be saved. Default is the current working directory.

        Raises
        ------
        ValueError
            If `acronyme` is not a stored acronym.
        """
        if acronyme is None:

            self.get_included_acronyms()

        else:

            if acronyme in list(self.prepared_images.keys()):

                # Images go into a dedicated "<experiment>_<acronyme>" folder.
                path_to_save = os.path.join(
                    path_to_save, f"{self.experiment_name}_{acronyme}"
                )

                if not os.path.exists(path_to_save):
                    os.makedirs(path_to_save, exist_ok=True)

                for i in tqdm(self.prepared_images[acronyme].keys()):
                    cv2.imwrite(
                        os.path.join(path_to_save, i + ".png"),
                        self.prepared_images[acronyme][i],
                    )

            else:
                raise ValueError("Incorrect acronyme!")

    def adjust_images(
        self,
        acronyme: str,
        path_to_images: str,
        file_extension: str = "tif",
        eq: bool = True,
        clahe: bool = True,
        kernal: tuple = (50, 50),
        fille_name_part: str = "",
        color: str = "gray",
        max_intensity: int = 65535,
        min_intenisty: int = 0,
        brightness: int = 1000,
        contrast: float = 1.0,
        gamma: float = 1.0,
        img_n: int = 0,
    ):
        """
        Prepares selected images for processing, applying histogram equalization and CLAHE, if required.

        Parameters
        ----------
        acronyme : str
            Name of images being adjusted in this run.

        path_to_images : str
            Path to the directory containing images.

        file_extension : str
            Image file extension. Default is 'tif'.

        eq : bool
            Whether to apply histogram equalization. Default is True.

        clahe : bool
            Whether to apply CLAHE. Default is True.

        kernal : tuple
            Kernel size for CLAHE. Default is (50, 50).

        fille_name_part : str
            Part of the file name to filter images. Default is an empty string.

        color : str
            Color space to use. Default is 'gray'.

        max_intensity : int
            Maximum intensity for image adjustment. Default is 65535.

        min_intenisty : int
            Minimum intensity for image adjustment. Default is 0.

        brightness : int
            Brightness adjustment value. Default is 1000.

        contrast : float
            Contrast adjustment factor. Default is 1.0.

        gamma : float
            Gamma correction factor. Default is 1.0.

        img_n : int
            Number of images to process. Default is 0, which means all images.


        Returns
        -------
        dict
            Dictionary containing the processed images.

        Notes
        -----
        To access the processed images, use the ``get_prepared_images()`` method.

        To save the processed images to disk, use the ``save_prepared_images()`` method.
        """

        results_dict = {}

        files = glob.glob(os.path.join(path_to_images, "*." + file_extension))

        # Optional case-insensitive filename substring filter.
        if len(fille_name_part) > 0:
            files = [x for x in files if fille_name_part.lower() in x.lower()]

        selected_id = self.images_ids

        # Keep only files whose basename prefix (before the first '_')
        # matches one of the selected IDs.
        if len(selected_id) > 0:
            selected_id = [str(x) for x in selected_id]
            files = [
                x
                for x in files
                if re.sub("_.*", "", os.path.basename(x)) in selected_id
            ]

        # Optionally process a random subset of img_n files.
        if img_n > 0:

            files = random.sample(files, img_n)

        for file in tqdm(files):

            image = load_image(file)

            # Best-effort conversion to grayscale; inputs that are already
            # single-channel make cvtColor raise and are used as-is.
            try:
                image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            except Exception:
                pass

            if eq is True:
                image = equalizeHist_16bit(image)

            if clahe is True:
                image = clahe_16bit(image, kernal=kernal)

            image = adjust_img_16bit(
                img=image,
                color=color,
                max_intensity=max_intensity,
                min_intenisty=min_intenisty,
                brightness=brightness,
                contrast=contrast,
                gamma=gamma,
            )

            results_dict[os.path.basename(file)] = image

        self.prepared_images[acronyme] = results_dict

    def image_merging(self, acronyms: list, ratio_list: list):
        """
        Merge previously prepared images stored in `self.prepared_images`,
        adjusted based on the image ratios. The used ratios adjust relative image intensity.

        Parameters
        ----------
        acronyms : list
            List of image names to be merged.

        ratio_list : list[float]
            List of ratio intensity values (0.0–1.0) for the merged image.
            The `acronyms` list and `ratio_list` must be of the same length.

        Returns
        -------
        dict
            Dictionary of processed images.

        Notes
        -----
        To access the processed images, use the ``get_prepared_images()`` method.

        To save the processed images to disk, use the ``save_prepared_images()`` method.
        """

        for a in acronyms:
            if a not in list(self.prepared_images.keys()):
                raise ValueError(f"Incorrect {a} acronyme!")

        results_img = {}
        for k in self.images_ids:
            img_list = []
            for a in acronyms:
                # Find this ID's image within the acronym's prepared set.
                nam = [
                    x
                    for x in self.prepared_images[a].keys()
                    if str(k) == re.sub("_.*", "", x)
                ]
                if len(nam) == 0:
                    print(f"There were not images for {k} ids")
                    break

                img_list.append(self.prepared_images[a][nam[0]])

            # Merge only when every acronym contributed an image.
            if len(img_list) == len(acronyms):
                results_img[f'{k}_{"_".join(acronyms)}'] = merge_images(
                    img_list, ratio_list
                )

        self.prepared_images[f'merged_{"_".join(acronyms)}'] = results_img

        print(f'Images stored in self.prepared_images["merged_{"_".join(acronyms)}"]')

    def image_stitching(self, acronyms: list, to_results_image: bool = False):
        """
        Stitch (horizontally) previously prepared images stored in `self.prepared_images`.

        Parameters
        ----------
        acronyms : list
            List of image names to be stitched.

        to_results_image : bool
            Boolean value indicating whether images obtained from the
            `series_analysis_nuclei()` or `series_analysis_chromatinization()`
            methods of the `NucleiFinder` class should be stitched to the right
            side of the images in the `acronyms` list.

        Returns
        -------
        dict
            Dictionary of processed images.

        Notes
        -----
        To access the processed images, use the ``get_prepared_images()`` method.

        To save the processed images to disk, use the ``save_prepared_images()`` method.
        """

        for a in acronyms:
            if a not in list(self.prepared_images.keys()):
                raise ValueError(f"Incorrect {a} acronyme!")

        results_img = {}
        for k in tqdm(self.images_ids):
            img_list = []
            for a in acronyms:
                # Find this ID's image within the acronym's prepared set.
                nam = [
                    x
                    for x in self.prepared_images[a].keys()
                    if str(k) == re.sub("_.*", "", x)
                ]
                if len(nam) == 0:
                    print(f"There were not images for {k} ids")
                    break

                img_list.append(self.prepared_images[a][nam[0]])

            if to_results_image:
                # Append the analysis result image for this ID, if present.
                nam = [
                    x
                    for x in self.results_images.keys()
                    if str(k) == re.sub("_.*", "", x)
                ]
                if len(nam) != 0:
                    img_list.append(self.results_images[nam[0]])

                if len(img_list) == len(acronyms) + 1:
                    results_img[f'{k}_{"_".join(acronyms)}_res'] = cv2.hconcat(img_list)

            else:
                # BUGFIX: this branch was previously guarded by
                # `elif to_results_image is not False:`, which could never run
                # when to_results_image was False -- the default call produced
                # an empty result set.
                if len(img_list) == len(acronyms):
                    results_img[f'{k}_{"_".join(acronyms)}'] = cv2.hconcat(img_list)

        self.prepared_images[f'stitched_{"_".join(acronyms)}'] = results_img

        print(f'Images stored in self.prepared_images["stitched_{"_".join(acronyms)}"]')

    def save_raw(self, path_to_save: str = ""):
        """
        Save `self.results_images` loaded by the `self.load_experimental_images()` method,
        obtained from the `series_analysis_nuclei()` or `series_analysis_chromatinization()`
        methods of the `NucleiFinder` class for later usage with cls.load_from_dict() method.
        The data will be saved with a `.inuc` extension.

        Parameters
        ----------
        path_to_save : str
            The directory path where the images will be saved.
            Default is the current working directory.
        """

        full_path = os.path.join(path_to_save, f"{self.experiment_name}.inuc")

        # NOTE(review): np.savez appends '.npz' when the filename does not end
        # with it, so the file on disk is likely '<name>.inuc.npz' -- confirm
        # against what load_from_dict() expects.
        np.savez(full_path, **self.results_images)

A class for managing, preprocessing, merging, stitching, saving, and loading microscopy or flow cytometry images used in NucleiFinder-based analyses.

This class provides a unified interface for:

  • loading image data,
  • selecting images by IDs,
  • preprocessing images (equalization, CLAHE, gamma/contrast/brightness adjustment),
  • merging images with user-defined intensity ratios,
  • stitching images horizontally,
  • retrieving and saving processed image sets.

The class stores original or loaded data in the results_images attribute, and all processed images in prepared_images under user-defined acronyms. These acronyms allow flexible retrieval with get_prepared_images() and exporting via save_prepared_images().

Parameters

images_ids : list[int] List of selected image identifiers.

result_dict : dict or None Dictionary containing raw or preprocessed images. If None, images may later be loaded or processed from file.

experiment_name : str Name of the experiment. Used for saving and structuring output.

Attributes

images_ids : list[int] IDs of images managed by the class.

results_images : dict or None Dictionary containing raw or analysis-derived images.

experiment_name : str Name of the experiment. Used in saved filenames.

prepared_images : dict Container for processed/adjusted/merged/stitched images, indexed by user-defined acronyms.

Notes

Processed images are stored only in memory until saved explicitly with save_prepared_images().

Raw images loaded from NucleiFinder analyses can be saved for later reuse in a serialized .inuc format using save_raw().

Examples

Load image results from an analysis:

>>> manager = ImagesManagement.load_experimental_images(results, "experiment1")

Adjust selected images:

>>> manager.adjust_images(
...     acronyme="adj",
...     path_to_images="path/to/imgs",
...     eq=True,
...     clahe=True
... )

Merge multiple prepared sets:

>>> manager.image_merging(["adj", "other"], ratio_list=[0.7, 0.3])

Retrieve processed images:

>>> imgs = manager.get_prepared_images("adj")

Save stitched images to disk:

>>> manager.save_prepared_images("stitched_adj_other", "./output/")
ImagesManagement(images_ids, result_dict, experiment_name)
478    def __init__(self, images_ids, result_dict, experiment_name):
479        """
480        Initialize the ImagesManagement object.
481
482        Parameters
483        ----------
484        images_ids : list[int]
485            List of image identifiers.
486
487        result_dict : dict or None
488            Dictionary containing processed images.
489
490        experiment_name : str
491            Name of the experiment.
492        """
493
494        self.images_ids = images_ids
495        """Stores the list of image IDs managed by this instance."""
496        self.results_images = result_dict
497        """Stores dictionary containing processed images."""
498        self.experiment_name = experiment_name
499        """Stores the experiment name for file naming and organizational purposes."""
500        self.prepared_images = {}
501        """Dictionary for storing processed images (adjusted, merged, stitched),
502        indexed by user-defined acronyms for flexible retrieval."""

Initialize the ImagesManagement object.

Parameters

images_ids : list[int] List of image identifiers.

result_dict : dict or None Dictionary containing processed images.

experiment_name : str Name of the experiment.

images_ids

Stores the list of image IDs managed by this instance.

results_images

Stores dictionary containing processed images.

experiment_name

Stores the experiment name for file naming and organizational purposes.

prepared_images

Dictionary for storing processed images (adjusted, merged, stitched), indexed by user-defined acronyms for flexible retrieval.

@classmethod
def load_from_dict(cls, path: str, experiment_name: str):
504    @classmethod
505    def load_from_dict(cls, path: str, experiment_name: str):
506        """
507        Load an ImagesManagement instance from a `.inuc` serialized dictionary.
508
509        Parameters
510        ----------
511        path : str
512            Path to the `.inuc` file exported with `save_raw()`.
513
514        experiment_name : str
515            Name of the experiment.
516
517        Returns
518        -------
519        ImagesManagement
520            A reconstructed ImagesManagement object.
521        """
522
523        if ".inuc" in path:
524
525            if os.path.exists(path):
526
527                loaded_data = np.load(path)
528                data_dict = {key: loaded_data[key] for key in loaded_data}
529
530                id_list = []
531
532                for k in data_dict.keys():
533                    id_list.append(re.sub("_.*", "", k))
534
535                return cls(id_list, data_dict, experiment_name)
536
537            else:
538                raise ValueError("\nInvalid path!")
539
540        else:
541            raise ValueError(
542                "\nInvalid dictionary to load. It must contain a .inuc extension!"
543            )

Load an ImagesManagement instance from a .inuc serialized dictionary.

Parameters

path : str Path to the .inuc file exported with save_raw().

experiment_name : str Name of the experiment.

Returns

ImagesManagement A reconstructed ImagesManagement object.

@classmethod
def load_experimental_images(cls, results_dict: dict, experiment_name: str):
545    @classmethod
546    def load_experimental_images(cls, results_dict: dict, experiment_name: str):
547        """
548        Load results exported from NucleiFinder series analysis.
549
550        Initialize the object with results from series_analysis_nuclei()
551        or series_analysis_chromatinization() of the NucleiFinder class.
552
553
554        Parameters
555        ----------
556        results_dict : dict
557            Dictionary returned by `series_analysis_nuclei()` or
558            `series_analysis_chromatinization()`.
559
560        experiment_name : str
561            Name of the experiment.
562
563        Returns
564        -------
565        ImagesManagement
566
567        """
568
569        res_dict = {}
570        id_list = []
571
572        if set(results_dict[list(results_dict.keys())[0]].keys()) != set(
573            ["stats", "img"]
574        ):
575            raise ValueError(
576                "Incorrect data provided. The data must come from series_analysis_nuclei() "
577                "or series_analysis_chromatinization() of the NucleiFinder class."
578            )
579
580        for k in results_dict.keys():
581            res_dict[k] = results_dict[k]["img"]
582            id_list.append(re.sub("_.*", "", k))
583
584        return cls(id_list, res_dict, experiment_name)

Load results exported from NucleiFinder series analysis.

Initialize the object with results from series_analysis_nuclei() or series_analysis_chromatinization() of the NucleiFinder class.

Parameters

results_dict : dict Dictionary returned by series_analysis_nuclei() or series_analysis_chromatinization().

experiment_name : str Name of the experiment.

Returns

ImagesManagement

@classmethod
def load_images_ids(cls, images_ids: list, experiment_name: str):
586    @classmethod
587    def load_images_ids(cls, images_ids: list, experiment_name: str):
588        """
589        Initialize the object with a list of image IDs for processing.
590
591        Parameters
592        ----------
593        images_ids : list[int]
594            List of selected image IDs.
595
596        experiment_name : str
597            Name of the experiment.
598
599        Returns
600        -------
601        ImagesManagement
602
603        """
604
605        if len(images_ids) == 0:
606            raise ValueError(
607                "Incorrect data provided. There must be a list of image IDs."
608            )
609
610        return cls(images_ids, None, experiment_name)

Initialize the object with a list of image IDs for processing.

Parameters

images_ids : list[int] List of selected image IDs.

experiment_name : str Name of the experiment.

Returns

ImagesManagement

def get_included_acronyms(self):
612    def get_included_acronyms(self):
613        """
614        Print the data acronyms for adjusted images, processed using the
615        self.adjust_images(), self.image_merging(), and self.image_stitching() methods.
616
617        Acronym information is essential for retrieving and saving data using
618        the self.get_prepared_images() and self.save_prepared_images() methods.
619
620        Notes
621        -----
622        This method prints the list of available acronyms but does not return it.
623
624        """
625
626        if len(self.prepared_images.keys()) > 0:
627            print("\nAvaiable stored images:\n")
628            for kd in self.prepared_images.keys():
629                print(kd)
630
631        else:
632            print("Nothing to return!")

Print the data acronyms for adjusted images, processed using the self.adjust_images(), self.image_merging(), and self.image_stitching() methods.

Acronym information is essential for retrieving and saving data using the self.get_prepared_images() and self.save_prepared_images() methods.

Notes

This method prints the list of available acronyms but does not return it.

def get_prepared_images(self, acronyme=None):
634    def get_prepared_images(self, acronyme=None):
635        """
636        Retrieves the prepared images (returned from adjust_images()) stored in the object.
637
638
639        Parameters
640        ----------
641        acronyme : str or None
642            Acronym identifying a processed image set. If None, prints available keys.
643
644
645        Returns
646        -------
647        dict
648            Dictionary of prepared images.
649        """
650
651        if acronyme is None:
652
653            self.get_included_acronyms()
654
655        else:
656
657            if acronyme in list(self.prepared_images.keys()):
658                return self.prepared_images[acronyme]
659
660            raise ValueError("Incorrect acronyme!")

Retrieves the prepared images (returned from adjust_images()) stored in the object.

Parameters

acronyme : str or None Acronym identifying a processed image set. If None, prints available keys.

Returns

dict Dictionary of prepared images.

def save_prepared_images(self, acronyme: str, path_to_save: str = ''):
662    def save_prepared_images(self, acronyme: str, path_to_save: str = ""):
663        """
664        Saves prepared images (returned from adjust_images() method) to the specified directory.
665
666        Parameters
667        ----------
668        path_to_save : str
669            Directory path where the images will be saved. Default is the current working directory.
670
671        """
672        if acronyme is None:
673
674            self.get_included_acronyms()
675
676        else:
677
678            if acronyme in list(self.prepared_images.keys()):
679
680                path_to_save = os.path.join(
681                    path_to_save, f"{self.experiment_name}_{acronyme}"
682                )
683
684                if not os.path.exists(path_to_save):
685                    os.makedirs(path_to_save, exist_ok=True)
686
687                for i in tqdm(self.prepared_images[acronyme].keys()):
688                    cv2.imwrite(
689                        os.path.join(path_to_save, i + ".png"),
690                        self.prepared_images[acronyme][i],
691                    )
692
693            else:
694                raise ValueError("Incorrect acronyme!")

Saves prepared images (returned from adjust_images() method) to the specified directory.

Parameters

path_to_save : str Directory path where the images will be saved. Default is the current working directory.

def adjust_images( self, acronyme: str, path_to_images: str, file_extension: str = 'tif', eq: bool = True, clahe: bool = True, kernal: tuple = (50, 50), fille_name_part: str = '', color: str = 'gray', max_intensity: int = 65535, min_intenisty: int = 0, brightness: int = 1000, contrast: float = 1.0, gamma: float = 1.0, img_n: int = 0):
696    def adjust_images(
697        self,
698        acronyme: str,
699        path_to_images: str,
700        file_extension: str = "tif",
701        eq: bool = True,
702        clahe: bool = True,
703        kernal: tuple = (50, 50),
704        fille_name_part: str = "",
705        color: str = "gray",
706        max_intensity: int = 65535,
707        min_intenisty: int = 0,
708        brightness: int = 1000,
709        contrast: float = 1.0,
710        gamma: float = 1.0,
711        img_n: int = 0,
712    ):
713        """
714        Prepares selected images for processing, applying histogram equalization and CLAHE, if required.
715
716        Parameters
717        ----------
718        acronyme : str
719            Name of images being adjusted in this run.
720
721        path_to_images : str
722            Path to the directory containing images.
723
724        file_extension : str
725            Image file extension. Default is 'tif'.
726
727        eq : bool
728            Whether to apply histogram equalization. Default is True.
729
730        clahe : bool
731            Whether to apply CLAHE. Default is True.
732
733        kernal : tuple
734            Kernel size for CLAHE. Default is (50, 50).
735
736        fille_name_part : str
737            Part of the file name to filter images. Default is an empty string.
738
739        color : str
740            Color space to use. Default is 'gray'.
741
742        max_intensity : int
743            Maximum intensity for image adjustment. Default is 65535.
744
745        min_intenisty : int
746            Minimum intensity for image adjustment. Default is 0.
747
748        brightness : int
749            Brightness adjustment value. Default is 1000.
750
751        contrast : float
752            Contrast adjustment factor. Default is 1.0.
753
754        gamma : float
755            Gamma correction factor. Default is 1.0.
756
757        img_n : int
758            Number of images to process. Default is 0, which means all images.
759
760
761        Returns
762        -------
763        dict
764            Dictionary containing the processed images.
765
766        Notes
767        -----
768        To access the processed images, use the ``get_prepared_images()`` method.
769
770        To save the processed images to disk, use the ``save_prepared_images()`` method.
771        """
772
773        results_dict = {}
774
775        files = glob.glob(os.path.join(path_to_images, "*." + file_extension))
776
777        if len(fille_name_part) > 0:
778            files = [x for x in files if fille_name_part.lower() in x.lower()]
779
780        selected_id = self.images_ids
781
782        if len(selected_id) > 0:
783            selected_id = [str(x) for x in selected_id]
784            files = [
785                x
786                for x in files
787                if re.sub("_.*", "", os.path.basename(x)) in selected_id
788            ]
789
790        if img_n > 0:
791
792            files = random.sample(files, img_n)
793
794        for file in tqdm(files):
795
796            image = load_image(file)
797
798            try:
799                image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
800            except:
801                pass
802
803            if eq is True:
804                image = equalizeHist_16bit(image)
805
806            if clahe is True:
807                image = clahe_16bit(image, kernal=kernal)
808
809            image = adjust_img_16bit(
810                img=image,
811                color=color,
812                max_intensity=max_intensity,
813                min_intenisty=min_intenisty,
814                brightness=brightness,
815                contrast=contrast,
816                gamma=gamma,
817            )
818
819            results_dict[os.path.basename(file)] = image
820
821        self.prepared_images[acronyme] = results_dict

Prepares selected images for processing, applying histogram equalization and CLAHE, if required.

Parameters

acronyme : str Name of images being adjusted in this run.

path_to_images : str Path to the directory containing images.

file_extension : str Image file extension. Default is 'tif'.

eq : bool Whether to apply histogram equalization. Default is True.

clahe : bool Whether to apply CLAHE. Default is True.

kernal : tuple Kernel size for CLAHE. Default is (50, 50).

fille_name_part : str Part of the file name to filter images. Default is an empty string.

color : str Color space to use. Default is 'gray'.

max_intensity : int Maximum intensity for image adjustment. Default is 65535.

min_intenisty : int Minimum intensity for image adjustment. Default is 0.

brightness : int Brightness adjustment value. Default is 1000.

contrast : float Contrast adjustment factor. Default is 1.0.

gamma : float Gamma correction factor. Default is 1.0.

img_n : int Number of images to process. Default is 0, which means all images.

Returns

dict Dictionary containing the processed images.

Notes

To access the processed images, use the get_prepared_images() method.

To save the processed images to disk, use the save_prepared_images() method.

def image_merging(self, acronyms: list, ratio_list: list):
823    def image_merging(self, acronyms: list, ratio_list: list):
824        """
825        Merge previously prepared images stored in `self.prepared_images`,
826        adjusted based on the image ratios. The used ratios adjust relative image intensity.
827
828        Parameters
829        ----------
830        acronyms : list
831            List of image names to be merged.
832
833        ratio_list : list[float]
834            List of ratio intensity values (0.0–1.0) for the merged image.
835            The `acronyms` list and `ratio_list` must be of the same length.
836
837        Returns
838        -------
839        dict
840            Dictionary of processed images.
841
842        Notes
843        -----
844        To access the processed images, use the ``get_prepared_images()`` method.
845
846        To save the processed images to disk, use the ``save_prepared_images()`` method.
847        """
848
849        for a in acronyms:
850            if a not in list(self.prepared_images.keys()):
851                raise ValueError(f"Incorrect {a} acronyme!")
852
853        results_img = {}
854        for k in self.images_ids:
855            img_list = []
856            for a in acronyms:
857                nam = [
858                    x
859                    for x in self.prepared_images[a].keys()
860                    if str(k) == re.sub("_.*", "", x)
861                ]
862                if len(nam) == 0:
863                    print(f"There were not images for {k} ids")
864                    break
865
866                img_list.append(self.prepared_images[a][nam[0]])
867
868            if len(img_list) == len(acronyms):
869                results_img[f'{k}_{"_".join(acronyms)}'] = merge_images(
870                    img_list, ratio_list
871                )
872
873        self.prepared_images[f'merged_{"_".join(acronyms)}'] = results_img
874
875        print(f'Images stored in self.prepared_images["merged_{"_".join(acronyms)}"]')

Merge previously prepared images stored in self.prepared_images, adjusted based on the image ratios. The used ratios adjust relative image intensity.

Parameters

acronyms : list List of image names to be merged.

ratio_list : list[float] List of ratio intensity values (0.0–1.0) for the merged image. The acronyms list and ratio_list must be of the same length.

Returns

dict Dictionary of processed images.

Notes

To access the processed images, use the get_prepared_images() method.

To save the processed images to disk, use the save_prepared_images() method.

def image_stitching(self, acronyms: list, to_results_image: bool = False):
877    def image_stitching(self, acronyms: list, to_results_image: bool = False):
878        """
879        Stitch (horizontally) previously prepared images stored in `self.prepared_images`.
880
881        Parameters
882        ----------
883        acronyms : list
884            List of image names to be stitched.
885
886        to_results_image : bool
887            Boolean value indicating whether images obtained from the
888            `series_analysis_nuclei()` or `series_analysis_chromatinization()`
889            methods of the `NucleiFinder` class should be stitched to the right
890            side of the images in the `acronyms` list.
891
892        Returns
893        -------
894        dict
895            Dictionary of processed images.
896
897        Notes
898        -----
899        To access the processed images, use the ``get_prepared_images()`` method.
900
901        To save the processed images to disk, use the ``save_prepared_images()`` method.
902        """
903
904        for a in acronyms:
905            if a not in list(self.prepared_images.keys()):
906                raise ValueError(f"Incorrect {a} acronyme!")
907
908        results_img = {}
909        for k in tqdm(self.images_ids):
910            img_list = []
911            for a in acronyms:
912                nam = [
913                    x
914                    for x in self.prepared_images[a].keys()
915                    if str(k) == re.sub("_.*", "", x)
916                ]
917                if len(nam) == 0:
918                    print(f"There were not images for {k} ids")
919                    break
920
921                img_list.append(self.prepared_images[a][nam[0]])
922
923            if to_results_image:
924                nam = [
925                    x
926                    for x in self.results_images.keys()
927                    if str(k) == re.sub("_.*", "", x)
928                ]
929                if len(nam) != 0:
930                    img_list.append(self.results_images[nam[0]])
931
932                if len(img_list) == len(acronyms) + 1:
933                    results_img[f'{k}_{"_".join(acronyms)}_res'] = cv2.hconcat(img_list)
934
935            elif to_results_image is not False:
936                if len(img_list) == len(acronyms):
937                    results_img[f'{k}_{"_".join(acronyms)}'] = cv2.hconcat(img_list)
938
939        self.prepared_images[f'stitched_{"_".join(acronyms)}'] = results_img
940
941        print(f'Images stored in self.prepared_images["stitched_{"_".join(acronyms)}"]')

Stitch (horizontally) previously prepared images stored in self.prepared_images.

Parameters

acronyms : list List of image names to be stitched.

to_results_image : bool Boolean value indicating whether images obtained from the series_analysis_nuclei() or series_analysis_chromatinization() methods of the NucleiFinder class should be stitched to the right side of the images in the acronyms list.

Returns

dict Dictionary of processed images.

Notes

To access the processed images, use the get_prepared_images() method.

To save the processed images to disk, use the save_prepared_images() method.

def save_raw(self, path_to_save: str = ''):
943    def save_raw(self, path_to_save: str = ""):
944        """
945        Save `self.results_images` loaded by the `self.load_experimental_images()` method,
946        obtained from the `series_analysis_nuclei()` or `series_analysis_chromatinization()`
947        methods of the `NucleiFinder` class for later usage with cls.load_from_dict() method.
948        The data will be saved with a `.inuc` extension.
949
950        Parameters
951        ----------
952        path_to_save : str
953            The directory path where the images will be saved.
954            Default is the current working directory.
955        """
956
957        full_path = os.path.join(path_to_save, f"{self.experiment_name}.inuc")
958
959        np.savez(full_path, **self.results_images)

Save self.results_images loaded by the self.load_experimental_images() method, obtained from the series_analysis_nuclei() or series_analysis_chromatinization() methods of the NucleiFinder class for later usage with cls.load_from_dict() method. The data will be saved with a .inuc extension.

Parameters

path_to_save : str The directory path where the images will be saved. Default is the current working directory.

class NucleiFinder(jimg_ncd.utils.ImageTools, RepTools):
 962class NucleiFinder(ImageTools, RepTools):
 963    """
 964    Implements a comprehensive pipeline for automated segmentation,
 965    selection, and analysis of cell nuclei and their internal chromatin structure
 966    in microscopy images.
 967
 968    It utilizes a pre-trained deep learning model (StarDist2D) for initial
 969    nuclear identification, followed by the application of advanced morphological
 970    and intensity filters, and a dedicated algorithm for quantifying chromatinization.
 971    The class provides detailed control over the hyperparameters for both the
 972    segmentation process and image preprocessing stages.
 973
 974    Parameters
 975    ----------
 976    image : np.ndarray, optional
 977        The input image (typically 16-bit) for analysis.
 978
 979    test_results : list, optional
 980        Plots resulting from parameter testing (e.g., NMS/Prob combinations).
 981
 982    hyperparameter_nuclei : dict, optional
 983        Parameters for nuclei segmentation and filtering (e.g., 'nms', 'prob', 'min_size', 'circularity').
 984
 985    hyperparameter_chromatinization : dict, optional
 986        Parameters for segmenting and filtering chromatin spots (e.g., 'cut_point', 'ratio').
 987
 988    img_adj_par_chrom : dict, optional
 989        Image adjustment parameters (gamma, contrast) specifically for chromatin analysis.
 990
 991    img_adj_par : dict, optional
 992        Image adjustment parameters for nuclei segmentation.
 993
 994    show_plots : bool, optional
 995        Flag controlling the automatic display of visual results.
 996
 997    nuclei_results : dict, optional
 998        A dictionary storing numerical data (features) extracted from the nuclei.
 999
1000    images : dict, optional
1001        A dictionary storing output images and masks.
1002
1003    Attributes
1004    ----------
1005    image : np.ndarray
1006        The currently loaded image for analysis.
1007
1008    test_results : list
1009        The visual outcomes of NMS/Prob parameter tests.
1010
1011    hyperparameter_nuclei : dict
1012        A dictionary of active parameters used by the `find_nuclei()` and `select_nuclei()` methods.
1013
1014    hyperparameter_chromatinization : dict
1015        A dictionary of active parameters used by the `nuclei_chromatinization()` method.
1016
1017    img_adj_par : dict
1018        Image correction parameters for nuclei segmentation.
1019
1020    img_adj_par_chrom : dict
1021        Image correction parameters for chromatin analysis.
1022
1023    show_plots : bool
1024        The state of the plot display flag.
1025
1026    nuclei_results : dict
1027        Stores feature dictionaries for: all detected ('nuclei'), selected ('nuclei_reduced'),
1028        and chromatinization data ('nuclei_chromatinization').
1029
1030    images : dict
1031        Stores masks and images visualizing the results.
1032
1033    series_im : bool
1034        Flag indicating if the class is operating in a batch or series processing mode.
1035
1036    Methods
1037    -------
1038    set_nms(nms)
1039        Sets the Non-Maximum Suppression (NMS) threshold.
1040
1041    set_prob(prob)
1042        Sets the segmentation probability threshold.
1043
1044    set_nuclei_circularity(circ)
1045        Sets the minimum required circularity for a nucleus.
1046
1047    set_nuclei_local_intenisty_FC(local_FC)
1048        Sets the factor used for removing false positives based on local intensity differences.
1049
1050    set_nuclei_global_area_FC(global_FC)
1051        Sets the factor used for removing size-based outlier false positives.
1052
1053    set_nuclei_size(size)
1054        Sets the minimum and maximum area (in pixels) for nuclei selection.
1055
1056    set_nuclei_min_mean_intensity(intensity)
1057        Sets the minimum required mean intensity value for a nucleus.
1058
1059    set_chromatinization_size(size)
1060        Sets the minimum and maximum area (in pixels) for chromatin spot selection.
1061
1062    set_chromatinization_cut_point(cut_point)
1063        Sets the factor used to adjust the chromatin segmentation threshold (Otsu's method).
1064
1065    set_adj_image_gamma(gamma)
1066        Sets the gamma correction for the nuclei image.
1067
1068    set_adj_chrom_contrast(contrast)
1069        Sets the contrast adjustment for the chromatinization image.
1070
1071    current_parameters_nuclei (property)
1072        Returns the active nuclei segmentation and filtering parameters.
1073
1074    find_nuclei()
1075        Performs nuclei segmentation using StarDist and extracts initial features.
1076
1077    select_nuclei()
1078        Filters the detected nuclei based on set morphological and intensity criteria.
1079
1080    nuclei_chromatinization()
1081        Performs quantitative and morphological analysis of chromatin spots in selected nuclei.
1082
1083    get_features(model_out, image)
1084        Calculates geometric and intensity features from a segmented mask (label image).
1085
1086    Notes
1087    -----
1088    The typical analysis workflow follows this order:
1089    1. `input_image()`
1090    2. `find_nuclei()`
1091    3. `select_nuclei()` (Optional)
1092    4. `nuclei_chromatinization()` (Optional)
1093    """
1094
1095    def __init__(
1096        self,
1097        image=None,
1098        test_results=None,
1099        hyperparameter_nuclei=None,
1100        hyperparameter_chromatinization=None,
1101        img_adj_par_chrom=None,
1102        img_adj_par=None,
1103        show_plots=None,
1104        nuclei_results=None,
1105        images=None,
1106    ):
1107        """
1108        The main class for the detection and analysis of cell nuclei and their chromatinization
1109        in microscopy or flow cytometry images, utilizing the StarDist segmentation model.
1110
1111        This class inherits functionality for image processing (ImageTools) and
1112        results handling (RepTools).
1113
1114        Parameters
1115        ----------
1116        image : np.ndarray, optional
1117            The input image for analysis.
1118            Default: None.
1119
1120        test_results : list, optional
1121            A list of plots or images resulting from parameter testing.
1122            Default: None.
1123
1124        hyperparameter_nuclei : dict, optional
1125            The segmentation parameters for nuclei detection.
1126            Default:
1127            {'nms': 0.8, 'prob': 0.4, 'max_size': 1000, 'min_size': 20,
1128             'circularity': 0.6, 'intensity_mean': 6553.5, 'nn_min': 10,
1129             'FC_diff_global': 1.5, 'FC_diff_local_intensity': 0.6}
1130
1131        hyperparameter_chromatinization : dict, optional
1132            The analysis parameters for chromatin spots within the nuclei.
1133            Default:
1134            {'max_size': 800, 'min_size': 2, 'ratio': 0.1, 'cut_point': 0.95}
1135
1136        img_adj_par_chrom : dict, optional
1137            Image adjustment parameters (gamma, contrast, brightness) for chromatin analysis.
1138            Default: {'gamma': 0.25, 'contrast': 5, 'brightness': 950}
1139
1140        img_adj_par : dict, optional
1141            Image adjustment parameters (gamma, contrast, brightness) for nuclei segmentation.
1142            Default: {'gamma': 0.9, 'contrast': 2, 'brightness': 1000}
1143
1144        show_plots : bool, optional
1145            Flag to determine whether results and plots should be displayed automatically.
1146            Default: True.
1147
1148        nuclei_results : dict, optional
1149            A dictionary storing the numerical results of the analysis.
1150            Default: {'nuclei': None, 'nuclei_reduced': None, 'nuclei_chromatinization': None}
1151
1152        images : dict, optional
1153            A dictionary storing the output images (e.g., masks).
1154            Default: {'nuclei': None, 'nuclei_reduced': None, 'nuclei_chromatinization': None}
1155
1156        Attributes
1157        ----------
1158        image : np.ndarray
1159            The currently loaded image for analysis.
1160
1161        hyperparameter_nuclei : dict
1162            Active nuclei segmentation parameters.
1163
1164        hyperparameter_chromatinization : dict
1165            Active chromatinization analysis parameters.
1166
1167        img_adj_par : dict
1168            Active image correction parameters for nuclei segmentation.
1169
1170        img_adj_par_chrom : dict
1171            Active image correction parameters for chromatin analysis.
1172
1173        show_plots : bool
1174            The current state of the plot display flag.
1175
1176        series_im : bool
1177            Flag indicating if a series of images is being processed.
1178
1179        Notes
1180        -----
1181        The default value for 'intensity_mean' in hyperparameter_nuclei is calculated
1182        as $(2^{16} - 1) / 10$, which represents 10% of the maximum 16-bit value (65535 / 10 = 6553.5).
1183
1184        The image adjustment parameters are crucial for optimizing contrast and brightness
1185        to improve the performance of both the StarDist model and the subsequent
1186        chromatin thresholding.
1187        """
1188
1189        # Use default values if parameters are None
1190        self.image = image or None
1191        """Loaded input image."""
1192        self.test_results = test_results or None
1193        """Results of parameter tests.
1194
1195            This attribute or method stores the outcomes of parameter testing procedures.
1196            For interactive browsing and inspection of the results, use the 
1197            `browser_test(self)` method."""
1198
1199        self.hyperparameter_nuclei = hyperparameter_nuclei or {
1200            "nms": 0.8,
1201            "prob": 0.4,
1202            "max_size": 1000,
1203            "min_size": 20,
1204            "circularity": 0.6,
1205            "intensity_mean": (2**16 - 1) / 10,
1206            "nn_min": 10,
1207            "FC_diff_global": 1.5,
1208            "FC_diff_local_intensity": 0.6,
1209        }
1210        """Active nuclei segmentation/filter parameters."""
1211
1212        self.hyperparameter_chromatinization = hyperparameter_chromatinization or {
1213            "max_size": 800,
1214            "min_size": 2,
1215            "ratio": 0.1,
1216            "cut_point": 0.95,
1217        }
1218        """Active chromatin analysis parameters."""
1219
1220        self.img_adj_par_chrom = img_adj_par_chrom or {
1221            "gamma": 0.25,
1222            "contrast": 5,
1223            "brightness": 950,
1224        }
1225        """Image adjustment for chromatin analysis."""
1226
1227        self.img_adj_par = img_adj_par or {
1228            "gamma": 0.9,
1229            "contrast": 2,
1230            "brightness": 1000,
1231        }
1232        """Image adjustment for nuclei segmentation."""
1233
1234        self.show_plots = show_plots or True
1235        """Flag controlling plot display."""
1236
1237        self.nuclei_results = nuclei_results or {
1238            "nuclei": None,
1239            "nuclei_reduced": None,
1240            "nuclei_chromatinization": None,
1241        }
1242        """Stored dictionary of nuclei analysis results."""
1243
1244        self.images = images or {
1245            "nuclei": None,
1246            "nuclei_reduced": None,
1247            "nuclei_chromatinization": None,
1248        }
1249        """Stored dictionary of images from nuclei analysis."""
1250
1251        # sereies images
1252        self.series_im = False
1253        """Flag for batch/series image processing."""
1254
1255    def set_nms(self, nms: float):
1256        """
1257        Set the Non-Maximum Suppression (NMS) threshold.
1258
1259        The NMS threshold controls how aggressively overlapping detections are suppressed.
1260        A lower value reduces the probability of overlapping nuclei being kept.
1261
1262        Parameters
1263        ----------
1264        nms : float
1265            The NMS IoU threshold value.
1266        """
1267
1268        self.hyperparameter_nuclei["nms"] = nms
1269
1270    def set_prob(self, prob: float):
1271        """
1272        Set the probability threshold used in segmentation.
1273
1274        The probability threshold determines the minimum confidence required for an object
1275        (e.g., a nucleus) to be classified as a segmented entity. Higher values result in
1276        fewer segmented objects, as only detections with strong confidence scores are kept.
1277        This may lead to omission of weaker or less distinct structures.
1278
1279        Because optimal values depend on image characteristics, it is important to visually
1280        inspect segmentation results produced with different thresholds to determine the
1281        most suitable setting.
1282
1283        Parameters
1284        ----------
1285        prob : float
1286            The probability threshold value.
1287        """
1288
1289        self.hyperparameter_nuclei["prob"] = prob
1290
1291    def set_nuclei_circularity(self, circ: float):
1292        """
1293        This method sets 'circ' parameter. The circ is a parameter used for adjust minimal nucleus circularity.
1294
1295        Parameters
1296        ----------
1297        circ : float
1298            Nuclei circularity value.
1299        """
1300
1301        self.hyperparameter_nuclei["circularity"] = circ
1302
1303    def set_nuclei_local_intenisty_FC(self, local_FC: float):
1304        """
1305        This method sets the 'FC_diff_local_intensity' parameter. The 'local_FC' is used to remove false positive multiple nuclei that were detected in single image.
1306
1307        Parameters
1308        ----------
1309        local_FC : float
1310            local_FC value.
1311        """
1312
1313        self.hyperparameter_nuclei["FC_diff_local_intensity"] = local_FC
1314
1315    # change
1316    def set_nuclei_global_area_FC(self, global_FC: float):
1317        """
1318        This method sets the 'FC_diff_global' parameter. The 'global_FC' is used to remove false positive multiple nuclei that were detected in a single image and are outliers from the global mean area size.
1319
1320        Parameters
1321        ----------
1322        FC_diff_global : float
1323            global_FC value.
1324        """
1325
1326        self.hyperparameter_nuclei["FC_diff_global"] = global_FC
1327
1328    def set_nuclei_size(self, size: tuple):
1329        """
1330        This method sets 'size' parameter. The size is a parameter used for adjust minimal and maximal nucleus area (px).
1331
1332        Parameters
1333        ----------
1334        size : tuple
1335            (min_value, max_value)
1336        """
1337
1338        self.hyperparameter_nuclei["min_size"] = size[0]
1339        self.hyperparameter_nuclei["max_size"] = size[1]
1340
1341    def set_nuclei_min_mean_intensity(self, intensity: int):
1342        """
1343        This method sets 'intensity' parameter. The 'intensity' parameter is used to adjust the minimum mean intensity of all pixel intensities within the nucleus.
1344
1345        Parameters
1346        ----------
1347        intensity : int
1348            intensity value.
1349        """
1350
1351        self.hyperparameter_nuclei["intensity_mean"] = intensity
1352
1353    def set_chromatinization_size(self, size: tuple):
1354        """
1355        This method sets 'size' parameter. The size is a parameter used for adjust minimal and maximal chromanitization spot area (px) within the nucleus.
1356
1357        Parameters
1358        ----------
1359        size : tuple
1360            (min_value, max_value)
1361        """
1362
1363        self.hyperparameter_chromatinization["min_size"] = size[0]
1364        self.hyperparameter_chromatinization["max_size"] = size[1]
1365
1366    def set_chromatinization_ratio(self, ratio: int):
1367        """
1368        This method sets the 'ratio' parameter. In this case, the 'ratio' parameter is similar to 'circularity' as it describes the ratio between the maximum lengths in the x and y dimensions of the nucleus chromatinization.
1369
1370        Parameters
1371        ----------
1372        ratio : float
1373            ratio value.
1374        """
1375
1376        self.hyperparameter_chromatinization["ratio"] = ratio
1377
1378    def set_chromatinization_cut_point(self, cut_point: int):
1379        """
1380        This method sets the 'cut_point' parameter. The 'cut_point' parameter is a factor used to adjust the threshold for separating the background from chromatin spots.
1381
1382        Parameters
1383        ----------
1384        cut_point : int
1385            cut_point value.
1386        """
1387
1388        self.hyperparameter_chromatinization["cut_point"] = cut_point
1389
1390    #
1391
1392    def set_adj_image_gamma(self, gamma: float):
1393        """
1394        This method sets 'gamma' parameter. The gamma is a parameter used for adjust gamma of the nucleus image.
1395
1396        Parameters
1397        ----------
1398        gamma : float
1399            gamma value.
1400        """
1401
1402        self.img_adj_par["gamma"] = gamma
1403
1404    def set_adj_image_contrast(self, contrast: float):
1405        """
1406        This method sets 'contrast' parameter. The contrast is a parameter used for adjust contrast of the nucleus image.
1407
1408        Parameters
1409        ----------
1410        contrast : float
1411            contrast value.
1412        """
1413
1414        self.img_adj_par["contrast"] = contrast
1415
1416    def set_adj_image_brightness(self, brightness: float):
1417        """
1418        This method sets 'brightness' parameter. The brightness is a parameter used for adjust brightness of the nucleus image.
1419
1420        Parameters
1421        ----------
1422        brightness : float
1423            brightness value.
1424        """
1425
1426        self.img_adj_par["brightness"] = brightness
1427
1428    #
1429
1430    def set_adj_chrom_gamma(self, gamma: float):
1431        """
1432        This method sets 'gamma' parameter. The gamma is a parameter used for adjust gamma of the nucleus chromatinization image.
1433
1434        Parameters
1435        ----------
1436        gamma : float
1437            gamma value.
1438        """
1439
1440        self.img_adj_par_chrom["gamma"] = gamma
1441
1442    def set_adj_chrom_contrast(self, contrast: float):
1443        """
1444        This method sets 'contrast' parameter. The contrast is a parameter used for adjust contrast of the nucleus chromatinization image.
1445
1446        Parameters
1447        ----------
1448        contrast : float
1449            contrast value.
1450        """
1451
1452        self.img_adj_par_chrom["contrast"] = contrast
1453
1454    def set_adj_chrom_brightness(self, brightness: float):
1455        """
1456        This method sets 'brightness' parameter. The brightness is a parameter used for adjust brightness of the nucleus chromatinization image.
1457
1458        Parameters
1459        ----------
1460        brightness : float
1461            brightness value.
1462        """
1463
1464        self.img_adj_par_chrom["brightness"] = brightness
1465
1466    @property
1467    def current_parameters_nuclei(self):
1468        """
1469        This method returns current nuclei analysis parameters.
1470
1471        Returns
1472        -------
1473        dict
1474            Nuclei analysis parameters.
1475        """
1476        print(self.hyperparameter_nuclei)
1477        return self.hyperparameter_nuclei
1478
1479    @property
1480    def current_parameters_chromatinization(self):
1481        """
1482        This method returns current nuclei chromatinization analysis parameters.
1483
1484        Returns
1485        -------
1486        dict
1487            Nuclei chromatinization analysis parameters.
1488        """
1489
1490        print(self.hyperparameter_chromatinization)
1491        return self.hyperparameter_chromatinization
1492
1493    @property
1494    def current_parameters_img_adj(self):
1495        """
1496        This method returns current nuclei image setup.
1497
1498        Returns
1499        -------
1500        dict
1501            Nuclei image setup.
1502        """
1503
1504        print(self.img_adj_par)
1505        return self.img_adj_par
1506
1507    @property
1508    def current_parameters_img_adj_chro(self):
1509        """
1510        This method returns current nuclei chromatinization image setup.
1511
1512        Returns
1513        -------
1514        dict
1515            Nuclei chromatinization image setup.
1516        """
1517
1518        print(self.img_adj_par_chrom)
1519        return self.img_adj_par_chrom
1520
1521    def get_results_nuclei(self):
1522        """
1523        This function returns nuclei analysis results.
1524
1525        Returns
1526        -------
1527        dict
1528            Nuclei results in the dictionary format.
1529        """
1530
1531        if self.images["nuclei"] is None:
1532            print("No results to return!")
1533            return None
1534        else:
1535            if cfg._DISPLAY_MODE:
1536                if self.show_plots:
1537                    display_preview(self.resize_to_screen_img(self.images["nuclei"]))
1538            return self.nuclei_results["nuclei"], self.images["nuclei"]
1539
1540    def get_results_nuclei_selected(self):
1541        """
1542        This function returns the results of the nuclei analysis following adjustments to the data selection thresholds.
1543
1544        Returns
1545        -------
1546        dict
1547            Nuclei results in the dictionary format.
1548        """
1549
1550        if self.images["nuclei_reduced"] is None:
1551            print("No results to return!")
1552            return None
1553        else:
1554            if cfg._DISPLAY_MODE:
1555                if self.show_plots:
1556                    display_preview(
1557                        self.resize_to_screen_img(self.images["nuclei_reduced"])
1558                    )
1559            return self.nuclei_results["nuclei_reduced"], self.images["nuclei_reduced"]
1560
1561    def get_results_nuclei_chromatinization(self):
1562        """
1563        This function returns the results of the nuclei chromatinization analysis.
1564
1565        Returns
1566        -------
1567        dict
1568            Nuclei chromatinization results in the dictionary format.
1569        """
1570
1571        if self.images["nuclei_chromatinization"] is None:
1572            print("No results to return!")
1573            return None
1574        else:
1575            if cfg._DISPLAY_MODE:
1576                if self.show_plots:
1577                    display_preview(self.images["nuclei_chromatinization"])
1578            return (
1579                self.nuclei_results["nuclei_chromatinization"],
1580                self.images["nuclei_chromatinization"],
1581            )
1582
1583    def add_test(self, plots):
1584        self.test_results = plots
1585
1586        """
1587        Helper method.
1588        """
1589
1590    def input_image(self, img):
1591        """
1592        This method adds the image to the class for nuclei and/or chromatinization analysis.
1593
1594        Parameters
1595        ----------
1596        img : np.ndarray
1597            Input image.
1598        """
1599
1600        self.image = img
1601        self.add_test(None)
1602
1603    def get_features(self, model_out, image):
1604        """
1605        Extracts numerical feature descriptors from model output for a given image.
1606
1607        This method processes the output returned by a feature-extraction model
1608        (e.g., CNN, encoder network, statistical model) and converts it into a
1609        structured feature vector associated with the provided image.
1610        Typically used for downstream analysis, classification, or clustering.
1611
1612        Parameters
1613        ----------
1614        model_out : any
1615            Output returned by the feature-extraction model.
1616            The expected format depends on the model (e.g., tensor, dict, list of arrays).
1617
1618        image : ndarray
1619            The input image (2D or 3D array) for which features are being extracted.
1620            Provided for reference or for combining raw image metrics with model features.
1621
1622        Returns
1623        -------
1624        features : dict
1625            Dictionary containing extracted features.
1626            Keys correspond to feature names, and values are numerical descriptors.
1627        """
1628
1629        features = {
1630            "label": [],
1631            "area": [],
1632            "area_bbox": [],
1633            "area_convex": [],
1634            "area_filled": [],
1635            "axis_major_length": [],
1636            "axis_minor_length": [],
1637            "eccentricity": [],
1638            "equivalent_diameter_area": [],
1639            "feret_diameter_max": [],
1640            "solidity": [],
1641            "perimeter": [],
1642            "perimeter_crofton": [],
1643            "circularity": [],
1644            "intensity_max": [],
1645            "intensity_mean": [],
1646            "intensity_min": [],
1647            "ratio": [],
1648            "coords": [],
1649        }
1650
1651        for region in skimage.measure.regionprops(model_out, intensity_image=image):
1652
1653            # Compute circularity
1654            if region.perimeter > 0:
1655                circularity = 4 * np.pi * region.area / (region.perimeter**2)
1656            else:
1657                circularity = 0
1658
1659            features["area"].append(region.area)
1660            features["area_bbox"].append(region.area_bbox)
1661            features["area_convex"].append(region.area_convex)
1662            features["area_filled"].append(region.area_filled)
1663            features["axis_major_length"].append(region.axis_major_length)
1664            features["axis_minor_length"].append(region.axis_minor_length)
1665            features["eccentricity"].append(region.eccentricity)
1666            features["equivalent_diameter_area"].append(region.equivalent_diameter_area)
1667            features["feret_diameter_max"].append(region.feret_diameter_max)
1668            features["solidity"].append(region.solidity)
1669            features["perimeter"].append(region.perimeter)
1670            features["perimeter_crofton"].append(region.perimeter_crofton)
1671            features["label"].append(region.label)
1672            features["coords"].append(region.coords)
1673            features["circularity"].append(circularity)
1674            features["intensity_max"].append(np.max(region.intensity_max))
1675            features["intensity_min"].append(np.max(region.intensity_min))
1676            features["intensity_mean"].append(np.max(region.intensity_mean))
1677
1678        ratios = []
1679
1680        # Calculate the ratio for each pair of values
1681        for min_len, max_len in zip(
1682            features["axis_minor_length"], features["axis_major_length"]
1683        ):
1684            if max_len != 0:
1685                ratio = min_len / max_len
1686                ratios.append(ratio)
1687            else:
1688                ratios.append(float(0.0))
1689
1690        features["ratio"] = ratios
1691
1692        return features
1693
1694    # repaired stat
1695    def nuclei_finder_test(self):
1696        """
1697        This method performs testing analysis of parameters (specified 'nms' and 'prob' parameters)
1698        for the image provided by the input_image() method.
1699
1700        This method evaluates the performance of the internal NucleiFinder
1701        configuration using the currently loaded images, parameters, or model
1702        settings. It is typically used to check whether the detection, segmentation
1703        or preprocessing stages run correctly on sample data.
1704
1705        Examples
1706        --------
1707        >>> nf.nuclei_finder_test()
1708        >>> nf.browser_test()
1709        """
1710
1711        StarDist2D.from_pretrained()
1712        model = StarDist2D.from_pretrained("2D_versatile_fluo")
1713
1714        nmst = [0.1, 0.2, 0.6]
1715        probt = [0.1, 0.5, 0.9]
1716
1717        try:
1718            img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
1719        except:
1720            img = self.image
1721
1722        plot = []
1723
1724        # adj img
1725        img = adjust_img_16bit(
1726            img,
1727            brightness=self.img_adj_par["brightness"],
1728            contrast=self.img_adj_par["contrast"],
1729            gamma=self.img_adj_par["gamma"],
1730        )
1731        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
1732
1733        fig = plt.figure(dpi=300)
1734        plt.imshow(img)
1735        plt.axis("off")
1736        plt.title("Original", fontsize=25)
1737
1738        if cfg._DISPLAY_MODE:
1739            if self.show_plots:
1740                plt.show()
1741
1742        plot.append(fig)
1743
1744        for n in tqdm(nmst, desc="Loop 1: nmst"):
1745            print(f"\n➡️ Starting outer loop for n = {n}")
1746
1747            for t in tqdm(probt, desc=f"   ↳ Loop 2 for n={n}", leave=False):
1748                print(f"   → Starting inner loop for t = {t}")
1749
1750                labels, _ = model.predict_instances(
1751                    normalize(img.copy()), nms_thresh=n, prob_thresh=t
1752                )
1753
1754                tmp = self.get_features(model_out=labels, image=img)
1755
1756                fig = plt.figure(dpi=300)
1757                plt.imshow(render_label(labels, img=img))
1758                plt.axis("off")
1759                plt.title(
1760                    f"nms {n} & prob {t} \n detected nuc: {len(tmp['area'])}",
1761                    fontsize=25,
1762                )
1763
1764                if cfg._DISPLAY_MODE:
1765                    if self.show_plots:
1766                        plt.show()
1767
1768                plot.append(fig)
1769
1770        self.add_test(plot)
1771
1772    def find_nuclei(self):
1773        """
1774        Performs analysis on the image provided by the ``input_image()`` method
1775        using default or user-defined parameters.
1776
1777        To show current parameters, use:
1778            - ``current_parameters_nuclei``
1779            - ``current_parameters_img_adj``
1780
1781        To set new parameters, use:
1782            - ``set_nms()``
1783            - ``set_prob()``
1784            - ``set_adj_image_gamma()``
1785            - ``set_adj_image_contrast()``
1786            - ``set_adj_image_brightness()``
1787
1788        To get analysis results, use:
1789            - ``get_results_nuclei()``
1790        """
1791
1792        if isinstance(self.image, np.ndarray):
1793
1794            model = StarDist2D.from_pretrained("2D_versatile_fluo")
1795
1796            try:
1797                img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
1798            except:
1799                img = self.image
1800
1801            img = adjust_img_16bit(
1802                img,
1803                brightness=self.img_adj_par["brightness"],
1804                contrast=self.img_adj_par["contrast"],
1805                gamma=self.img_adj_par["gamma"],
1806            )
1807            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
1808            labels, _ = model.predict_instances(
1809                normalize(img),
1810                nms_thresh=self.hyperparameter_nuclei["nms"],
1811                prob_thresh=self.hyperparameter_nuclei["prob"],
1812            )
1813
1814            self.nuclei_results["nuclei"] = self.get_features(
1815                model_out=labels, image=img
1816            )
1817
1818            if len(self.nuclei_results["nuclei"]["coords"]) > 0:
1819
1820                oryginal = adjust_img_16bit(img, color="gray")
1821
1822                # series repaired nuclesu
1823                if self.series_im is True:
1824                    self.images["nuclei"] = oryginal
1825                else:
1826                    nuclei_mask = adjust_img_16bit(
1827                        cv2.cvtColor(
1828                            self.create_mask(self.nuclei_results["nuclei"], oryginal),
1829                            cv2.COLOR_BGR2GRAY,
1830                        ),
1831                        color="blue",
1832                    )
1833                    concatenated_image = cv2.hconcat([oryginal, nuclei_mask])
1834                    self.images["nuclei"] = concatenated_image
1835
1836                if cfg._DISPLAY_MODE:
1837                    if self.show_plots:
1838                        display_preview(
1839                            self.resize_to_screen_img(self.images["nuclei"])
1840                        )
1841
1842            else:
1843
1844                self.nuclei_results["nuclei"] = None
1845                self.nuclei_results["nuclei_reduced"] = None
1846                self.nuclei_results["nuclei_chromatinization"] = None
1847
1848                print("Nuclei not detected!")
1849
1850        else:
1851            print("\nAdd image firstly!")
1852
1853    def select_nuclei(self):
1854        """
1855        Selects data obtained from ``find_nuclei()`` based on the set threshold parameters.
1856
1857        To show current parameters, use:
1858            - ``current_parameters_nuclei``
1859
1860        To set new parameters, use:
1861            - ``set_nuclei_circularity()``
1862            - ``set_nuclei_size()``
1863            - ``set_nuclei_min_mean_intensity()``
1864
1865        To get analysis results, use:
1866            - ``get_results_nuclei_selected()``
1867        """
1868
1869        if self.nuclei_results["nuclei"] is not None:
1870            input_in = copy.deepcopy(self.nuclei_results["nuclei"])
1871
1872            nuclei_dictionary = self.drop_dict(
1873                input_in,
1874                key="area",
1875                var=self.hyperparameter_nuclei["min_size"],
1876                action=">",
1877            )
1878            nuclei_dictionary = self.drop_dict(
1879                nuclei_dictionary,
1880                key="area",
1881                var=self.hyperparameter_nuclei["max_size"],
1882                action="<",
1883            )
1884            nuclei_dictionary = self.drop_dict(
1885                nuclei_dictionary,
1886                key="intensity_mean",
1887                var=self.hyperparameter_nuclei["intensity_mean"],
1888                action=">",
1889            )
1890
1891            if len(nuclei_dictionary["coords"]) > 0:
1892
1893                self.nuclei_results["nuclei_reduced"] = nuclei_dictionary
1894
1895                try:
1896                    img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
1897                except:
1898                    img = self.image
1899
1900                oryginal = adjust_img_16bit(img, color="gray")
1901
1902                # series repaired nuclesu
1903                if self.series_im is True:
1904                    self.images["nuclei_reduced"] = oryginal
1905                else:
1906                    nuclei_mask = adjust_img_16bit(
1907                        cv2.cvtColor(
1908                            self.create_mask(
1909                                self.nuclei_results["nuclei_reduced"], oryginal
1910                            ),
1911                            cv2.COLOR_BGR2GRAY,
1912                        ),
1913                        color="blue",
1914                    )
1915                    concatenated_image = cv2.hconcat([oryginal, nuclei_mask])
1916
1917                    self.images["nuclei_reduced"] = concatenated_image
1918
1919                if cfg._DISPLAY_MODE:
1920                    if self.show_plots:
1921                        display_preview(
1922                            self.resize_to_screen_img(self.images["nuclei_reduced"])
1923                        )
1924
1925            else:
1926                self.nuclei_results["nuclei"] = None
1927                self.nuclei_results["nuclei_reduced"] = None
1928                self.nuclei_results["nuclei_chromatinization"] = None
1929
1930                print("Selected zero nuclei! Analysis stop!")
1931
1932        else:
1933            print("Lack of nuclei data to select!")
1934
1935    def nuclei_chromatinization(self):
1936        """
1937        Performs chromatinization analysis of nuclei using data obtained from
1938        ``find_nuclei()`` and/or ``select_nuclei()``.
1939
1940        To show current parameters, use:
1941            - ``current_parameters_chromatinization``
1942            - ``current_parameters_img_adj_chro``
1943
1944        To set new parameters, use:
1945            - ``set_chromatinization_size()``
1946            - ``set_chromatinization_ratio()``
1947            - ``set_chromatinization_cut_point()``
1948            - ``set_adj_chrom_gamma()``
1949            - ``set_adj_chrom_contrast()``
1950            - ``set_adj_chrom_brightness()``
1951
1952        To get analysis results, use:
1953            - ``get_results_nuclei_chromatinization()``
1954        """
1955
1956        def add_lists(f, g):
1957
1958            result = []
1959            max_length = max(len(f), len(g))
1960
1961            for i in range(max_length):
1962                f_elem = f[i] if i < len(f) else ""
1963                g_elem = g[i] if i < len(g) else ""
1964                result.append(f_elem + g_elem)
1965
1966            return result
1967
1968        def reverse_coords(image, x, y):
1969
1970            zero = np.zeros(image.shape)
1971
1972            zero[x, y] = 2**16
1973
1974            zero_indices = np.where(zero == 0)
1975
1976            return zero_indices[0], zero_indices[1]
1977
1978        if isinstance(self.nuclei_results["nuclei_reduced"], dict):
1979            nuclei_dictionary = self.nuclei_results["nuclei_reduced"]
1980        else:
1981            nuclei_dictionary = self.nuclei_results["nuclei"]
1982
1983        if nuclei_dictionary is not None:
1984            arrays_list = copy.deepcopy(nuclei_dictionary["coords"])
1985
1986            chromatione_info = {
1987                "area": [],
1988                "area_bbox": [],
1989                "area_convex": [],
1990                "area_filled": [],
1991                "axis_major_length": [],
1992                "axis_minor_length": [],
1993                "eccentricity": [],
1994                "equivalent_diameter_area": [],
1995                "feret_diameter_max": [],
1996                "solidity": [],
1997                "perimeter": [],
1998                "perimeter_crofton": [],
1999                "coords": [],
2000            }
2001
2002            full_im = np.zeros(self.image.shape[0:2], dtype=np.uint16)
2003            full_im = adjust_img_16bit(full_im)
2004
2005            for arr in arrays_list:
2006                x = list(arr[:, 0])
2007                y = list(arr[:, 1])
2008
2009                x1, y1 = reverse_coords(self.image, x, y)
2010
2011                regions_chro2 = self.image.copy()
2012
2013                regions_chro2[x1, y1] = 0
2014
2015                regions_chro2 = regions_chro2.astype("uint16")
2016
2017                try:
2018                    regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY)
2019                except:
2020                    pass
2021
2022                regions_chro2 = adjust_img_16bit(
2023                    regions_chro2,
2024                    brightness=self.img_adj_par_chrom["brightness"],
2025                    contrast=self.img_adj_par_chrom["contrast"],
2026                    gamma=self.img_adj_par_chrom["gamma"],
2027                )
2028
2029                full_im = merge_images(
2030                    image_list=[full_im, regions_chro2], intensity_factors=[1, 1]
2031                )
2032
2033                ret, thresh = cv2.threshold(
2034                    regions_chro2[x, y],
2035                    0,
2036                    2**16 - 1,
2037                    cv2.THRESH_BINARY + cv2.THRESH_OTSU,
2038                )
2039
2040                regions_chro2[
2041                    regions_chro2
2042                    <= ret * self.hyperparameter_chromatinization["cut_point"]
2043                ] = 0
2044
2045                regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY)
2046
2047                chromatione = regions_chro2 > 0
2048
2049                labeled_cells = measure.label(chromatione)
2050                regions = measure.regionprops(labeled_cells)
2051                regions = measure.regionprops(
2052                    labeled_cells, intensity_image=regions_chro2
2053                )
2054
2055                for region in regions:
2056
2057                    chromatione_info["area"].append(region.area)
2058                    chromatione_info["area_bbox"].append(region.area_bbox)
2059                    chromatione_info["area_convex"].append(region.area_convex)
2060                    chromatione_info["area_filled"].append(region.area_filled)
2061                    chromatione_info["axis_major_length"].append(
2062                        region.axis_major_length
2063                    )
2064                    chromatione_info["axis_minor_length"].append(
2065                        region.axis_minor_length
2066                    )
2067                    chromatione_info["eccentricity"].append(region.eccentricity)
2068                    chromatione_info["equivalent_diameter_area"].append(
2069                        region.equivalent_diameter_area
2070                    )
2071                    chromatione_info["feret_diameter_max"].append(
2072                        region.feret_diameter_max
2073                    )
2074                    chromatione_info["solidity"].append(region.solidity)
2075                    chromatione_info["perimeter"].append(region.perimeter)
2076                    chromatione_info["perimeter_crofton"].append(
2077                        region.perimeter_crofton
2078                    )
2079                    chromatione_info["coords"].append(region.coords)
2080
2081            ratios = []
2082
2083            for min_len, max_len in zip(
2084                chromatione_info["axis_minor_length"],
2085                chromatione_info["axis_major_length"],
2086            ):
2087                if max_len != 0:
2088                    ratio = min_len / max_len
2089                    ratios.append(ratio)
2090                else:
2091                    ratios.append(float(0.0))
2092
2093            chromatione_info["ratio"] = ratios
2094
2095            chromation_dic = self.drop_dict(
2096                chromatione_info,
2097                key="area",
2098                var=self.hyperparameter_chromatinization["min_size"],
2099                action=">",
2100            )
2101            chromation_dic = self.drop_dict(
2102                chromation_dic,
2103                key="area",
2104                var=self.hyperparameter_chromatinization["max_size"],
2105                action="<",
2106            )
2107            chromation_dic = self.drop_dict(
2108                chromation_dic,
2109                key="ratio",
2110                var=self.hyperparameter_chromatinization["ratio"],
2111                action=">",
2112            )
2113
2114            arrays_list2 = copy.deepcopy(chromation_dic["coords"])
2115
2116            nuclei_dictionary["spot_size_area"] = []
2117            nuclei_dictionary["spot_size_area_bbox"] = []
2118            nuclei_dictionary["spot_size_area_convex"] = []
2119            nuclei_dictionary["spot_size_area_filled"] = []
2120            nuclei_dictionary["spot_axis_major_length"] = []
2121            nuclei_dictionary["spot_axis_minor_length"] = []
2122            nuclei_dictionary["spot_eccentricity"] = []
2123            nuclei_dictionary["spot_size_equivalent_diameter_area"] = []
2124            nuclei_dictionary["spot_feret_diameter_max"] = []
2125            nuclei_dictionary["spot_perimeter"] = []
2126            nuclei_dictionary["spot_perimeter_crofton"] = []
2127
2128            for i, arr in enumerate(arrays_list):
2129
2130                spot_size_area = []
2131                spot_size_area_bbox = []
2132                spot_size_area_convex = []
2133                spot_size_area_convex = []
2134                spot_size_area_filled = []
2135                spot_axis_major_length = []
2136                spot_axis_minor_length = []
2137                spot_eccentricity = []
2138                spot_size_equivalent_diameter_area = []
2139                spot_feret_diameter_max = []
2140                spot_perimeter = []
2141                spot_perimeter_crofton = []
2142
2143                # Flatten the array,
2144                df_tmp = pd.DataFrame(arr)
2145                df_tmp["duplicates"] = add_lists(
2146                    [str(x) for x in df_tmp[0]], [str(y) for y in df_tmp[1]]
2147                )
2148
2149                counter_tmp = Counter(df_tmp["duplicates"])
2150
2151                for j, arr2 in enumerate(arrays_list2):
2152                    df_tmp2 = pd.DataFrame(arr2)
2153                    df_tmp2["duplicates"] = add_lists(
2154                        [str(x) for x in df_tmp2[0]], [str(y) for y in df_tmp2[1]]
2155                    )
2156
2157                    counter_tmp2 = Counter(df_tmp2["duplicates"])
2158                    intersection_length = len(counter_tmp.keys() & counter_tmp2.keys())
2159                    min_length = min(len(counter_tmp), len(counter_tmp2))
2160
2161                    if intersection_length >= 0.8 * min_length:
2162
2163                        if (
2164                            len(list(df_tmp2["duplicates"]))
2165                            / len(list(df_tmp["duplicates"]))
2166                        ) >= 0.025 and (
2167                            len(list(df_tmp2["duplicates"]))
2168                            / len(list(df_tmp["duplicates"]))
2169                        ) <= 0.5:
2170                            spot_size_area.append(chromation_dic["area"][j])
2171                            spot_size_area_bbox.append(chromation_dic["area_bbox"][j])
2172                            spot_size_area_convex.append(
2173                                chromation_dic["area_convex"][j]
2174                            )
2175                            spot_size_area_filled.append(
2176                                chromation_dic["area_filled"][j]
2177                            )
2178                            spot_axis_major_length.append(
2179                                chromation_dic["axis_major_length"][j]
2180                            )
2181                            spot_axis_minor_length.append(
2182                                chromation_dic["axis_minor_length"][j]
2183                            )
2184                            spot_eccentricity.append(chromation_dic["eccentricity"][j])
2185                            spot_size_equivalent_diameter_area.append(
2186                                chromation_dic["equivalent_diameter_area"][j]
2187                            )
2188                            spot_feret_diameter_max.append(
2189                                chromation_dic["feret_diameter_max"][j]
2190                            )
2191                            spot_perimeter.append(chromation_dic["perimeter"][j])
2192                            spot_perimeter_crofton.append(
2193                                chromation_dic["perimeter_crofton"][j]
2194                            )
2195
2196                nuclei_dictionary["spot_size_area"].append(spot_size_area)
2197                nuclei_dictionary["spot_size_area_bbox"].append(spot_size_area_bbox)
2198                nuclei_dictionary["spot_size_area_convex"].append(spot_size_area_convex)
2199                nuclei_dictionary["spot_size_area_filled"].append(spot_size_area_filled)
2200                nuclei_dictionary["spot_axis_major_length"].append(
2201                    spot_axis_major_length
2202                )
2203                nuclei_dictionary["spot_axis_minor_length"].append(
2204                    spot_axis_minor_length
2205                )
2206                nuclei_dictionary["spot_eccentricity"].append(spot_eccentricity)
2207                nuclei_dictionary["spot_size_equivalent_diameter_area"].append(
2208                    spot_size_equivalent_diameter_area
2209                )
2210                nuclei_dictionary["spot_feret_diameter_max"].append(
2211                    spot_feret_diameter_max
2212                )
2213                nuclei_dictionary["spot_perimeter"].append(spot_perimeter)
2214                nuclei_dictionary["spot_perimeter_crofton"].append(
2215                    spot_perimeter_crofton
2216                )
2217
2218            self.nuclei_results["chromatinization"] = chromation_dic
2219            self.nuclei_results["nuclei_chromatinization"] = nuclei_dictionary
2220
2221            self.images["nuclei_chromatinization"] = self.create_mask(
2222                chromation_dic, self.image
2223            )
2224
2225            img_chrom = adjust_img_16bit(
2226                cv2.cvtColor(
2227                    self.create_mask(
2228                        self.nuclei_results["chromatinization"], self.image
2229                    ),
2230                    cv2.COLOR_BGR2GRAY,
2231                ),
2232                color="yellow",
2233            )
2234
2235            if isinstance(self.nuclei_results["nuclei_reduced"], dict):
2236                nuclei_mask = adjust_img_16bit(
2237                    cv2.cvtColor(
2238                        self.create_mask(
2239                            self.nuclei_results["nuclei_reduced"], self.image
2240                        ),
2241                        cv2.COLOR_BGR2GRAY,
2242                    ),
2243                    color="blue",
2244                )
2245            else:
2246                nuclei_mask = adjust_img_16bit(
2247                    cv2.cvtColor(
2248                        self.create_mask(self.nuclei_results["nuclei"], self.image),
2249                        cv2.COLOR_BGR2GRAY,
2250                    ),
2251                    color="blue",
2252                )
2253
2254            nuclei_mask = merge_images([nuclei_mask, img_chrom], [1, 1])
2255
2256            try:
2257                img = cv2.cvtColor(full_im, cv2.COLOR_BGR2GRAY)
2258            except:
2259                img = full_im
2260
2261            oryginal = adjust_img_16bit(img, color="gray")
2262
2263            concatenated_image = cv2.hconcat([oryginal, nuclei_mask])
2264
2265            self.images["nuclei_chromatinization"] = concatenated_image
2266
2267            if cfg._DISPLAY_MODE:
2268                if self.show_plots:
2269                    display_preview(
2270                        self.resize_to_screen_img(
2271                            self.images["nuclei_chromatinization"]
2272                        )
2273                    )
2274
2275        else:
2276            print("Lack of nuclei data to select!")
2277
2278    # separate function for chromatinization
2279
2280    def _nuclei_chromatinization_series(self, image, nuclei_data):
2281        """
2282        Helper method for performing chromatinization analysis on nuclei detected in the provided image.
2283        """
2284
2285        def add_lists(f, g):
2286            result = []
2287            max_length = max(len(f), len(g))
2288
2289            for i in range(max_length):
2290                f_elem = f[i] if i < len(f) else ""
2291                g_elem = g[i] if i < len(g) else ""
2292                result.append(f_elem + g_elem)
2293
2294            return result
2295
2296        def reverse_coords(image, x, y):
2297
2298            zero = np.zeros(image.shape)
2299
2300            zero[x, y] = 2**16
2301
2302            zero_indices = np.where(zero == 0)
2303
2304            return zero_indices[0], zero_indices[1]
2305
2306        nuclei_dictionary = nuclei_data.copy()
2307
2308        if nuclei_dictionary is not None:
2309            arrays_list = copy.deepcopy(nuclei_dictionary["coords"])
2310
2311            chromatione_info = {
2312                "area": [],
2313                "area_bbox": [],
2314                "area_convex": [],
2315                "area_filled": [],
2316                "axis_major_length": [],
2317                "axis_minor_length": [],
2318                "eccentricity": [],
2319                "equivalent_diameter_area": [],
2320                "feret_diameter_max": [],
2321                "solidity": [],
2322                "perimeter": [],
2323                "perimeter_crofton": [],
2324                "coords": [],
2325            }
2326
2327            full_im = np.zeros(image.shape[0:2], dtype=np.uint16)
2328            full_im = adjust_img_16bit(full_im)
2329
2330            for arr in arrays_list:
2331                x = list(arr[:, 0])
2332                y = list(arr[:, 1])
2333
2334                x1, y1 = reverse_coords(image, x, y)
2335
2336                regions_chro2 = image.copy()
2337
2338                regions_chro2[x1, y1] = 0
2339
2340                regions_chro2 = regions_chro2.astype("uint16")
2341
2342                try:
2343                    regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY)
2344                except:
2345                    pass
2346
2347                regions_chro2 = adjust_img_16bit(
2348                    regions_chro2,
2349                    brightness=self.img_adj_par_chrom["brightness"],
2350                    contrast=self.img_adj_par_chrom["contrast"],
2351                    gamma=self.img_adj_par_chrom["gamma"],
2352                )
2353
2354                full_im = merge_images(
2355                    image_list=[full_im, regions_chro2], intensity_factors=[1, 1]
2356                )
2357
2358                ret, _ = cv2.threshold(
2359                    regions_chro2[x, y],
2360                    0,
2361                    2**16 - 1,
2362                    cv2.THRESH_BINARY + cv2.THRESH_OTSU,
2363                )
2364
2365                regions_chro2[
2366                    regions_chro2
2367                    <= ret * self.hyperparameter_chromatinization["cut_point"]
2368                ] = 0
2369
2370                regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY)
2371
2372                chromatione = regions_chro2 > 0
2373
2374                labeled_cells = measure.label(chromatione)
2375                regions = measure.regionprops(labeled_cells)
2376                regions = measure.regionprops(
2377                    labeled_cells, intensity_image=regions_chro2
2378                )
2379
2380                for region in regions:
2381
2382                    chromatione_info["area"].append(region.area)
2383                    chromatione_info["area_bbox"].append(region.area_bbox)
2384                    chromatione_info["area_convex"].append(region.area_convex)
2385                    chromatione_info["area_filled"].append(region.area_filled)
2386                    chromatione_info["axis_major_length"].append(
2387                        region.axis_major_length
2388                    )
2389                    chromatione_info["axis_minor_length"].append(
2390                        region.axis_minor_length
2391                    )
2392                    chromatione_info["eccentricity"].append(region.eccentricity)
2393                    chromatione_info["equivalent_diameter_area"].append(
2394                        region.equivalent_diameter_area
2395                    )
2396                    chromatione_info["feret_diameter_max"].append(
2397                        region.feret_diameter_max
2398                    )
2399                    chromatione_info["solidity"].append(region.solidity)
2400                    chromatione_info["perimeter"].append(region.perimeter)
2401                    chromatione_info["perimeter_crofton"].append(
2402                        region.perimeter_crofton
2403                    )
2404                    chromatione_info["coords"].append(region.coords)
2405
2406            ratios = []
2407
2408            for min_len, max_len in zip(
2409                chromatione_info["axis_minor_length"],
2410                chromatione_info["axis_major_length"],
2411            ):
2412                if max_len != 0:
2413                    ratio = min_len / max_len
2414                    ratios.append(ratio)
2415                else:
2416                    ratios.append(float(0.0))
2417
2418            chromatione_info["ratio"] = ratios
2419
2420            chromation_dic = self.drop_dict(
2421                chromatione_info,
2422                key="area",
2423                var=self.hyperparameter_chromatinization["min_size"],
2424                action=">",
2425            )
2426            chromation_dic = self.drop_dict(
2427                chromation_dic,
2428                key="area",
2429                var=self.hyperparameter_chromatinization["max_size"],
2430                action="<",
2431            )
2432            chromation_dic = self.drop_dict(
2433                chromation_dic,
2434                key="ratio",
2435                var=self.hyperparameter_chromatinization["ratio"],
2436                action=">",
2437            )
2438
2439            arrays_list2 = copy.deepcopy(chromation_dic["coords"])
2440
2441            nuclei_dictionary["spot_size_area"] = []
2442            nuclei_dictionary["spot_size_area_bbox"] = []
2443            nuclei_dictionary["spot_size_area_convex"] = []
2444            nuclei_dictionary["spot_size_area_filled"] = []
2445            nuclei_dictionary["spot_axis_major_length"] = []
2446            nuclei_dictionary["spot_axis_minor_length"] = []
2447            nuclei_dictionary["spot_eccentricity"] = []
2448            nuclei_dictionary["spot_size_equivalent_diameter_area"] = []
2449            nuclei_dictionary["spot_feret_diameter_max"] = []
2450            nuclei_dictionary["spot_perimeter"] = []
2451            nuclei_dictionary["spot_perimeter_crofton"] = []
2452
2453            for arr in arrays_list:
2454
2455                spot_size_area = []
2456                spot_size_area_bbox = []
2457                spot_size_area_convex = []
2458                spot_size_area_convex = []
2459                spot_size_area_filled = []
2460                spot_axis_major_length = []
2461                spot_axis_minor_length = []
2462                spot_eccentricity = []
2463                spot_size_equivalent_diameter_area = []
2464                spot_feret_diameter_max = []
2465                spot_perimeter = []
2466                spot_perimeter_crofton = []
2467
2468                # Flatten the array,
2469                df_tmp = pd.DataFrame(arr)
2470                df_tmp["duplicates"] = add_lists(
2471                    [str(x) for x in df_tmp[0]], [str(y) for y in df_tmp[1]]
2472                )
2473
2474                counter_tmp = Counter(df_tmp["duplicates"])
2475
2476                for j, arr2 in enumerate(arrays_list2):
2477                    df_tmp2 = pd.DataFrame(arr2)
2478                    df_tmp2["duplicates"] = add_lists(
2479                        [str(x) for x in df_tmp2[0]], [str(y) for y in df_tmp2[1]]
2480                    )
2481
2482                    counter_tmp2 = Counter(df_tmp2["duplicates"])
2483                    intersection_length = len(counter_tmp.keys() & counter_tmp2.keys())
2484                    min_length = min(len(counter_tmp), len(counter_tmp2))
2485
2486                    if intersection_length >= 0.8 * min_length:
2487
2488                        if (
2489                            len(list(df_tmp2["duplicates"]))
2490                            / len(list(df_tmp["duplicates"]))
2491                        ) >= 0.025 and (
2492                            len(list(df_tmp2["duplicates"]))
2493                            / len(list(df_tmp["duplicates"]))
2494                        ) <= 0.5:
2495                            spot_size_area.append(chromation_dic["area"][j])
2496                            spot_size_area_bbox.append(chromation_dic["area_bbox"][j])
2497                            spot_size_area_convex.append(
2498                                chromation_dic["area_convex"][j]
2499                            )
2500                            spot_size_area_filled.append(
2501                                chromation_dic["area_filled"][j]
2502                            )
2503                            spot_axis_major_length.append(
2504                                chromation_dic["axis_major_length"][j]
2505                            )
2506                            spot_axis_minor_length.append(
2507                                chromation_dic["axis_minor_length"][j]
2508                            )
2509                            spot_eccentricity.append(chromation_dic["eccentricity"][j])
2510                            spot_size_equivalent_diameter_area.append(
2511                                chromation_dic["equivalent_diameter_area"][j]
2512                            )
2513                            spot_feret_diameter_max.append(
2514                                chromation_dic["feret_diameter_max"][j]
2515                            )
2516                            spot_perimeter.append(chromation_dic["perimeter"][j])
2517                            spot_perimeter_crofton.append(
2518                                chromation_dic["perimeter_crofton"][j]
2519                            )
2520
2521                nuclei_dictionary["spot_size_area"].append(spot_size_area)
2522                nuclei_dictionary["spot_size_area_bbox"].append(spot_size_area_bbox)
2523                nuclei_dictionary["spot_size_area_convex"].append(spot_size_area_convex)
2524                nuclei_dictionary["spot_size_area_filled"].append(spot_size_area_filled)
2525                nuclei_dictionary["spot_axis_major_length"].append(
2526                    spot_axis_major_length
2527                )
2528                nuclei_dictionary["spot_axis_minor_length"].append(
2529                    spot_axis_minor_length
2530                )
2531                nuclei_dictionary["spot_eccentricity"].append(spot_eccentricity)
2532                nuclei_dictionary["spot_size_equivalent_diameter_area"].append(
2533                    spot_size_equivalent_diameter_area
2534                )
2535                nuclei_dictionary["spot_feret_diameter_max"].append(
2536                    spot_feret_diameter_max
2537                )
2538                nuclei_dictionary["spot_perimeter"].append(spot_perimeter)
2539                nuclei_dictionary["spot_perimeter_crofton"].append(
2540                    spot_perimeter_crofton
2541                )
2542
2543            self.nuclei_results["chromatinization"] = chromation_dic
2544            self.nuclei_results["nuclei_chromatinization"] = nuclei_dictionary
2545
2546            self.images["nuclei_chromatinization"] = self.create_mask(
2547                chromation_dic, image
2548            )
2549
2550            img_chrom = adjust_img_16bit(
2551                cv2.cvtColor(
2552                    self.create_mask(self.nuclei_results["chromatinization"], image),
2553                    cv2.COLOR_BGR2GRAY,
2554                ),
2555                color="yellow",
2556            )
2557
2558            nuclei_mask = adjust_img_16bit(
2559                cv2.cvtColor(self.create_mask(nuclei_data, image), cv2.COLOR_BGR2GRAY),
2560                color="blue",
2561            )
2562
2563            nuclei_mask = merge_images([nuclei_mask, img_chrom], [1, 1])
2564
2565            try:
2566                img = cv2.cvtColor(full_im, cv2.COLOR_BGR2GRAY)
2567            except:
2568                img = full_im
2569
2570            oryginal = adjust_img_16bit(img, color="gray")
2571
2572            concatenated_image = cv2.hconcat([oryginal, nuclei_mask])
2573
2574            self.images["nuclei_chromatinization"] = concatenated_image
2575
2576            if cfg._DISPLAY_MODE:
2577                if self.show_plots:
2578                    display_preview(
2579                        self.resize_to_screen_img(
2580                            self.images["nuclei_chromatinization"]
2581                        )
2582                    )
2583
2584        else:
2585            print("Lack of nuclei data to select!")
2586
2587    def browser_test(self):
2588        """
2589        Displays test results generated by the ``nuclei_finder_test()`` method
2590        in the default web browser.
2591        """
2592
2593        html_content = ""
2594
2595        for fig in self.test_results:
2596            buf = BytesIO()
2597            fig.savefig(buf, format="png", bbox_inches="tight")
2598            buf.seek(0)
2599
2600            img_base64 = base64.b64encode(buf.read()).decode("utf-8")
2601
2602            html_content += f'<img src="data:image/png;base64,{img_base64}" style="margin:10px;"/>\n'
2603
2604        with tempfile.NamedTemporaryFile(
2605            mode="w", delete=False, suffix=".html"
2606        ) as tmp_file:
2607            tmp_file.write(html_content)
2608            tmp_filename = tmp_file.name
2609
2610        webbrowser.open_new_tab(tmp_filename)
2611
2612    def series_analysis_chromatinization(
2613        self,
2614        path_to_images: str,
2615        file_extension: str = "tiff",
2616        selected_id: list = [],
2617        fille_name_part: str = "",
2618        selection_opt: bool = True,
2619        include_img: bool = True,
2620        test_series: int = 0,
2621    ):
2622        """
2623        Performs full analysis on images provided via the ``input_image()`` method
2624        using default or user-defined parameters.
2625
2626        This method runs nuclei detection, nuclei selection, and chromatinization
2627        analysis in a single pipeline. Users can adjust parameters for each step
2628        before running the analysis.
2629
2630        To show current parameters, use:
2631            - ``current_parameters_nuclei``
2632            - ``current_parameters_img_adj``
2633            - ``current_parameters_chromatinization``
2634            - ``current_parameters_img_adj_chro``
2635
2636        To set new parameters, use:
2637            - ``set_nms()``
2638            - ``set_prob()``
2639            - ``set_adj_image_gamma()``
2640            - ``set_adj_image_contrast()``
2641            - ``set_adj_image_brightness()``
2642            - ``set_nuclei_circularity()``
2643            - ``set_nuclei_size()``
2644            - ``set_nuclei_min_mean_intensity()``
2645            - ``set_chromatinization_size()``
2646            - ``set_chromatinization_ratio()``
2647            - ``set_chromatinization_cut_point()``
2648            - ``set_adj_chrom_gamma()``
2649            - ``set_adj_chrom_contrast()``
2650            - ``set_adj_chrom_brightness()``
2651
2652        Parameters
2653        ----------
2654        path_to_images : str
2655            Path to the directory containing images for analysis.
2656
2657        file_extension : str, optional
2658            Extension of the image files. Default is 'tiff'.
2659
2660        selected_id : list, optional
2661            List of IDs that must be part of the image name to distinguish them
2662            from others. Default is an empty list, which means all images in
2663            the directory will be processed.
2664
2665        fille_name_part : str, optional
2666            Part of the file name to filter images. Default is an empty string.
2667
2668        selection_opt : bool, optional
2669            Whether to run ``select_nuclei()`` with the defined parameters. Default is True.
2670
2671        include_img : bool, optional
2672            Whether to include the images in the result dictionary. Default is True.
2673
2674        test_series : int, optional
2675            Number of images to test the parameters and return results. Default is 0,
2676            which means all images in the directory will be processed.
2677
2678        Returns
2679        -------
2680        results_dict : dict
2681            Dictionary containing results for each image in the directory.
2682            Keys correspond to image file names.
2683
2684        Notes
2685        -----
2686        This method runs the complete nuclei and chromatinization analysis pipeline.
2687
2688        Parameters must be set appropriately before calling to ensure correct results.
2689        """
2690
2691        results_dict = {}
2692        results_img = {}
2693        results_img_raw = {}
2694
2695        files = glob.glob(os.path.join(path_to_images, "*." + file_extension))
2696
2697        if len(fille_name_part) > 0:
2698            files = [x for x in files if fille_name_part.lower() in x.lower()]
2699
2700        if len(selected_id) > 0:
2701            selected_id = [str(x) for x in selected_id]
2702            files = [
2703                x
2704                for x in files
2705                if re.sub("_.*", "", os.path.basename(x)) in selected_id
2706            ]
2707
2708        if test_series > 0:
2709
2710            files = random.sample(files, test_series)
2711
2712        self.show_plots = False
2713        self.series_im = True
2714
2715        print("\nFile analysis:\n\n")
2716
2717        for file in tqdm(files):
2718
2719            print(file)
2720
2721            self.show_plots = False
2722
2723            image = self.load_image(file)
2724
2725            self.input_image(image)
2726
2727            self.find_nuclei()
2728
2729            tmp = None
2730
2731            if selection_opt is True:
2732                self.select_nuclei()
2733                tmp = self.get_results_nuclei_selected()
2734
2735            else:
2736                tmp = self.get_results_nuclei()
2737
2738            if tmp is not None:
2739
2740                if tmp[0] is not None:
2741
2742                    results_dict[str(os.path.basename(file))] = tmp[0]
2743                    results_img[str(os.path.basename(file))] = tmp[1]
2744                    results_img_raw[str(os.path.basename(file))] = image
2745                    del tmp
2746                    del image
2747
2748        results_dict_tmp = self.repairing_nuclei(results_dict)
2749
2750        results_dict = {}
2751
2752        print("\nChromatization searching:\n\n")
2753
2754        for ke in tqdm(results_dict_tmp.keys()):
2755
2756            tmp = None
2757
2758            try:
2759                self._nuclei_chromatinization_series(
2760                    results_img_raw[ke], results_dict_tmp[ke]
2761                )
2762                tmp = self.get_results_nuclei_chromatinization()
2763            except:
2764                print(f"Sample {ke} could not be processed.")
2765
2766            if tmp is not None:
2767
2768                if tmp[0] is not None:
2769
2770                    tmp[0].pop("coords")
2771
2772                    if include_img:
2773                        results_dict[str(os.path.basename(ke))] = {
2774                            "stats": tmp[0],
2775                            "img": cv2.hconcat([results_img[ke], tmp[1]]),
2776                        }
2777                        del tmp
2778                    else:
2779                        results_dict[str(os.path.basename(ke))] = tmp[0]
2780                        del tmp
2781
2782            else:
2783                print(f"Unable to obtain results for {print(ke)}")
2784
2785        self.show_plots = True
2786        self.series_im = False
2787
2788        return results_dict
2789
2790    def series_analysis_nuclei(
2791        self,
2792        path_to_images: str,
2793        file_extension: str = "tiff",
2794        selected_id: list = [],
2795        fille_name_part: str = "",
2796        selection_opt: bool = True,
2797        include_img: bool = True,
2798        test_series: int = 0,
2799    ):
2800        """
2801        Performs analysis on the image provided by the ``input_image()`` method
2802        using default or user-defined parameters.
2803
2804        This method runs nuclei detection and selection using the currently set
2805        parameters. Users can adjust image preprocessing and nuclei detection
2806        parameters before running the analysis.
2807
2808        To show current parameters, use:
2809            - ``current_parameters_nuclei``
2810            - ``current_parameters_img_adj``
2811
2812        To set new parameters, use:
2813            - ``set_nms()``
2814            - ``set_prob()``
2815            - ``set_adj_image_gamma()``
2816            - ``set_adj_image_contrast()``
2817            - ``set_adj_image_brightness()``
2818            - ``set_nuclei_circularity()``
2819            - ``set_nuclei_size()``
2820            - ``set_nuclei_min_mean_intensity()``
2821
2822        Parameters
2823        ----------
2824        path_to_images : str
2825            Path to the directory containing images for analysis.
2826
2827        file_extension : str, optional
2828            Extension of the image files. Default is 'tiff'.
2829
2830        selected_id : list, optional
2831            List of IDs that must be part of the image name to distinguish them
2832            from others. Default is an empty list, which means all images in
2833            the directory will be processed.
2834
2835        fille_name_part : str, optional
2836            Part of the file name to filter images. Default is an empty string.
2837
2838        selection_opt : bool, optional
2839            Whether to run the ``select_nuclei()`` method with the defined parameters.
2840            Default is True.
2841
2842        include_img : bool, optional
2843            Whether to include the images in the result dictionary. Default is True.
2844
2845        test_series : int, optional
2846            Number of images to test the parameters and return results. Default is 0,
2847            which means all images in the directory will be processed.
2848
2849        Returns
2850        -------
2851        results_dict : dict
2852            Dictionary containing results for each image in the directory.
2853            Keys correspond to image file names.
2854        """
2855
2856        results_dict = {}
2857        results_img = {}
2858
2859        files = glob.glob(os.path.join(path_to_images, "*." + file_extension))
2860
2861        if len(fille_name_part) > 0:
2862            files = [x for x in files if fille_name_part.lower() in x.lower()]
2863
2864        if len(selected_id) > 0:
2865            selected_id = [str(x) for x in selected_id]
2866            files = [
2867                x
2868                for x in files
2869                if re.sub("_.*", "", os.path.basename(x)) in selected_id
2870            ]
2871
2872        if test_series > 0:
2873
2874            files = random.sample(files, test_series)
2875
2876        self.show_plots = False
2877        self.series_im = True
2878
2879        print("\nFile analysis:\n\n")
2880
2881        for file in tqdm(files):
2882
2883            print(file)
2884
2885            image = self.load_image(file)
2886
2887            self.input_image(image)
2888
2889            self.find_nuclei()
2890
2891            if self.nuclei_results["nuclei"] is not None:
2892
2893                tmp = [None]
2894
2895                if selection_opt is True:
2896                    self.select_nuclei()
2897                    tmp = self.get_results_nuclei_selected()
2898
2899                else:
2900                    tmp = self.get_results_nuclei()
2901
2902                if tmp is not None:
2903
2904                    if tmp[0] is not None:
2905
2906                        if include_img:
2907                            results_dict[str(os.path.basename(file))] = tmp[0]
2908                            results_img[str(os.path.basename(file))] = tmp[1]
2909
2910                            del tmp
2911
2912                        else:
2913                            results_dict[str(os.path.basename(file))] = tmp[0]
2914                            del tmp
2915
2916                else:
2917                    print(f"Unable to obtain results for {print(file)}")
2918
2919            else:
2920
2921                print(f"Unable to obtain results for {print(file)}")
2922
2923        self.show_plots = True
2924        self.series_im = False
2925
2926        results_dict_tmp = self.repairing_nuclei(results_dict)
2927
2928        if include_img is False:
2929
2930            return results_dict_tmp
2931
2932        else:
2933
2934            results_dict = {}
2935
2936            for ke in results_dict_tmp.keys():
2937
2938                nuclei_mask = adjust_img_16bit(
2939                    cv2.cvtColor(
2940                        self.create_mask(results_dict_tmp[ke], results_img[ke]),
2941                        cv2.COLOR_BGR2GRAY,
2942                    ),
2943                    color="blue",
2944                )
2945                concatenated_image = cv2.hconcat([results_img[ke], nuclei_mask])
2946
2947                cred = results_dict_tmp[ke]
2948                # cred.pop('coords')
2949
2950                results_dict[ke] = {"stats": cred, "img": concatenated_image}
2951
2952            return results_dict

Implements a comprehensive pipeline for automated segmentation, selection, and analysis of cell nuclei and their internal chromatin structure in microscopy images.

It utilizes a pre-trained deep learning model (StarDist2D) for initial nuclear identification, followed by the application of advanced morphological and intensity filters, and a dedicated algorithm for quantifying chromatinization. The class provides detailed control over the hyperparameters for both the segmentation process and image preprocessing stages.

Parameters

image : np.ndarray, optional The input image (typically 16-bit) for analysis.

test_results : list, optional Plots resulting from parameter testing (e.g., NMS/Prob combinations).

hyperparameter_nuclei : dict, optional Parameters for nuclei segmentation and filtering (e.g., 'nms', 'prob', 'min_size', 'circularity').

hyperparameter_chromatinization : dict, optional Parameters for segmenting and filtering chromatin spots (e.g., 'cut_point', 'ratio').

img_adj_par_chrom : dict, optional Image adjustment parameters (gamma, contrast) specifically for chromatin analysis.

img_adj_par : dict, optional Image adjustment parameters for nuclei segmentation.

show_plots : bool, optional Flag controlling the automatic display of visual results.

nuclei_results : dict, optional A dictionary storing numerical data (features) extracted from the nuclei.

images : dict, optional A dictionary storing output images and masks.

Attributes

image : np.ndarray The currently loaded image for analysis.

test_results : list The visual outcomes of NMS/Prob parameter tests.

hyperparameter_nuclei : dict A dictionary of active parameters used by the find_nuclei() and select_nuclei() methods.

hyperparameter_chromatinization : dict A dictionary of active parameters used by the nuclei_chromatinization() method.

img_adj_par : dict Image correction parameters for nuclei segmentation.

img_adj_par_chrom : dict Image correction parameters for chromatin analysis.

show_plots : bool The state of the plot display flag.

nuclei_results : dict Stores feature dictionaries for: all detected ('nuclei'), selected ('nuclei_reduced'), and chromatinization data ('nuclei_chromatinization').

images : dict Stores masks and images visualizing the results.

series_im : bool Flag indicating if the class is operating in a batch or series processing mode.

Methods

set_nms(nms) Sets the Non-Maximum Suppression (NMS) threshold.

set_prob(prob) Sets the segmentation probability threshold.

set_nuclei_circularity(circ) Sets the minimum required circularity for a nucleus.

set_nuclei_local_intenisty_FC(local_FC) Sets the factor used for removing false positives based on local intensity differences.

set_nuclei_global_area_FC(global_FC) Sets the factor used for removing size-based outlier false positives.

set_nuclei_size(size) Sets the minimum and maximum area (in pixels) for nuclei selection.

set_nuclei_min_mean_intensity(intensity) Sets the minimum required mean intensity value for a nucleus.

set_chromatinization_size(size) Sets the minimum and maximum area (in pixels) for chromatin spot selection.

set_chromatinization_cut_point(cut_point) Sets the factor used to adjust the chromatin segmentation threshold (Otsu's method).

set_adj_image_gamma(gamma) Sets the gamma correction for the nuclei image.

set_adj_chrom_contrast(contrast) Sets the contrast adjustment for the chromatinization image.

current_parameters_nuclei (property) Returns the active nuclei segmentation and filtering parameters.

find_nuclei() Performs nuclei segmentation using StarDist and extracts initial features.

select_nuclei() Filters the detected nuclei based on set morphological and intensity criteria.

nuclei_chromatinization() Performs quantitative and morphological analysis of chromatin spots in selected nuclei.

get_features(model_out, image) Calculates geometric and intensity features from a segmented mask (label image).

Notes

The typical analysis workflow follows this order:

  1. input_image()
  2. find_nuclei()
  3. select_nuclei() (Optional)
  4. nuclei_chromatinization() (Optional)
NucleiFinder( image=None, test_results=None, hyperparameter_nuclei=None, hyperparameter_chromatinization=None, img_adj_par_chrom=None, img_adj_par=None, show_plots=None, nuclei_results=None, images=None)
1095    def __init__(
1096        self,
1097        image=None,
1098        test_results=None,
1099        hyperparameter_nuclei=None,
1100        hyperparameter_chromatinization=None,
1101        img_adj_par_chrom=None,
1102        img_adj_par=None,
1103        show_plots=None,
1104        nuclei_results=None,
1105        images=None,
1106    ):
1107        """
1108        The main class for the detection and analysis of cell nuclei and their chromatinization
1109        in microscopy or flow cytometry images, utilizing the StarDist segmentation model.
1110
1111        This class inherits functionality for image processing (ImageTools) and
1112        results handling (RepTools).
1113
1114        Parameters
1115        ----------
1116        image : np.ndarray, optional
1117            The input image for analysis.
1118            Default: None.
1119
1120        test_results : list, optional
1121            A list of plots or images resulting from parameter testing.
1122            Default: None.
1123
1124        hyperparameter_nuclei : dict, optional
1125            The segmentation parameters for nuclei detection.
1126            Default:
1127            {'nms': 0.8, 'prob': 0.4, 'max_size': 1000, 'min_size': 20,
1128             'circularity': 0.6, 'intensity_mean': 6553.5, 'nn_min': 10,
1129             'FC_diff_global': 1.5, 'FC_diff_local_intensity': 0.6}
1130
1131        hyperparameter_chromatinization : dict, optional
1132            The analysis parameters for chromatin spots within the nuclei.
1133            Default:
1134            {'max_size': 800, 'min_size': 2, 'ratio': 0.1, 'cut_point': 0.95}
1135
1136        img_adj_par_chrom : dict, optional
1137            Image adjustment parameters (gamma, contrast, brightness) for chromatin analysis.
1138            Default: {'gamma': 0.25, 'contrast': 5, 'brightness': 950}
1139
1140        img_adj_par : dict, optional
1141            Image adjustment parameters (gamma, contrast, brightness) for nuclei segmentation.
1142            Default: {'gamma': 0.9, 'contrast': 2, 'brightness': 1000}
1143
1144        show_plots : bool, optional
1145            Flag to determine whether results and plots should be displayed automatically.
1146            Default: True.
1147
1148        nuclei_results : dict, optional
1149            A dictionary storing the numerical results of the analysis.
1150            Default: {'nuclei': None, 'nuclei_reduced': None, 'nuclei_chromatinization': None}
1151
1152        images : dict, optional
1153            A dictionary storing the output images (e.g., masks).
1154            Default: {'nuclei': None, 'nuclei_reduced': None, 'nuclei_chromatinization': None}
1155
1156        Attributes
1157        ----------
1158        image : np.ndarray
1159            The currently loaded image for analysis.
1160
1161        hyperparameter_nuclei : dict
1162            Active nuclei segmentation parameters.
1163
1164        hyperparameter_chromatinization : dict
1165            Active chromatinization analysis parameters.
1166
1167        img_adj_par : dict
1168            Active image correction parameters for nuclei segmentation.
1169
1170        img_adj_par_chrom : dict
1171            Active image correction parameters for chromatin analysis.
1172
1173        show_plots : bool
1174            The current state of the plot display flag.
1175
1176        series_im : bool
1177            Flag indicating if a series of images is being processed.
1178
1179        Notes
1180        -----
1181        The default value for 'intensity_mean' in hyperparameter_nuclei is calculated
1182        as $(2^{16} - 1) / 10$, which represents 10% of the maximum 16-bit value (65535 / 10 = 6553.5).
1183
1184        The image adjustment parameters are crucial for optimizing contrast and brightness
1185        to improve the performance of both the StarDist model and the subsequent
1186        chromatin thresholding.
1187        """
1188
1189        # Use default values if parameters are None
1190        self.image = image or None
1191        """Loaded input image."""
1192        self.test_results = test_results or None
1193        """Results of parameter tests.
1194
1195            This attribute or method stores the outcomes of parameter testing procedures.
1196            For interactive browsing and inspection of the results, use the 
1197            `browser_test(self)` method."""
1198
1199        self.hyperparameter_nuclei = hyperparameter_nuclei or {
1200            "nms": 0.8,
1201            "prob": 0.4,
1202            "max_size": 1000,
1203            "min_size": 20,
1204            "circularity": 0.6,
1205            "intensity_mean": (2**16 - 1) / 10,
1206            "nn_min": 10,
1207            "FC_diff_global": 1.5,
1208            "FC_diff_local_intensity": 0.6,
1209        }
1210        """Active nuclei segmentation/filter parameters."""
1211
1212        self.hyperparameter_chromatinization = hyperparameter_chromatinization or {
1213            "max_size": 800,
1214            "min_size": 2,
1215            "ratio": 0.1,
1216            "cut_point": 0.95,
1217        }
1218        """Active chromatin analysis parameters."""
1219
1220        self.img_adj_par_chrom = img_adj_par_chrom or {
1221            "gamma": 0.25,
1222            "contrast": 5,
1223            "brightness": 950,
1224        }
1225        """Image adjustment for chromatin analysis."""
1226
1227        self.img_adj_par = img_adj_par or {
1228            "gamma": 0.9,
1229            "contrast": 2,
1230            "brightness": 1000,
1231        }
1232        """Image adjustment for nuclei segmentation."""
1233
1234        self.show_plots = show_plots or True
1235        """Flag controlling plot display."""
1236
1237        self.nuclei_results = nuclei_results or {
1238            "nuclei": None,
1239            "nuclei_reduced": None,
1240            "nuclei_chromatinization": None,
1241        }
1242        """Stored dictionary of nuclei analysis results."""
1243
1244        self.images = images or {
1245            "nuclei": None,
1246            "nuclei_reduced": None,
1247            "nuclei_chromatinization": None,
1248        }
1249        """Stored dictionary of images from nuclei analysis."""
1250
1251        # sereies images
1252        self.series_im = False
1253        """Flag for batch/series image processing."""

The main class for the detection and analysis of cell nuclei and their chromatinization in microscopy or flow cytometry images, utilizing the StarDist segmentation model.

This class inherits functionality for image processing (ImageTools) and results handling (RepTools).

Parameters

image : np.ndarray, optional The input image for analysis. Default: None.

test_results : list, optional A list of plots or images resulting from parameter testing. Default: None.

hyperparameter_nuclei : dict, optional The segmentation parameters for nuclei detection. Default: {'nms': 0.8, 'prob': 0.4, 'max_size': 1000, 'min_size': 20, 'circularity': 0.6, 'intensity_mean': 6553.5, 'nn_min': 10, 'FC_diff_global': 1.5, 'FC_diff_local_intensity': 0.6}

hyperparameter_chromatinization : dict, optional The analysis parameters for chromatin spots within the nuclei. Default: {'max_size': 800, 'min_size': 2, 'ratio': 0.1, 'cut_point': 0.95}

img_adj_par_chrom : dict, optional Image adjustment parameters (gamma, contrast, brightness) for chromatin analysis. Default: {'gamma': 0.25, 'contrast': 5, 'brightness': 950}

img_adj_par : dict, optional Image adjustment parameters (gamma, contrast, brightness) for nuclei segmentation. Default: {'gamma': 0.9, 'contrast': 2, 'brightness': 1000}

show_plots : bool, optional Flag to determine whether results and plots should be displayed automatically. Default: True.

nuclei_results : dict, optional A dictionary storing the numerical results of the analysis. Default: {'nuclei': None, 'nuclei_reduced': None, 'nuclei_chromatinization': None}

images : dict, optional A dictionary storing the output images (e.g., masks). Default: {'nuclei': None, 'nuclei_reduced': None, 'nuclei_chromatinization': None}

Attributes

image : np.ndarray The currently loaded image for analysis.

hyperparameter_nuclei : dict Active nuclei segmentation parameters.

hyperparameter_chromatinization : dict Active chromatinization analysis parameters.

img_adj_par : dict Active image correction parameters for nuclei segmentation.

img_adj_par_chrom : dict Active image correction parameters for chromatin analysis.

show_plots : bool The current state of the plot display flag.

series_im : bool Flag indicating if a series of images is being processed.

Notes

The default value for 'intensity_mean' in hyperparameter_nuclei is calculated as $(2^{16} - 1) / 10$, which represents 10% of the maximum 16-bit value (65535 / 10 = 6553.5).

The image adjustment parameters are crucial for optimizing contrast and brightness to improve the performance of both the StarDist model and the subsequent chromatin thresholding.

image

Loaded input image.

test_results

Results of parameter tests.

This attribute or method stores the outcomes of parameter testing procedures. For interactive browsing and inspection of the results, use the browser_test(self) method.

hyperparameter_nuclei

Active nuclei segmentation/filter parameters.

hyperparameter_chromatinization

Active chromatin analysis parameters.

img_adj_par_chrom

Image adjustment for chromatin analysis.

img_adj_par

Image adjustment for nuclei segmentation.

show_plots

Flag controlling plot display.

nuclei_results

Stored dictionary of nuclei analysis results.

images

Stored dictionary of images from nuclei analysis.

series_im

Flag for batch/series image processing.

def set_nms(self, nms: float):
1255    def set_nms(self, nms: float):
1256        """
1257        Set the Non-Maximum Suppression (NMS) threshold.
1258
1259        The NMS threshold controls how aggressively overlapping detections are suppressed.
1260        A lower value reduces the probability of overlapping nuclei being kept.
1261
1262        Parameters
1263        ----------
1264        nms : float
1265            The NMS IoU threshold value.
1266        """
1267
1268        self.hyperparameter_nuclei["nms"] = nms

Set the Non-Maximum Suppression (NMS) threshold.

The NMS threshold controls how aggressively overlapping detections are suppressed. A lower value reduces the probability of overlapping nuclei being kept.

Parameters

nms : float The NMS IoU threshold value.

def set_prob(self, prob: float):
1270    def set_prob(self, prob: float):
1271        """
1272        Set the probability threshold used in segmentation.
1273
1274        The probability threshold determines the minimum confidence required for an object
1275        (e.g., a nucleus) to be classified as a segmented entity. Higher values result in
1276        fewer segmented objects, as only detections with strong confidence scores are kept.
1277        This may lead to omission of weaker or less distinct structures.
1278
1279        Because optimal values depend on image characteristics, it is important to visually
1280        inspect segmentation results produced with different thresholds to determine the
1281        most suitable setting.
1282
1283        Parameters
1284        ----------
1285        prob : float
1286            The probability threshold value.
1287        """
1288
1289        self.hyperparameter_nuclei["prob"] = prob

Set the probability threshold used in segmentation.

The probability threshold determines the minimum confidence required for an object (e.g., a nucleus) to be classified as a segmented entity. Higher values result in fewer segmented objects, as only detections with strong confidence scores are kept. This may lead to omission of weaker or less distinct structures.

Because optimal values depend on image characteristics, it is important to visually inspect segmentation results produced with different thresholds to determine the most suitable setting.

Parameters

prob : float The probability threshold value.

def set_nuclei_circularity(self, circ: float):
1291    def set_nuclei_circularity(self, circ: float):
1292        """
1293        This method sets 'circ' parameter. The circ is a parameter used for adjust minimal nucleus circularity.
1294
1295        Parameters
1296        ----------
1297        circ : float
1298            Nuclei circularity value.
1299        """
1300
1301        self.hyperparameter_nuclei["circularity"] = circ

This method sets the 'circ' parameter. The 'circ' value is used to adjust the minimal nucleus circularity.

Parameters

circ : float Nuclei circularity value.

def set_nuclei_local_intenisty_FC(self, local_FC: float):
1303    def set_nuclei_local_intenisty_FC(self, local_FC: float):
1304        """
1305        This method sets the 'FC_diff_local_intensity' parameter. The 'local_FC' is used to remove false positive multiple nuclei that were detected in single image.
1306
1307        Parameters
1308        ----------
1309        local_FC : float
1310            local_FC value.
1311        """
1312
1313        self.hyperparameter_nuclei["FC_diff_local_intensity"] = local_FC

This method sets the 'FC_diff_local_intensity' parameter. The 'local_FC' is used to remove false positive multiple nuclei that were detected in single image.

Parameters

local_FC : float local_FC value.

def set_nuclei_global_area_FC(self, global_FC: float):
1316    def set_nuclei_global_area_FC(self, global_FC: float):
1317        """
1318        This method sets the 'FC_diff_global' parameter. The 'global_FC' is used to remove false positive multiple nuclei that were detected in a single image and are outliers from the global mean area size.
1319
1320        Parameters
1321        ----------
1322        FC_diff_global : float
1323            global_FC value.
1324        """
1325
1326        self.hyperparameter_nuclei["FC_diff_global"] = global_FC

This method sets the 'FC_diff_global' parameter. The 'global_FC' is used to remove false positive multiple nuclei that were detected in a single image and are outliers from the global mean area size.

Parameters

global_FC : float global_FC value.

def set_nuclei_size(self, size: tuple):
1328    def set_nuclei_size(self, size: tuple):
1329        """
1330        This method sets 'size' parameter. The size is a parameter used for adjust minimal and maximal nucleus area (px).
1331
1332        Parameters
1333        ----------
1334        size : tuple
1335            (min_value, max_value)
1336        """
1337
1338        self.hyperparameter_nuclei["min_size"] = size[0]
1339        self.hyperparameter_nuclei["max_size"] = size[1]

This method sets the 'size' parameter, which is used to adjust the minimal and maximal nucleus area (px).

Parameters

size : tuple (min_value, max_value)

def set_nuclei_min_mean_intensity(self, intensity: int):
1341    def set_nuclei_min_mean_intensity(self, intensity: int):
1342        """
1343        This method sets 'intensity' parameter. The 'intensity' parameter is used to adjust the minimum mean intensity of all pixel intensities within the nucleus.
1344
1345        Parameters
1346        ----------
1347        intensity : int
1348            intensity value.
1349        """
1350
1351        self.hyperparameter_nuclei["intensity_mean"] = intensity

This method sets 'intensity' parameter. The 'intensity' parameter is used to adjust the minimum mean intensity of all pixel intensities within the nucleus.

Parameters

intensity : int intensity value.

def set_chromatinization_size(self, size: tuple):
1353    def set_chromatinization_size(self, size: tuple):
1354        """
1355        This method sets 'size' parameter. The size is a parameter used for adjust minimal and maximal chromanitization spot area (px) within the nucleus.
1356
1357        Parameters
1358        ----------
1359        size : tuple
1360            (min_value, max_value)
1361        """
1362
1363        self.hyperparameter_chromatinization["min_size"] = size[0]
1364        self.hyperparameter_chromatinization["max_size"] = size[1]

This method sets the 'size' parameter, which is used to adjust the minimal and maximal chromatinization spot area (px) within the nucleus.

Parameters

size : tuple (min_value, max_value)

def set_chromatinization_ratio(self, ratio: int):
1366    def set_chromatinization_ratio(self, ratio: int):
1367        """
1368        This method sets the 'ratio' parameter. In this case, the 'ratio' parameter is similar to 'circularity' as it describes the ratio between the maximum lengths in the x and y dimensions of the nucleus chromatinization.
1369
1370        Parameters
1371        ----------
1372        ratio : float
1373            ratio value.
1374        """
1375
1376        self.hyperparameter_chromatinization["ratio"] = ratio

This method sets the 'ratio' parameter. In this case, the 'ratio' parameter is similar to 'circularity' as it describes the ratio between the maximum lengths in the x and y dimensions of the nucleus chromatinization.

Parameters

ratio : float ratio value.

def set_chromatinization_cut_point(self, cut_point: int):
1378    def set_chromatinization_cut_point(self, cut_point: int):
1379        """
1380        This method sets the 'cut_point' parameter. The 'cut_point' parameter is a factor used to adjust the threshold for separating the background from chromatin spots.
1381
1382        Parameters
1383        ----------
1384        cut_point : int
1385            cut_point value.
1386        """
1387
1388        self.hyperparameter_chromatinization["cut_point"] = cut_point

This method sets the 'cut_point' parameter. The 'cut_point' parameter is a factor used to adjust the threshold for separating the background from chromatin spots.

Parameters

cut_point : int cut_point value.

def set_adj_image_gamma(self, gamma: float):
1392    def set_adj_image_gamma(self, gamma: float):
1393        """
1394        This method sets 'gamma' parameter. The gamma is a parameter used for adjust gamma of the nucleus image.
1395
1396        Parameters
1397        ----------
1398        gamma : float
1399            gamma value.
1400        """
1401
1402        self.img_adj_par["gamma"] = gamma

This method sets 'gamma' parameter. The gamma is a parameter used for adjust gamma of the nucleus image.

Parameters

gamma : float gamma value.

def set_adj_image_contrast(self, contrast: float):
1404    def set_adj_image_contrast(self, contrast: float):
1405        """
1406        This method sets 'contrast' parameter. The contrast is a parameter used for adjust contrast of the nucleus image.
1407
1408        Parameters
1409        ----------
1410        contrast : float
1411            contrast value.
1412        """
1413
1414        self.img_adj_par["contrast"] = contrast

This method sets 'contrast' parameter. The contrast is a parameter used for adjust contrast of the nucleus image.

Parameters

contrast : float contrast value.

def set_adj_image_brightness(self, brightness: float):
1416    def set_adj_image_brightness(self, brightness: float):
1417        """
1418        This method sets 'brightness' parameter. The brightness is a parameter used for adjust brightness of the nucleus image.
1419
1420        Parameters
1421        ----------
1422        brightness : float
1423            brightness value.
1424        """
1425
1426        self.img_adj_par["brightness"] = brightness

This method sets 'brightness' parameter. The brightness is a parameter used for adjust brightness of the nucleus image.

Parameters

brightness : float brightness value.

def set_adj_chrom_gamma(self, gamma: float):
1430    def set_adj_chrom_gamma(self, gamma: float):
1431        """
1432        This method sets 'gamma' parameter. The gamma is a parameter used for adjust gamma of the nucleus chromatinization image.
1433
1434        Parameters
1435        ----------
1436        gamma : float
1437            gamma value.
1438        """
1439
1440        self.img_adj_par_chrom["gamma"] = gamma

This method sets 'gamma' parameter. The gamma is a parameter used for adjust gamma of the nucleus chromatinization image.

Parameters

gamma : float gamma value.

def set_adj_chrom_contrast(self, contrast: float):
1442    def set_adj_chrom_contrast(self, contrast: float):
1443        """
1444        This method sets 'contrast' parameter. The contrast is a parameter used for adjust contrast of the nucleus chromatinization image.
1445
1446        Parameters
1447        ----------
1448        contrast : float
1449            contrast value.
1450        """
1451
1452        self.img_adj_par_chrom["contrast"] = contrast

This method sets 'contrast' parameter. The contrast is a parameter used for adjust contrast of the nucleus chromatinization image.

Parameters

contrast : float contrast value.

def set_adj_chrom_brightness(self, brightness: float):
1454    def set_adj_chrom_brightness(self, brightness: float):
1455        """
1456        This method sets 'brightness' parameter. The brightness is a parameter used for adjust brightness of the nucleus chromatinization image.
1457
1458        Parameters
1459        ----------
1460        brightness : float
1461            brightness value.
1462        """
1463
1464        self.img_adj_par_chrom["brightness"] = brightness

This method sets 'brightness' parameter. The brightness is a parameter used for adjust brightness of the nucleus chromatinization image.

Parameters

brightness : float brightness value.

current_parameters_nuclei
1466    @property
1467    def current_parameters_nuclei(self):
1468        """
1469        This method returns current nuclei analysis parameters.
1470
1471        Returns
1472        -------
1473        dict
1474            Nuclei analysis parameters.
1475        """
1476        print(self.hyperparameter_nuclei)
1477        return self.hyperparameter_nuclei

This method returns current nuclei analysis parameters.

Returns

dict Nuclei analysis parameters.

current_parameters_chromatinization
1479    @property
1480    def current_parameters_chromatinization(self):
1481        """
1482        This method returns current nuclei chromatinization analysis parameters.
1483
1484        Returns
1485        -------
1486        dict
1487            Nuclei chromatinization analysis parameters.
1488        """
1489
1490        print(self.hyperparameter_chromatinization)
1491        return self.hyperparameter_chromatinization

This method returns current nuclei chromatinization analysis parameters.

Returns

dict Nuclei chromatinization analysis parameters.

current_parameters_img_adj
1493    @property
1494    def current_parameters_img_adj(self):
1495        """
1496        This method returns current nuclei image setup.
1497
1498        Returns
1499        -------
1500        dict
1501            Nuclei image setup.
1502        """
1503
1504        print(self.img_adj_par)
1505        return self.img_adj_par

This method returns current nuclei image setup.

Returns

dict Nuclei image setup.

current_parameters_img_adj_chro
1507    @property
1508    def current_parameters_img_adj_chro(self):
1509        """
1510        This method returns current nuclei chromatinization image setup.
1511
1512        Returns
1513        -------
1514        dict
1515            Nuclei chromatinization image setup.
1516        """
1517
1518        print(self.img_adj_par_chrom)
1519        return self.img_adj_par_chrom

This method returns current nuclei chromatinization image setup.

Returns

dict Nuclei chromatinization image setup.

def get_results_nuclei(self):
1521    def get_results_nuclei(self):
1522        """
1523        This function returns nuclei analysis results.
1524
1525        Returns
1526        -------
1527        dict
1528            Nuclei results in the dictionary format.
1529        """
1530
1531        if self.images["nuclei"] is None:
1532            print("No results to return!")
1533            return None
1534        else:
1535            if cfg._DISPLAY_MODE:
1536                if self.show_plots:
1537                    display_preview(self.resize_to_screen_img(self.images["nuclei"]))
1538            return self.nuclei_results["nuclei"], self.images["nuclei"]

This function returns nuclei analysis results.

Returns

dict Nuclei results in the dictionary format.

def get_results_nuclei_selected(self):
1540    def get_results_nuclei_selected(self):
1541        """
1542        This function returns the results of the nuclei analysis following adjustments to the data selection thresholds.
1543
1544        Returns
1545        -------
1546        dict
1547            Nuclei results in the dictionary format.
1548        """
1549
1550        if self.images["nuclei_reduced"] is None:
1551            print("No results to return!")
1552            return None
1553        else:
1554            if cfg._DISPLAY_MODE:
1555                if self.show_plots:
1556                    display_preview(
1557                        self.resize_to_screen_img(self.images["nuclei_reduced"])
1558                    )
1559            return self.nuclei_results["nuclei_reduced"], self.images["nuclei_reduced"]

This function returns the results of the nuclei analysis following adjustments to the data selection thresholds.

Returns

dict Nuclei results in the dictionary format.

def get_results_nuclei_chromatinization(self):
1561    def get_results_nuclei_chromatinization(self):
1562        """
1563        This function returns the results of the nuclei chromatinization analysis.
1564
1565        Returns
1566        -------
1567        dict
1568            Nuclei chromatinization results in the dictionary format.
1569        """
1570
1571        if self.images["nuclei_chromatinization"] is None:
1572            print("No results to return!")
1573            return None
1574        else:
1575            if cfg._DISPLAY_MODE:
1576                if self.show_plots:
1577                    display_preview(self.images["nuclei_chromatinization"])
1578            return (
1579                self.nuclei_results["nuclei_chromatinization"],
1580                self.images["nuclei_chromatinization"],
1581            )

This function returns the results of the nuclei chromatinization analysis.

Returns

tuple of (dict, np.ndarray) — the nuclei chromatinization results dictionary and the corresponding annotated image.

def add_test(self, plots):
1583    def add_test(self, plots):
1584        self.test_results = plots
1585
1586        """
1587        Helper method.
1588        """
def input_image(self, img):
1590    def input_image(self, img):
1591        """
1592        This method adds the image to the class for nuclei and/or chromatinization analysis.
1593
1594        Parameters
1595        ----------
1596        img : np.ndarray
1597            Input image.
1598        """
1599
1600        self.image = img
1601        self.add_test(None)

This method adds the image to the class for nuclei and/or chromatinization analysis.

Parameters

img : np.ndarray Input image.

def get_features(self, model_out, image):
1603    def get_features(self, model_out, image):
1604        """
1605        Extracts numerical feature descriptors from model output for a given image.
1606
1607        This method processes the output returned by a feature-extraction model
1608        (e.g., CNN, encoder network, statistical model) and converts it into a
1609        structured feature vector associated with the provided image.
1610        Typically used for downstream analysis, classification, or clustering.
1611
1612        Parameters
1613        ----------
1614        model_out : any
1615            Output returned by the feature-extraction model.
1616            The expected format depends on the model (e.g., tensor, dict, list of arrays).
1617
1618        image : ndarray
1619            The input image (2D or 3D array) for which features are being extracted.
1620            Provided for reference or for combining raw image metrics with model features.
1621
1622        Returns
1623        -------
1624        features : dict
1625            Dictionary containing extracted features.
1626            Keys correspond to feature names, and values are numerical descriptors.
1627        """
1628
1629        features = {
1630            "label": [],
1631            "area": [],
1632            "area_bbox": [],
1633            "area_convex": [],
1634            "area_filled": [],
1635            "axis_major_length": [],
1636            "axis_minor_length": [],
1637            "eccentricity": [],
1638            "equivalent_diameter_area": [],
1639            "feret_diameter_max": [],
1640            "solidity": [],
1641            "perimeter": [],
1642            "perimeter_crofton": [],
1643            "circularity": [],
1644            "intensity_max": [],
1645            "intensity_mean": [],
1646            "intensity_min": [],
1647            "ratio": [],
1648            "coords": [],
1649        }
1650
1651        for region in skimage.measure.regionprops(model_out, intensity_image=image):
1652
1653            # Compute circularity
1654            if region.perimeter > 0:
1655                circularity = 4 * np.pi * region.area / (region.perimeter**2)
1656            else:
1657                circularity = 0
1658
1659            features["area"].append(region.area)
1660            features["area_bbox"].append(region.area_bbox)
1661            features["area_convex"].append(region.area_convex)
1662            features["area_filled"].append(region.area_filled)
1663            features["axis_major_length"].append(region.axis_major_length)
1664            features["axis_minor_length"].append(region.axis_minor_length)
1665            features["eccentricity"].append(region.eccentricity)
1666            features["equivalent_diameter_area"].append(region.equivalent_diameter_area)
1667            features["feret_diameter_max"].append(region.feret_diameter_max)
1668            features["solidity"].append(region.solidity)
1669            features["perimeter"].append(region.perimeter)
1670            features["perimeter_crofton"].append(region.perimeter_crofton)
1671            features["label"].append(region.label)
1672            features["coords"].append(region.coords)
1673            features["circularity"].append(circularity)
1674            features["intensity_max"].append(np.max(region.intensity_max))
1675            features["intensity_min"].append(np.max(region.intensity_min))
1676            features["intensity_mean"].append(np.max(region.intensity_mean))
1677
1678        ratios = []
1679
1680        # Calculate the ratio for each pair of values
1681        for min_len, max_len in zip(
1682            features["axis_minor_length"], features["axis_major_length"]
1683        ):
1684            if max_len != 0:
1685                ratio = min_len / max_len
1686                ratios.append(ratio)
1687            else:
1688                ratios.append(float(0.0))
1689
1690        features["ratio"] = ratios
1691
1692        return features

Extracts numerical feature descriptors from model output for a given image.

This method processes the output returned by a feature-extraction model (e.g., CNN, encoder network, statistical model) and converts it into a structured feature vector associated with the provided image. Typically used for downstream analysis, classification, or clustering.

Parameters

model_out : any Output returned by the feature-extraction model. The expected format depends on the model (e.g., tensor, dict, list of arrays).

image : ndarray The input image (2D or 3D array) for which features are being extracted. Provided for reference or for combining raw image metrics with model features.

Returns

features : dict Dictionary containing extracted features. Keys correspond to feature names, and values are numerical descriptors.

def nuclei_finder_test(self):
1695    def nuclei_finder_test(self):
1696        """
1697        This method performs testing analysis of parameters (specified 'nms' and 'prob' parameters)
1698        for the image provided by the input_image() method.
1699
1700        This method evaluates the performance of the internal NucleiFinder
1701        configuration using the currently loaded images, parameters, or model
1702        settings. It is typically used to check whether the detection, segmentation
1703        or preprocessing stages run correctly on sample data.
1704
1705        Examples
1706        --------
1707        >>> nf.nuclei_finder_test()
1708        >>> nf.browser_test()
1709        """
1710
1711        StarDist2D.from_pretrained()
1712        model = StarDist2D.from_pretrained("2D_versatile_fluo")
1713
1714        nmst = [0.1, 0.2, 0.6]
1715        probt = [0.1, 0.5, 0.9]
1716
1717        try:
1718            img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
1719        except:
1720            img = self.image
1721
1722        plot = []
1723
1724        # adj img
1725        img = adjust_img_16bit(
1726            img,
1727            brightness=self.img_adj_par["brightness"],
1728            contrast=self.img_adj_par["contrast"],
1729            gamma=self.img_adj_par["gamma"],
1730        )
1731        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
1732
1733        fig = plt.figure(dpi=300)
1734        plt.imshow(img)
1735        plt.axis("off")
1736        plt.title("Original", fontsize=25)
1737
1738        if cfg._DISPLAY_MODE:
1739            if self.show_plots:
1740                plt.show()
1741
1742        plot.append(fig)
1743
1744        for n in tqdm(nmst, desc="Loop 1: nmst"):
1745            print(f"\n➡️ Starting outer loop for n = {n}")
1746
1747            for t in tqdm(probt, desc=f"   ↳ Loop 2 for n={n}", leave=False):
1748                print(f"   → Starting inner loop for t = {t}")
1749
1750                labels, _ = model.predict_instances(
1751                    normalize(img.copy()), nms_thresh=n, prob_thresh=t
1752                )
1753
1754                tmp = self.get_features(model_out=labels, image=img)
1755
1756                fig = plt.figure(dpi=300)
1757                plt.imshow(render_label(labels, img=img))
1758                plt.axis("off")
1759                plt.title(
1760                    f"nms {n} & prob {t} \n detected nuc: {len(tmp['area'])}",
1761                    fontsize=25,
1762                )
1763
1764                if cfg._DISPLAY_MODE:
1765                    if self.show_plots:
1766                        plt.show()
1767
1768                plot.append(fig)
1769
1770        self.add_test(plot)

This method performs testing analysis of parameters (specified 'nms' and 'prob' parameters) for the image provided by the input_image() method.

This method evaluates the performance of the internal NucleiFinder configuration using the currently loaded images, parameters, or model settings. It is typically used to check whether the detection, segmentation or preprocessing stages run correctly on sample data.

Examples

>>> nf.nuclei_finder_test()
>>> nf.browser_test()
def find_nuclei(self):
1772    def find_nuclei(self):
1773        """
1774        Performs analysis on the image provided by the ``input_image()`` method
1775        using default or user-defined parameters.
1776
1777        To show current parameters, use:
1778            - ``current_parameters_nuclei``
1779            - ``current_parameters_img_adj``
1780
1781        To set new parameters, use:
1782            - ``set_nms()``
1783            - ``set_prob()``
1784            - ``set_adj_image_gamma()``
1785            - ``set_adj_image_contrast()``
1786            - ``set_adj_image_brightness()``
1787
1788        To get analysis results, use:
1789            - ``get_results_nuclei()``
1790        """
1791
1792        if isinstance(self.image, np.ndarray):
1793
1794            model = StarDist2D.from_pretrained("2D_versatile_fluo")
1795
1796            try:
1797                img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
1798            except:
1799                img = self.image
1800
1801            img = adjust_img_16bit(
1802                img,
1803                brightness=self.img_adj_par["brightness"],
1804                contrast=self.img_adj_par["contrast"],
1805                gamma=self.img_adj_par["gamma"],
1806            )
1807            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
1808            labels, _ = model.predict_instances(
1809                normalize(img),
1810                nms_thresh=self.hyperparameter_nuclei["nms"],
1811                prob_thresh=self.hyperparameter_nuclei["prob"],
1812            )
1813
1814            self.nuclei_results["nuclei"] = self.get_features(
1815                model_out=labels, image=img
1816            )
1817
1818            if len(self.nuclei_results["nuclei"]["coords"]) > 0:
1819
1820                oryginal = adjust_img_16bit(img, color="gray")
1821
1822                # series repaired nuclesu
1823                if self.series_im is True:
1824                    self.images["nuclei"] = oryginal
1825                else:
1826                    nuclei_mask = adjust_img_16bit(
1827                        cv2.cvtColor(
1828                            self.create_mask(self.nuclei_results["nuclei"], oryginal),
1829                            cv2.COLOR_BGR2GRAY,
1830                        ),
1831                        color="blue",
1832                    )
1833                    concatenated_image = cv2.hconcat([oryginal, nuclei_mask])
1834                    self.images["nuclei"] = concatenated_image
1835
1836                if cfg._DISPLAY_MODE:
1837                    if self.show_plots:
1838                        display_preview(
1839                            self.resize_to_screen_img(self.images["nuclei"])
1840                        )
1841
1842            else:
1843
1844                self.nuclei_results["nuclei"] = None
1845                self.nuclei_results["nuclei_reduced"] = None
1846                self.nuclei_results["nuclei_chromatinization"] = None
1847
1848                print("Nuclei not detected!")
1849
1850        else:
1851            print("\nAdd image firstly!")

Performs analysis on the image provided by the input_image() method using default or user-defined parameters.

To show current parameters, use: - current_parameters_nuclei - current_parameters_img_adj

To set new parameters, use: - set_nms() - set_prob() - set_adj_image_gamma() - set_adj_image_contrast() - set_adj_image_brightness()

To get analysis results, use: - get_results_nuclei()

def select_nuclei(self):
1853    def select_nuclei(self):
1854        """
1855        Selects data obtained from ``find_nuclei()`` based on the set threshold parameters.
1856
1857        To show current parameters, use:
1858            - ``current_parameters_nuclei``
1859
1860        To set new parameters, use:
1861            - ``set_nuclei_circularity()``
1862            - ``set_nuclei_size()``
1863            - ``set_nuclei_min_mean_intensity()``
1864
1865        To get analysis results, use:
1866            - ``get_results_nuclei_selected()``
1867        """
1868
1869        if self.nuclei_results["nuclei"] is not None:
1870            input_in = copy.deepcopy(self.nuclei_results["nuclei"])
1871
1872            nuclei_dictionary = self.drop_dict(
1873                input_in,
1874                key="area",
1875                var=self.hyperparameter_nuclei["min_size"],
1876                action=">",
1877            )
1878            nuclei_dictionary = self.drop_dict(
1879                nuclei_dictionary,
1880                key="area",
1881                var=self.hyperparameter_nuclei["max_size"],
1882                action="<",
1883            )
1884            nuclei_dictionary = self.drop_dict(
1885                nuclei_dictionary,
1886                key="intensity_mean",
1887                var=self.hyperparameter_nuclei["intensity_mean"],
1888                action=">",
1889            )
1890
1891            if len(nuclei_dictionary["coords"]) > 0:
1892
1893                self.nuclei_results["nuclei_reduced"] = nuclei_dictionary
1894
1895                try:
1896                    img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
1897                except:
1898                    img = self.image
1899
1900                oryginal = adjust_img_16bit(img, color="gray")
1901
1902                # series repaired nuclesu
1903                if self.series_im is True:
1904                    self.images["nuclei_reduced"] = oryginal
1905                else:
1906                    nuclei_mask = adjust_img_16bit(
1907                        cv2.cvtColor(
1908                            self.create_mask(
1909                                self.nuclei_results["nuclei_reduced"], oryginal
1910                            ),
1911                            cv2.COLOR_BGR2GRAY,
1912                        ),
1913                        color="blue",
1914                    )
1915                    concatenated_image = cv2.hconcat([oryginal, nuclei_mask])
1916
1917                    self.images["nuclei_reduced"] = concatenated_image
1918
1919                if cfg._DISPLAY_MODE:
1920                    if self.show_plots:
1921                        display_preview(
1922                            self.resize_to_screen_img(self.images["nuclei_reduced"])
1923                        )
1924
1925            else:
1926                self.nuclei_results["nuclei"] = None
1927                self.nuclei_results["nuclei_reduced"] = None
1928                self.nuclei_results["nuclei_chromatinization"] = None
1929
1930                print("Selected zero nuclei! Analysis stop!")
1931
1932        else:
1933            print("Lack of nuclei data to select!")

Selects data obtained from find_nuclei() based on the set threshold parameters.

To show current parameters, use: - current_parameters_nuclei

To set new parameters, use: - set_nuclei_circularity() - set_nuclei_size() - set_nuclei_min_mean_intensity()

To get analysis results, use: - get_results_nuclei_selected()

def nuclei_chromatinization(self):
    def nuclei_chromatinization(self):
        """
        Performs chromatinization analysis of nuclei using data obtained from
        ``find_nuclei()`` and/or ``select_nuclei()``.

        For every detected nucleus, the nucleus pixels are isolated, adjusted
        (brightness/contrast/gamma), thresholded with Otsu's method scaled by
        the configured cut point, and the resulting bright spots are measured
        with ``regionprops``.  Spots are size/ratio-filtered and then matched
        back to their parent nuclei by coordinate overlap; per-nucleus spot
        features are appended to the nuclei dictionary under ``spot_*`` keys.

        To show current parameters, use:
            - ``current_parameters_chromatinization``
            - ``current_parameters_img_adj_chro``

        To set new parameters, use:
            - ``set_chromatinization_size()``
            - ``set_chromatinization_ratio()``
            - ``set_chromatinization_cut_point()``
            - ``set_adj_chrom_gamma()``
            - ``set_adj_chrom_contrast()``
            - ``set_adj_chrom_brightness()``

        To get analysis results, use:
            - ``get_results_nuclei_chromatinization()``
        """

        def add_lists(f, g):
            """Concatenate f and g element-wise as strings, padding the shorter list with ''."""

            result = []
            max_length = max(len(f), len(g))

            for i in range(max_length):
                f_elem = f[i] if i < len(f) else ""
                g_elem = g[i] if i < len(g) else ""
                result.append(f_elem + g_elem)

            return result

        def reverse_coords(image, x, y):
            """Return the coordinates of every pixel of ``image`` NOT listed in (x, y)."""

            zero = np.zeros(image.shape)

            zero[x, y] = 2**16

            zero_indices = np.where(zero == 0)

            return zero_indices[0], zero_indices[1]

        # Prefer the threshold-filtered nuclei when select_nuclei() was run.
        if isinstance(self.nuclei_results["nuclei_reduced"], dict):
            nuclei_dictionary = self.nuclei_results["nuclei_reduced"]
        else:
            nuclei_dictionary = self.nuclei_results["nuclei"]

        if nuclei_dictionary is not None:
            arrays_list = copy.deepcopy(nuclei_dictionary["coords"])

            # Accumulators for chromatin-spot regionprops across all nuclei.
            chromatione_info = {
                "area": [],
                "area_bbox": [],
                "area_convex": [],
                "area_filled": [],
                "axis_major_length": [],
                "axis_minor_length": [],
                "eccentricity": [],
                "equivalent_diameter_area": [],
                "feret_diameter_max": [],
                "solidity": [],
                "perimeter": [],
                "perimeter_crofton": [],
                "coords": [],
            }

            # Canvas accumulating all adjusted per-nucleus crops for display.
            full_im = np.zeros(self.image.shape[0:2], dtype=np.uint16)
            full_im = adjust_img_16bit(full_im)

            # Pass 1: per nucleus, isolate its pixels, adjust, Otsu-threshold,
            # and measure the remaining bright spots.
            for arr in arrays_list:
                x = list(arr[:, 0])
                y = list(arr[:, 1])

                x1, y1 = reverse_coords(self.image, x, y)

                regions_chro2 = self.image.copy()

                # Zero out everything outside the current nucleus.
                regions_chro2[x1, y1] = 0

                regions_chro2 = regions_chro2.astype("uint16")

                # NOTE(review): bare except — presumably guards cvtColor on an
                # already single-channel image (cv2.error); confirm and narrow.
                try:
                    regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY)
                except:
                    pass

                regions_chro2 = adjust_img_16bit(
                    regions_chro2,
                    brightness=self.img_adj_par_chrom["brightness"],
                    contrast=self.img_adj_par_chrom["contrast"],
                    gamma=self.img_adj_par_chrom["gamma"],
                )

                full_im = merge_images(
                    image_list=[full_im, regions_chro2], intensity_factors=[1, 1]
                )

                # Otsu threshold computed only over the nucleus pixels.
                ret, thresh = cv2.threshold(
                    regions_chro2[x, y],
                    0,
                    2**16 - 1,
                    cv2.THRESH_BINARY + cv2.THRESH_OTSU,
                )

                # Suppress pixels below the scaled Otsu level ('cut_point').
                regions_chro2[
                    regions_chro2
                    <= ret * self.hyperparameter_chromatinization["cut_point"]
                ] = 0

                regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY)

                chromatione = regions_chro2 > 0

                labeled_cells = measure.label(chromatione)
                # NOTE(review): the first regionprops call is dead code — its
                # result is immediately overwritten by the intensity variant.
                regions = measure.regionprops(labeled_cells)
                regions = measure.regionprops(
                    labeled_cells, intensity_image=regions_chro2
                )

                for region in regions:

                    chromatione_info["area"].append(region.area)
                    chromatione_info["area_bbox"].append(region.area_bbox)
                    chromatione_info["area_convex"].append(region.area_convex)
                    chromatione_info["area_filled"].append(region.area_filled)
                    chromatione_info["axis_major_length"].append(
                        region.axis_major_length
                    )
                    chromatione_info["axis_minor_length"].append(
                        region.axis_minor_length
                    )
                    chromatione_info["eccentricity"].append(region.eccentricity)
                    chromatione_info["equivalent_diameter_area"].append(
                        region.equivalent_diameter_area
                    )
                    chromatione_info["feret_diameter_max"].append(
                        region.feret_diameter_max
                    )
                    chromatione_info["solidity"].append(region.solidity)
                    chromatione_info["perimeter"].append(region.perimeter)
                    chromatione_info["perimeter_crofton"].append(
                        region.perimeter_crofton
                    )
                    chromatione_info["coords"].append(region.coords)

            # Minor/major axis ratio per spot; 0.0 for a zero major axis.
            ratios = []

            for min_len, max_len in zip(
                chromatione_info["axis_minor_length"],
                chromatione_info["axis_major_length"],
            ):
                if max_len != 0:
                    ratio = min_len / max_len
                    ratios.append(ratio)
                else:
                    ratios.append(float(0.0))

            chromatione_info["ratio"] = ratios

            # Filter spots to min_size < area < max_size and ratio > threshold.
            chromation_dic = self.drop_dict(
                chromatione_info,
                key="area",
                var=self.hyperparameter_chromatinization["min_size"],
                action=">",
            )
            chromation_dic = self.drop_dict(
                chromation_dic,
                key="area",
                var=self.hyperparameter_chromatinization["max_size"],
                action="<",
            )
            chromation_dic = self.drop_dict(
                chromation_dic,
                key="ratio",
                var=self.hyperparameter_chromatinization["ratio"],
                action=">",
            )

            arrays_list2 = copy.deepcopy(chromation_dic["coords"])

            # Per-nucleus spot feature lists (one sub-list per nucleus).
            nuclei_dictionary["spot_size_area"] = []
            nuclei_dictionary["spot_size_area_bbox"] = []
            nuclei_dictionary["spot_size_area_convex"] = []
            nuclei_dictionary["spot_size_area_filled"] = []
            nuclei_dictionary["spot_axis_major_length"] = []
            nuclei_dictionary["spot_axis_minor_length"] = []
            nuclei_dictionary["spot_eccentricity"] = []
            nuclei_dictionary["spot_size_equivalent_diameter_area"] = []
            nuclei_dictionary["spot_feret_diameter_max"] = []
            nuclei_dictionary["spot_perimeter"] = []
            nuclei_dictionary["spot_perimeter_crofton"] = []

            # Pass 2: assign each filtered spot to its parent nucleus by
            # coordinate-string overlap.
            for i, arr in enumerate(arrays_list):

                spot_size_area = []
                spot_size_area_bbox = []
                # NOTE(review): duplicated initializer below (harmless no-op).
                spot_size_area_convex = []
                spot_size_area_convex = []
                spot_size_area_filled = []
                spot_axis_major_length = []
                spot_axis_minor_length = []
                spot_eccentricity = []
                spot_size_equivalent_diameter_area = []
                spot_feret_diameter_max = []
                spot_perimeter = []
                spot_perimeter_crofton = []

                # Flatten the array,
                df_tmp = pd.DataFrame(arr)
                df_tmp["duplicates"] = add_lists(
                    [str(x) for x in df_tmp[0]], [str(y) for y in df_tmp[1]]
                )

                counter_tmp = Counter(df_tmp["duplicates"])

                for j, arr2 in enumerate(arrays_list2):
                    df_tmp2 = pd.DataFrame(arr2)
                    df_tmp2["duplicates"] = add_lists(
                        [str(x) for x in df_tmp2[0]], [str(y) for y in df_tmp2[1]]
                    )

                    counter_tmp2 = Counter(df_tmp2["duplicates"])
                    intersection_length = len(counter_tmp.keys() & counter_tmp2.keys())
                    min_length = min(len(counter_tmp), len(counter_tmp2))

                    # Spot belongs to this nucleus when >= 80% of the smaller
                    # coordinate set overlaps.
                    if intersection_length >= 0.8 * min_length:

                        # Keep spots whose pixel count is between 2.5% and 50%
                        # of the nucleus pixel count (tuning constants —
                        # presumably empirical; confirm with the authors).
                        if (
                            len(list(df_tmp2["duplicates"]))
                            / len(list(df_tmp["duplicates"]))
                        ) >= 0.025 and (
                            len(list(df_tmp2["duplicates"]))
                            / len(list(df_tmp["duplicates"]))
                        ) <= 0.5:
                            spot_size_area.append(chromation_dic["area"][j])
                            spot_size_area_bbox.append(chromation_dic["area_bbox"][j])
                            spot_size_area_convex.append(
                                chromation_dic["area_convex"][j]
                            )
                            spot_size_area_filled.append(
                                chromation_dic["area_filled"][j]
                            )
                            spot_axis_major_length.append(
                                chromation_dic["axis_major_length"][j]
                            )
                            spot_axis_minor_length.append(
                                chromation_dic["axis_minor_length"][j]
                            )
                            spot_eccentricity.append(chromation_dic["eccentricity"][j])
                            spot_size_equivalent_diameter_area.append(
                                chromation_dic["equivalent_diameter_area"][j]
                            )
                            spot_feret_diameter_max.append(
                                chromation_dic["feret_diameter_max"][j]
                            )
                            spot_perimeter.append(chromation_dic["perimeter"][j])
                            spot_perimeter_crofton.append(
                                chromation_dic["perimeter_crofton"][j]
                            )

                nuclei_dictionary["spot_size_area"].append(spot_size_area)
                nuclei_dictionary["spot_size_area_bbox"].append(spot_size_area_bbox)
                nuclei_dictionary["spot_size_area_convex"].append(spot_size_area_convex)
                nuclei_dictionary["spot_size_area_filled"].append(spot_size_area_filled)
                nuclei_dictionary["spot_axis_major_length"].append(
                    spot_axis_major_length
                )
                nuclei_dictionary["spot_axis_minor_length"].append(
                    spot_axis_minor_length
                )
                nuclei_dictionary["spot_eccentricity"].append(spot_eccentricity)
                nuclei_dictionary["spot_size_equivalent_diameter_area"].append(
                    spot_size_equivalent_diameter_area
                )
                nuclei_dictionary["spot_feret_diameter_max"].append(
                    spot_feret_diameter_max
                )
                nuclei_dictionary["spot_perimeter"].append(spot_perimeter)
                nuclei_dictionary["spot_perimeter_crofton"].append(
                    spot_perimeter_crofton
                )

            self.nuclei_results["chromatinization"] = chromation_dic
            self.nuclei_results["nuclei_chromatinization"] = nuclei_dictionary

            # NOTE(review): this assignment is overwritten by the concatenated
            # image below — apparently dead code; confirm before removing.
            self.images["nuclei_chromatinization"] = self.create_mask(
                chromation_dic, self.image
            )

            # Build the preview: yellow chromatin spots over blue nuclei mask.
            img_chrom = adjust_img_16bit(
                cv2.cvtColor(
                    self.create_mask(
                        self.nuclei_results["chromatinization"], self.image
                    ),
                    cv2.COLOR_BGR2GRAY,
                ),
                color="yellow",
            )

            if isinstance(self.nuclei_results["nuclei_reduced"], dict):
                nuclei_mask = adjust_img_16bit(
                    cv2.cvtColor(
                        self.create_mask(
                            self.nuclei_results["nuclei_reduced"], self.image
                        ),
                        cv2.COLOR_BGR2GRAY,
                    ),
                    color="blue",
                )
            else:
                nuclei_mask = adjust_img_16bit(
                    cv2.cvtColor(
                        self.create_mask(self.nuclei_results["nuclei"], self.image),
                        cv2.COLOR_BGR2GRAY,
                    ),
                    color="blue",
                )

            nuclei_mask = merge_images([nuclei_mask, img_chrom], [1, 1])

            # NOTE(review): bare except — presumably guards cvtColor on an
            # already single-channel image (cv2.error); confirm and narrow.
            try:
                img = cv2.cvtColor(full_im, cv2.COLOR_BGR2GRAY)
            except:
                img = full_im

            oryginal = adjust_img_16bit(img, color="gray")

            concatenated_image = cv2.hconcat([oryginal, nuclei_mask])

            self.images["nuclei_chromatinization"] = concatenated_image

            if cfg._DISPLAY_MODE:
                if self.show_plots:
                    display_preview(
                        self.resize_to_screen_img(
                            self.images["nuclei_chromatinization"]
                        )
                    )

        else:
            print("Lack of nuclei data to select!")

Performs chromatinization analysis of nuclei using data obtained from find_nuclei() and/or select_nuclei().

To show current parameters, use: - current_parameters_chromatinization - current_parameters_img_adj_chro

To set new parameters, use: - set_chromatinization_size() - set_chromatinization_ratio() - set_chromatinization_cut_point() - set_adj_chrom_gamma() - set_adj_chrom_contrast() - set_adj_chrom_brightness()

To get analysis results, use: - get_results_nuclei_chromatinization()

def browser_test(self):
2587    def browser_test(self):
2588        """
2589        Displays test results generated by the ``nuclei_finder_test()`` method
2590        in the default web browser.
2591        """
2592
2593        html_content = ""
2594
2595        for fig in self.test_results:
2596            buf = BytesIO()
2597            fig.savefig(buf, format="png", bbox_inches="tight")
2598            buf.seek(0)
2599
2600            img_base64 = base64.b64encode(buf.read()).decode("utf-8")
2601
2602            html_content += f'<img src="data:image/png;base64,{img_base64}" style="margin:10px;"/>\n'
2603
2604        with tempfile.NamedTemporaryFile(
2605            mode="w", delete=False, suffix=".html"
2606        ) as tmp_file:
2607            tmp_file.write(html_content)
2608            tmp_filename = tmp_file.name
2609
2610        webbrowser.open_new_tab(tmp_filename)

Displays test results generated by the nuclei_finder_test() method in the default web browser.

def series_analysis_chromatinization( self, path_to_images: str, file_extension: str = 'tiff', selected_id: list = [], fille_name_part: str = '', selection_opt: bool = True, include_img: bool = True, test_series: int = 0):
2612    def series_analysis_chromatinization(
2613        self,
2614        path_to_images: str,
2615        file_extension: str = "tiff",
2616        selected_id: list = [],
2617        fille_name_part: str = "",
2618        selection_opt: bool = True,
2619        include_img: bool = True,
2620        test_series: int = 0,
2621    ):
2622        """
2623        Performs full analysis on images provided via the ``input_image()`` method
2624        using default or user-defined parameters.
2625
2626        This method runs nuclei detection, nuclei selection, and chromatinization
2627        analysis in a single pipeline. Users can adjust parameters for each step
2628        before running the analysis.
2629
2630        To show current parameters, use:
2631            - ``current_parameters_nuclei``
2632            - ``current_parameters_img_adj``
2633            - ``current_parameters_chromatinization``
2634            - ``current_parameters_img_adj_chro``
2635
2636        To set new parameters, use:
2637            - ``set_nms()``
2638            - ``set_prob()``
2639            - ``set_adj_image_gamma()``
2640            - ``set_adj_image_contrast()``
2641            - ``set_adj_image_brightness()``
2642            - ``set_nuclei_circularity()``
2643            - ``set_nuclei_size()``
2644            - ``set_nuclei_min_mean_intensity()``
2645            - ``set_chromatinization_size()``
2646            - ``set_chromatinization_ratio()``
2647            - ``set_chromatinization_cut_point()``
2648            - ``set_adj_chrom_gamma()``
2649            - ``set_adj_chrom_contrast()``
2650            - ``set_adj_chrom_brightness()``
2651
2652        Parameters
2653        ----------
2654        path_to_images : str
2655            Path to the directory containing images for analysis.
2656
2657        file_extension : str, optional
2658            Extension of the image files. Default is 'tiff'.
2659
2660        selected_id : list, optional
2661            List of IDs that must be part of the image name to distinguish them
2662            from others. Default is an empty list, which means all images in
2663            the directory will be processed.
2664
2665        fille_name_part : str, optional
2666            Part of the file name to filter images. Default is an empty string.
2667
2668        selection_opt : bool, optional
2669            Whether to run ``select_nuclei()`` with the defined parameters. Default is True.
2670
2671        include_img : bool, optional
2672            Whether to include the images in the result dictionary. Default is True.
2673
2674        test_series : int, optional
2675            Number of images to test the parameters and return results. Default is 0,
2676            which means all images in the directory will be processed.
2677
2678        Returns
2679        -------
2680        results_dict : dict
2681            Dictionary containing results for each image in the directory.
2682            Keys correspond to image file names.
2683
2684        Notes
2685        -----
2686        This method runs the complete nuclei and chromatinization analysis pipeline.
2687
2688        Parameters must be set appropriately before calling to ensure correct results.
2689        """
2690
2691        results_dict = {}
2692        results_img = {}
2693        results_img_raw = {}
2694
2695        files = glob.glob(os.path.join(path_to_images, "*." + file_extension))
2696
2697        if len(fille_name_part) > 0:
2698            files = [x for x in files if fille_name_part.lower() in x.lower()]
2699
2700        if len(selected_id) > 0:
2701            selected_id = [str(x) for x in selected_id]
2702            files = [
2703                x
2704                for x in files
2705                if re.sub("_.*", "", os.path.basename(x)) in selected_id
2706            ]
2707
2708        if test_series > 0:
2709
2710            files = random.sample(files, test_series)
2711
2712        self.show_plots = False
2713        self.series_im = True
2714
2715        print("\nFile analysis:\n\n")
2716
2717        for file in tqdm(files):
2718
2719            print(file)
2720
2721            self.show_plots = False
2722
2723            image = self.load_image(file)
2724
2725            self.input_image(image)
2726
2727            self.find_nuclei()
2728
2729            tmp = None
2730
2731            if selection_opt is True:
2732                self.select_nuclei()
2733                tmp = self.get_results_nuclei_selected()
2734
2735            else:
2736                tmp = self.get_results_nuclei()
2737
2738            if tmp is not None:
2739
2740                if tmp[0] is not None:
2741
2742                    results_dict[str(os.path.basename(file))] = tmp[0]
2743                    results_img[str(os.path.basename(file))] = tmp[1]
2744                    results_img_raw[str(os.path.basename(file))] = image
2745                    del tmp
2746                    del image
2747
2748        results_dict_tmp = self.repairing_nuclei(results_dict)
2749
2750        results_dict = {}
2751
2752        print("\nChromatization searching:\n\n")
2753
2754        for ke in tqdm(results_dict_tmp.keys()):
2755
2756            tmp = None
2757
2758            try:
2759                self._nuclei_chromatinization_series(
2760                    results_img_raw[ke], results_dict_tmp[ke]
2761                )
2762                tmp = self.get_results_nuclei_chromatinization()
2763            except:
2764                print(f"Sample {ke} could not be processed.")
2765
2766            if tmp is not None:
2767
2768                if tmp[0] is not None:
2769
2770                    tmp[0].pop("coords")
2771
2772                    if include_img:
2773                        results_dict[str(os.path.basename(ke))] = {
2774                            "stats": tmp[0],
2775                            "img": cv2.hconcat([results_img[ke], tmp[1]]),
2776                        }
2777                        del tmp
2778                    else:
2779                        results_dict[str(os.path.basename(ke))] = tmp[0]
2780                        del tmp
2781
2782            else:
2783                print(f"Unable to obtain results for {print(ke)}")
2784
2785        self.show_plots = True
2786        self.series_im = False
2787
2788        return results_dict

Performs full analysis on images provided via the input_image() method using default or user-defined parameters.

This method runs nuclei detection, nuclei selection, and chromatinization analysis in a single pipeline. Users can adjust parameters for each step before running the analysis.

To show current parameters, use:

- current_parameters_nuclei
- current_parameters_img_adj
- current_parameters_chromatinization
- current_parameters_img_adj_chro

To set new parameters, use:

- set_nms()
- set_prob()
- set_adj_image_gamma()
- set_adj_image_contrast()
- set_adj_image_brightness()
- set_nuclei_circularity()
- set_nuclei_size()
- set_nuclei_min_mean_intensity()
- set_chromatinization_size()
- set_chromatinization_ratio()
- set_chromatinization_cut_point()
- set_adj_chrom_gamma()
- set_adj_chrom_contrast()
- set_adj_chrom_brightness()

Parameters

path_to_images : str Path to the directory containing images for analysis.

file_extension : str, optional Extension of the image files. Default is 'tiff'.

selected_id : list, optional List of IDs that must be part of the image name to distinguish them from others. Default is an empty list, which means all images in the directory will be processed.

fille_name_part : str, optional Part of the file name to filter images. Default is an empty string.

selection_opt : bool, optional Whether to run select_nuclei() with the defined parameters. Default is True.

include_img : bool, optional Whether to include the images in the result dictionary. Default is True.

test_series : int, optional Number of images to test the parameters and return results. Default is 0, which means all images in the directory will be processed.

Returns

results_dict : dict Dictionary containing results for each image in the directory. Keys correspond to image file names.

Notes

This method runs the complete nuclei and chromatinization analysis pipeline.

Parameters must be set appropriately before calling to ensure correct results.

def series_analysis_nuclei( self, path_to_images: str, file_extension: str = 'tiff', selected_id: list = [], fille_name_part: str = '', selection_opt: bool = True, include_img: bool = True, test_series: int = 0):
2790    def series_analysis_nuclei(
2791        self,
2792        path_to_images: str,
2793        file_extension: str = "tiff",
2794        selected_id: list = [],
2795        fille_name_part: str = "",
2796        selection_opt: bool = True,
2797        include_img: bool = True,
2798        test_series: int = 0,
2799    ):
2800        """
2801        Performs analysis on the image provided by the ``input_image()`` method
2802        using default or user-defined parameters.
2803
2804        This method runs nuclei detection and selection using the currently set
2805        parameters. Users can adjust image preprocessing and nuclei detection
2806        parameters before running the analysis.
2807
2808        To show current parameters, use:
2809            - ``current_parameters_nuclei``
2810            - ``current_parameters_img_adj``
2811
2812        To set new parameters, use:
2813            - ``set_nms()``
2814            - ``set_prob()``
2815            - ``set_adj_image_gamma()``
2816            - ``set_adj_image_contrast()``
2817            - ``set_adj_image_brightness()``
2818            - ``set_nuclei_circularity()``
2819            - ``set_nuclei_size()``
2820            - ``set_nuclei_min_mean_intensity()``
2821
2822        Parameters
2823        ----------
2824        path_to_images : str
2825            Path to the directory containing images for analysis.
2826
2827        file_extension : str, optional
2828            Extension of the image files. Default is 'tiff'.
2829
2830        selected_id : list, optional
2831            List of IDs that must be part of the image name to distinguish them
2832            from others. Default is an empty list, which means all images in
2833            the directory will be processed.
2834
2835        fille_name_part : str, optional
2836            Part of the file name to filter images. Default is an empty string.
2837
2838        selection_opt : bool, optional
2839            Whether to run the ``select_nuclei()`` method with the defined parameters.
2840            Default is True.
2841
2842        include_img : bool, optional
2843            Whether to include the images in the result dictionary. Default is True.
2844
2845        test_series : int, optional
2846            Number of images to test the parameters and return results. Default is 0,
2847            which means all images in the directory will be processed.
2848
2849        Returns
2850        -------
2851        results_dict : dict
2852            Dictionary containing results for each image in the directory.
2853            Keys correspond to image file names.
2854        """
2855
2856        results_dict = {}
2857        results_img = {}
2858
2859        files = glob.glob(os.path.join(path_to_images, "*." + file_extension))
2860
2861        if len(fille_name_part) > 0:
2862            files = [x for x in files if fille_name_part.lower() in x.lower()]
2863
2864        if len(selected_id) > 0:
2865            selected_id = [str(x) for x in selected_id]
2866            files = [
2867                x
2868                for x in files
2869                if re.sub("_.*", "", os.path.basename(x)) in selected_id
2870            ]
2871
2872        if test_series > 0:
2873
2874            files = random.sample(files, test_series)
2875
2876        self.show_plots = False
2877        self.series_im = True
2878
2879        print("\nFile analysis:\n\n")
2880
2881        for file in tqdm(files):
2882
2883            print(file)
2884
2885            image = self.load_image(file)
2886
2887            self.input_image(image)
2888
2889            self.find_nuclei()
2890
2891            if self.nuclei_results["nuclei"] is not None:
2892
2893                tmp = [None]
2894
2895                if selection_opt is True:
2896                    self.select_nuclei()
2897                    tmp = self.get_results_nuclei_selected()
2898
2899                else:
2900                    tmp = self.get_results_nuclei()
2901
2902                if tmp is not None:
2903
2904                    if tmp[0] is not None:
2905
2906                        if include_img:
2907                            results_dict[str(os.path.basename(file))] = tmp[0]
2908                            results_img[str(os.path.basename(file))] = tmp[1]
2909
2910                            del tmp
2911
2912                        else:
2913                            results_dict[str(os.path.basename(file))] = tmp[0]
2914                            del tmp
2915
2916                else:
2917                    print(f"Unable to obtain results for {print(file)}")
2918
2919            else:
2920
2921                print(f"Unable to obtain results for {print(file)}")
2922
2923        self.show_plots = True
2924        self.series_im = False
2925
2926        results_dict_tmp = self.repairing_nuclei(results_dict)
2927
2928        if include_img is False:
2929
2930            return results_dict_tmp
2931
2932        else:
2933
2934            results_dict = {}
2935
2936            for ke in results_dict_tmp.keys():
2937
2938                nuclei_mask = adjust_img_16bit(
2939                    cv2.cvtColor(
2940                        self.create_mask(results_dict_tmp[ke], results_img[ke]),
2941                        cv2.COLOR_BGR2GRAY,
2942                    ),
2943                    color="blue",
2944                )
2945                concatenated_image = cv2.hconcat([results_img[ke], nuclei_mask])
2946
2947                cred = results_dict_tmp[ke]
2948                # cred.pop('coords')
2949
2950                results_dict[ke] = {"stats": cred, "img": concatenated_image}
2951
2952            return results_dict

Performs analysis on the image provided by the input_image() method using default or user-defined parameters.

This method runs nuclei detection and selection using the currently set parameters. Users can adjust image preprocessing and nuclei detection parameters before running the analysis.

To show current parameters, use:

- current_parameters_nuclei
- current_parameters_img_adj

To set new parameters, use:

- set_nms()
- set_prob()
- set_adj_image_gamma()
- set_adj_image_contrast()
- set_adj_image_brightness()
- set_nuclei_circularity()
- set_nuclei_size()
- set_nuclei_min_mean_intensity()

Parameters

path_to_images : str Path to the directory containing images for analysis.

file_extension : str, optional Extension of the image files. Default is 'tiff'.

selected_id : list, optional List of IDs that must be part of the image name to distinguish them from others. Default is an empty list, which means all images in the directory will be processed.

fille_name_part : str, optional Part of the file name to filter images. Default is an empty string.

selection_opt : bool, optional Whether to run the select_nuclei() method with the defined parameters. Default is True.

include_img : bool, optional Whether to include the images in the result dictionary. Default is True.

test_series : int, optional Number of images to test the parameters and return results. Default is 0, which means all images in the directory will be processed.

Returns

results_dict : dict Dictionary containing results for each image in the directory. Keys correspond to image file names.

class NucleiDataManagement:
2955class NucleiDataManagement:
2956    """
2957    Manages nuclei analysis data obtained from the `NucleiFinder` class,
2958    including nuclei properties and optionally Image Stream (IS) data.
2959
2960    This class allows loading nuclei data from JSON files or directly from
2961    `NucleiFinder` analysis results, converting them to pandas DataFrames,
2962    adding IS data, concatenating results from multiple experiments, and
2963    saving results in JSON or CSV format. It also provides helper methods
2964    for merging, filtering, and retrieving data.
2965
2966    Attributes
2967    ----------
2968    nuceli_data : dict
2969        Dictionary storing nuclei properties for each image or experiment.
2970
2971    experiment_name : str
2972        Name of the experiment.
2973
2974    nuceli_data_df : pd.DataFrame or None
2975        DataFrame representation of nuclei properties.
2976
2977    nuclei_IS_data : pd.DataFrame or None
2978        DataFrame of nuclei data merged with IS data.
2979
2980    concat_data : list or None
2981        List of other `NucleiDataManagement` objects added for combined analysis.
2982
2983    Methods
2984    -------
2985    load_nuc_dict(path)
2986        Load nuclei data from a JSON dictionary file (*.nuc) and initialize the object.
2987        _convert_to_df()
2988        Convert nuclei dictionary data to a pandas DataFrame.
2989
2990    add_IS_data(IS_data, IS_features)
2991        Merge Image Stream (IS) data with nuclei data.
2992
2993    get_data()
2994        Retrieve the nuclei data as a pandas DataFrame.
2995
2996    get_data_with_IS()
2997        Retrieve the nuclei data merged with IS data.
2998
2999    save_nuc_project(path)
3000        Save nuclei data as a JSON file with *.nuc extension.
3001
3002    save_results_df(path)
3003        Save nuclei data as a CSV file.
3004
3005    save_results_df_with_IS(path)
3006        Save nuclei data merged with IS data as a CSV file.
3007
3008    add_experiment(data_list)
3009        Add other `NucleiDataManagement` objects for concatenated analysis.
3010
3011    get_mutual_experiments_data(inc_is)
3012        Retrieve concatenated nuclei data from multiple experiments.
3013
3014    save_mutual_experiments(path, inc_is)
3015        Save concatenated data from multiple experiments as a CSV file.
3016    """
3017
3018    def __init__(self, nuclei_data: dict, experiment_name: str):
3019        """
3020        Initialize a NucleiDataManagement object with nuclei data and experiment name.
3021
3022        Parameters
3023        ----------
3024        nuclei_data : dict
3025            Dictionary containing nuclei properties for each image or experiment.
3026            If the dictionary entries have keys 'stats' and 'img', only 'stats' are stored.
3027
3028        experiment_name : str
3029            Name of the experiment.
3030
3031        Attributes
3032        ----------
3033        nuceli_data : dict
3034            Dictionary storing nuclei properties for each image or experiment.
3035
3036        experiment_name : str
3037            Name of the experiment.
3038
3039        nuceli_data_df : pd.DataFrame or None
3040            DataFrame representation of nuclei properties (initialized as None).
3041
3042        nuclei_IS_data : pd.DataFrame or None
3043            DataFrame of nuclei data merged with Image Stream (IS) data (initialized as None).
3044
3045        concat_data : list or None
3046            List of other `NucleiDataManagement` objects added for combined analysis (initialized as None).
3047        """
3048
3049        if set(nuclei_data[list(nuclei_data.keys())[0]].keys()) == set(
3050            ["stats", "img"]
3051        ):
3052
3053            self.nuceli_data = {}
3054
3055            for k in nuclei_data.keys():
3056                self.nuceli_data[k] = nuclei_data[k]["stats"]
3057
3058            for k in self.nuceli_data.keys():
3059                if "coords" in self.nuceli_data[k].keys():
3060                    self.nuceli_data[k].pop("coords")
3061
3062        else:
3063            self.nuceli_data = nuclei_data
3064
3065            for k in self.nuceli_data.keys():
3066                if "coords" in self.nuceli_data[k].keys():
3067                    self.nuceli_data[k].pop("coords")
3068
3069        self.experiment_name = experiment_name
3070        """Name of the experiment."""
3071
3072        self.nuceli_data_df = None
3073        """Stored DataFrame representation of nuclei features"""
3074
3075        self.nuclei_IS_data = None
3076        """Stored DataFrame of data from Image Stream (IS)."""
3077
3078        self.concat_data = None
3079        """Sotored list of other `NucleiDataManagement` objects."""
3080
3081    @classmethod
3082    def load_nuc_dict(cls, path: str):
3083        """
3084        Initialize a NucleiDataManagement object from a JSON dictionary file.
3085
3086        The loaded data must be previously saved using the ``save_nuc_project()`` method.
3087
3088        Parameters
3089        ----------
3090        path : str
3091            Path to the *.nuc JSON file containing nuclei data.
3092        """
3093
3094        if ".nuc" in path:
3095
3096            if os.path.exists(path):
3097
3098                with open(path, "r") as json_file:
3099                    loaded_data = json.load(json_file)
3100
3101                return cls(loaded_data, os.path.splitext(os.path.basename(path))[0])
3102
3103            else:
3104                raise ValueError("\nInvalid path!")
3105
3106        else:
3107            raise ValueError(
3108                "\nInvalid dictionary to load. It must contain a .nuc extension!"
3109            )
3110
3111    def _convert_to_df(self):
3112        """
3113        Helper method that converts the internal nuclei dictionary into a pandas DataFrame.
3114
3115        This method iterates over the nuclei data stored in `self.nuceli_data`,
3116        flattens the information for each nucleus, computes aggregate statistics
3117        for associated spots if present, and stores the resulting DataFrame in
3118        `self.nuceli_data_df`.
3119        """
3120
3121        nuclei_data = self.nuceli_data
3122
3123        data = []
3124
3125        for i in tqdm(nuclei_data.keys()):
3126            for n, _ in enumerate(nuclei_data[i]["area"]):
3127                row = {
3128                    "id_name": re.sub("_.*", "", i),
3129                    "nuclei_area": nuclei_data[i]["area"][n],
3130                    "nuclei_area_bbox": nuclei_data[i]["area_bbox"][n],
3131                    "nuclei_equivalent_diameter_area": nuclei_data[i][
3132                        "equivalent_diameter_area"
3133                    ][n],
3134                    "nuclei_feret_diameter_max": nuclei_data[i]["feret_diameter_max"][
3135                        n
3136                    ],
3137                    "nuclei_axis_major_length": nuclei_data[i]["axis_major_length"][n],
3138                    "nuclei_axis_minor_length": nuclei_data[i]["axis_minor_length"][n],
3139                    "nuclei_circularity": nuclei_data[i]["circularity"][n],
3140                    "nuclei_eccentricity": nuclei_data[i]["eccentricity"][n],
3141                    "nuclei_perimeter": nuclei_data[i]["perimeter"][n],
3142                    "nuclei_ratio": nuclei_data[i]["ratio"][n],
3143                    "nuclei_solidity": nuclei_data[i]["solidity"][n],
3144                }
3145
3146                if "spot_size_area" in nuclei_data[i]:
3147                    if len(nuclei_data[i]["spot_size_area"][n]) > 0:
3148                        row.update(
3149                            {
3150                                "spot_n": len(nuclei_data[i]["spot_size_area"][n]),
3151                                "avg_spot_area": np.mean(
3152                                    nuclei_data[i]["spot_size_area"][n]
3153                                ),
3154                                "avg_spot_area_bbox": np.mean(
3155                                    nuclei_data[i]["spot_size_area_bbox"][n]
3156                                ),
3157                                "avg_spot_perimeter": np.mean(
3158                                    nuclei_data[i]["spot_perimeter"][n]
3159                                ),
3160                                "sum_spot_area": np.sum(
3161                                    nuclei_data[i]["spot_size_area"][n]
3162                                ),
3163                                "sum_spot_area_bbox": np.sum(
3164                                    nuclei_data[i]["spot_size_area_bbox"][n]
3165                                ),
3166                                "sum_spot_perimeter": np.sum(
3167                                    nuclei_data[i]["spot_perimeter"][n]
3168                                ),
3169                                "avg_spot_axis_major_length": np.mean(
3170                                    nuclei_data[i]["spot_axis_major_length"][n]
3171                                ),
3172                                "avg_spot_axis_minor_length": np.mean(
3173                                    nuclei_data[i]["spot_axis_minor_length"][n]
3174                                ),
3175                                "avg_spot_eccentricity": np.mean(
3176                                    nuclei_data[i]["spot_eccentricity"][n]
3177                                ),
3178                                "avg_spot_size_equivalent_diameter_area": np.mean(
3179                                    nuclei_data[i][
3180                                        "spot_size_equivalent_diameter_area"
3181                                    ][n]
3182                                ),
3183                                "sum_spot_size_equivalent_diameter_area": np.sum(
3184                                    nuclei_data[i][
3185                                        "spot_size_equivalent_diameter_area"
3186                                    ][n]
3187                                ),
3188                            }
3189                        )
3190                    else:
3191                        row.update(
3192                            {
3193                                k: 0
3194                                for k in [
3195                                    "spot_n",
3196                                    "avg_spot_area",
3197                                    "avg_spot_area_bbox",
3198                                    "avg_spot_perimeter",
3199                                    "sum_spot_area",
3200                                    "sum_spot_area_bbox",
3201                                    "sum_spot_perimeter",
3202                                    "avg_spot_axis_major_length",
3203                                    "avg_spot_axis_minor_length",
3204                                    "avg_spot_eccentricity",
3205                                    "avg_spot_size_equivalent_diameter_area",
3206                                    "sum_spot_size_equivalent_diameter_area",
3207                                ]
3208                            }
3209                        )
3210
3211                data.append(row)
3212
3213        nuclei_df = pd.DataFrame(data)
3214
3215        nuclei_df["nuclei_per_img"] = nuclei_df.groupby("id_name")["id_name"].transform(
3216            "count"
3217        )
3218        nuclei_df["set"] = self.experiment_name
3219
3220        self.nuceli_data_df = nuclei_df
3221
3222    def add_IS_data(self, IS_data: pd.DataFrame, IS_features: list = []):
3223        """
3224        Merge Image Stream (IS) data with nuclei analysis data.
3225
3226        This method concatenates IS (Image Stream, https://cytekbio.com/pages/imagestream)
3227        results with the nuclei data stored in the object. The merge is performed based
3228        on object IDs, allowing joint analysis of nuclei features and IS features.
3229
3230        Parameters
3231        ----------
3232        IS_data : pd.DataFrame
3233            DataFrame containing IS data results.
3234
3235        IS_features : list, optional
3236            List of features to extract from the IS data. Default is an empty list.
3237
3238        Notes
3239        -----
3240        The merged data will be stored in the attribute `self.nuclei_IS_data`.
3241        """
3242
3243        nuclei_data = self._get_df()
3244
3245        IS_data["set"] = self.experiment_name
3246
3247        if len(IS_features) > 0:
3248            IS_features = list(set(IS_features + ["Object Number", "set"]))
3249            IS_data = IS_data[IS_features]
3250
3251        nuclei_data["id"] = (
3252            nuclei_data["id_name"].astype(str) + "_" + nuclei_data["set"]
3253        )
3254        IS_data["id"] = IS_data["Object Number"].astype(str) + "_" + IS_data["set"]
3255
3256        merged_data = pd.merge(nuclei_data, IS_data, on="id", how="left")
3257        merged_data.pop("set_x")
3258        merged_data = merged_data.rename(columns={"set_y": "set"})
3259
3260        self.nuclei_IS_data = merged_data
3261
3262    def _get_df(self):
3263        """
3264        Helper method to retrieve the nuclei data as a pandas DataFrame.
3265
3266        If the internal DataFrame `self.nuceli_data_df` has not been created yet,
3267        this method calls `_convert_to_df()` to generate it from `self.nuceli_data`.
3268        """
3269
3270        if self.nuceli_data_df is None:
3271            self._convert_to_df()
3272
3273        return self.nuceli_data_df
3274
3275    def get_data_with_IS(self):
3276        """
3277        Retrieve nuclei results for a single project including IS data.
3278
3279        Returns
3280        -------
3281        pd.DataFrame or None
3282            DataFrame containing nuclei results merged with IS (Image Stream) data
3283            added via `self.add_IS_data()`. Returns None if no IS data has been added.
3284        """
3285
3286        if self.nuclei_IS_data is None:
3287            print("\nNothing to return!")
3288        return self.nuclei_IS_data
3289
3290    def get_data(self):
3291        """
3292        Retrieve nuclei results for a single project as a pandas DataFrame.
3293
3294        Returns
3295        -------
3296        pd.DataFrame
3297            DataFrame containing nuclei analysis results for the experiment.
3298        """
3299
3300        return self._get_df()
3301
3302    def save_nuc_project(self, path: str = ""):
3303        """
3304        Save nuclei results as a JSON file with a *.nuc extension.
3305
3306        The saved data can later be loaded using the `cls.load_nuc_dict()` method.
3307        Results must be obtained from the `NucleiFinder` class using
3308        `series_analysis_nuclei()` or `series_analysis_chromatinization()` methods.
3309
3310        Parameters
3311        ----------
3312        path : str, optional
3313            Directory where the results will be saved. Default is the current working directory.
3314        """
3315
3316        data = self.nuceli_data
3317
3318        if len(data.keys()) > 0:
3319            full_path = os.path.join(path, self.experiment_name)
3320
3321            with open(full_path + ".nuc", "w") as json_file:
3322                json.dump(data, json_file, indent=4)
3323        else:
3324            print("\nData not provided!")
3325
3326    def save_results_df(self, path: str = ""):
3327        """
3328        Save nuclei results for a single project as a CSV file.
3329
3330        Results must be obtained from the `NucleiFinder` class using
3331        `series_analysis_nuclei()` or `series_analysis_chromatinization()` methods.
3332
3333        Parameters
3334        ----------
3335        path : str, optional
3336            Directory where the CSV file will be saved. Default is the current working directory.
3337        """
3338
3339        data = self.get_data()
3340
3341        full_path = os.path.join(path, f"{self.experiment_name}.csv")
3342
3343        data.to_csv(full_path, index=False)
3344
3345    def save_results_df_with_IS(self, path: str = ""):
3346        """
3347        Save nuclei results with IS data for a single project as a CSV file.
3348
3349        Results must be obtained from the `NucleiFinder` class using
3350        `series_analysis_nuclei()` or `series_analysis_chromatinization()` methods.
3351        IS data should have been added via `self.add_IS_data()`.
3352
3353        Parameters
3354        ----------
3355        path : str, optional
3356            Directory where the CSV file will be saved. Default is the current working directory.
3357        """
3358
3359        data = self.get_data_with_IS()
3360
3361        if data is None:
3362            raise ValueError("There was nothing to save.")
3363
3364        full_path = os.path.join(path, f"{self.experiment_name}_IS.csv")
3365        data.to_csv(full_path, index=False)
3366
3367    def add_experiment(self, data_list: list):
3368        """
3369        Add additional NucleiDataManagement objects from other experiments for concatenation.
3370
3371        Parameters
3372        ----------
3373        data_list : list
3374            List of `NucleiDataManagement` objects from separate experiments to be added.
3375        """
3376
3377        valid_class = []
3378        for obj in data_list:
3379            if isinstance(obj, self.__class__):
3380                valid_class.append(obj)
3381            else:
3382                print(f"Object {obj} is invalid type.")
3383
3384        self.concat_data = valid_class
3385
3386    def get_mutual_experiments_data(self, inc_is: bool = False):
3387        """
3388        Retrieve concatenated NucleiDataManagement data from other added experiments.
3389
3390        Parameters
3391        ----------
3392        inc_is : bool, optional
3393            Whether to include IS (Image Stream) data, if it was added to each experiment. Default is False.
3394
3395        Returns
3396        -------
3397        pd.DataFrame
3398            Concatenated nuclei data (with or without IS data) from all added experiments.
3399        """
3400
3401        if self.concat_data is not None:
3402            if inc_is:
3403
3404                try:
3405                    final_df = pd.concat(
3406                        [x.get_data_with_IS() for x in self.concat_data]
3407                        + [self.get_data_with_IS()]
3408                    )
3409                except:
3410                    raise ValueError(
3411                        "Lack of IS data in some object. Check if the IS data was added to each project."
3412                    )
3413
3414            else:
3415                final_df = pd.concat(
3416                    [x.get_data() for x in self.concat_data] + [self.get_data()]
3417                )
3418
3419            return final_df
3420
3421        raise ValueError("No object to concatenate. Nothing to return!")
3422
3423    def save_mutual_experiments(self, path: str = "", inc_is: bool = False):
3424        """
3425        Save concatenated NucleiDataManagement data from added experiments as a CSV file.
3426
3427        Parameters
3428        ----------
3429        inc_is : bool, optional
3430            Whether to include IS (Image Stream) data, if it was added to each experiment. Default is False.
3431        """
3432
3433        dt = self.get_mutual_experiments_data(inc_is=inc_is)
3434
3435        experimets = [self.experiment_name] + [
3436            n.experiment_name for n in self.concat_data
3437        ]
3438
3439        experimets_names = "_".join(experimets)
3440
3441        if inc_is:
3442            full_path = os.path.join(path, f"{experimets_names}_IS.csv")
3443        else:
3444            full_path = os.path.join(path, f"{experimets_names}.csv")
3445
3446        dt.to_csv(full_path, index=False)

Manages nuclei analysis data obtained from the NucleiFinder class, including nuclei properties and optionally Image Stream (IS) data.

This class allows loading nuclei data from JSON files or directly from NucleiFinder analysis results, converting them to pandas DataFrames, adding IS data, concatenating results from multiple experiments, and saving results in JSON or CSV format. It also provides helper methods for merging, filtering, and retrieving data.

Attributes

nuceli_data : dict Dictionary storing nuclei properties for each image or experiment.

experiment_name : str Name of the experiment.

nuceli_data_df : pd.DataFrame or None DataFrame representation of nuclei properties.

nuclei_IS_data : pd.DataFrame or None DataFrame of nuclei data merged with IS data.

concat_data : list or None List of other NucleiDataManagement objects added for combined analysis.

Methods

load_nuc_dict(path) Load nuclei data from a JSON dictionary file (*.nuc) and initialize the object.

_convert_to_df() Convert nuclei dictionary data to a pandas DataFrame.

add_IS_data(IS_data, IS_features) Merge Image Stream (IS) data with nuclei data.

get_data() Retrieve the nuclei data as a pandas DataFrame.

get_data_with_IS() Retrieve the nuclei data merged with IS data.

save_nuc_project(path) Save nuclei data as a JSON file with *.nuc extension.

save_results_df(path) Save nuclei data as a CSV file.

save_results_df_with_IS(path) Save nuclei data merged with IS data as a CSV file.

add_experiment(data_list) Add other NucleiDataManagement objects for concatenated analysis.

get_mutual_experiments_data(inc_is) Retrieve concatenated nuclei data from multiple experiments.

save_mutual_experiments(path, inc_is) Save concatenated data from multiple experiments as a CSV file.

NucleiDataManagement(nuclei_data: dict, experiment_name: str)
3018    def __init__(self, nuclei_data: dict, experiment_name: str):
3019        """
3020        Initialize a NucleiDataManagement object with nuclei data and experiment name.
3021
3022        Parameters
3023        ----------
3024        nuclei_data : dict
3025            Dictionary containing nuclei properties for each image or experiment.
3026            If the dictionary entries have keys 'stats' and 'img', only 'stats' are stored.
3027
3028        experiment_name : str
3029            Name of the experiment.
3030
3031        Attributes
3032        ----------
3033        nuceli_data : dict
3034            Dictionary storing nuclei properties for each image or experiment.
3035
3036        experiment_name : str
3037            Name of the experiment.
3038
3039        nuceli_data_df : pd.DataFrame or None
3040            DataFrame representation of nuclei properties (initialized as None).
3041
3042        nuclei_IS_data : pd.DataFrame or None
3043            DataFrame of nuclei data merged with Image Stream (IS) data (initialized as None).
3044
3045        concat_data : list or None
3046            List of other `NucleiDataManagement` objects added for combined analysis (initialized as None).
3047        """
3048
3049        if set(nuclei_data[list(nuclei_data.keys())[0]].keys()) == set(
3050            ["stats", "img"]
3051        ):
3052
3053            self.nuceli_data = {}
3054
3055            for k in nuclei_data.keys():
3056                self.nuceli_data[k] = nuclei_data[k]["stats"]
3057
3058            for k in self.nuceli_data.keys():
3059                if "coords" in self.nuceli_data[k].keys():
3060                    self.nuceli_data[k].pop("coords")
3061
3062        else:
3063            self.nuceli_data = nuclei_data
3064
3065            for k in self.nuceli_data.keys():
3066                if "coords" in self.nuceli_data[k].keys():
3067                    self.nuceli_data[k].pop("coords")
3068
3069        self.experiment_name = experiment_name
3070        """Name of the experiment."""
3071
3072        self.nuceli_data_df = None
3073        """Stored DataFrame representation of nuclei features"""
3074
3075        self.nuclei_IS_data = None
3076        """Stored DataFrame of data from Image Stream (IS)."""
3077
3078        self.concat_data = None
3079        """Sotored list of other `NucleiDataManagement` objects."""

Initialize a NucleiDataManagement object with nuclei data and experiment name.

Parameters

nuclei_data : dict Dictionary containing nuclei properties for each image or experiment. If the dictionary entries have keys 'stats' and 'img', only 'stats' are stored.

experiment_name : str Name of the experiment.

Attributes

nuceli_data : dict Dictionary storing nuclei properties for each image or experiment.

experiment_name : str Name of the experiment.

nuceli_data_df : pd.DataFrame or None DataFrame representation of nuclei properties (initialized as None).

nuclei_IS_data : pd.DataFrame or None DataFrame of nuclei data merged with Image Stream (IS) data (initialized as None).

concat_data : list or None List of other NucleiDataManagement objects added for combined analysis (initialized as None).

experiment_name

Name of the experiment.

nuceli_data_df

Stored DataFrame representation of nuclei features

nuclei_IS_data

Stored DataFrame of data from Image Stream (IS).

concat_data

Stored list of other NucleiDataManagement objects.

@classmethod
def load_nuc_dict(cls, path: str):
3081    @classmethod
3082    def load_nuc_dict(cls, path: str):
3083        """
3084        Initialize a NucleiDataManagement object from a JSON dictionary file.
3085
3086        The loaded data must be previously saved using the ``save_nuc_project()`` method.
3087
3088        Parameters
3089        ----------
3090        path : str
3091            Path to the *.nuc JSON file containing nuclei data.
3092        """
3093
3094        if ".nuc" in path:
3095
3096            if os.path.exists(path):
3097
3098                with open(path, "r") as json_file:
3099                    loaded_data = json.load(json_file)
3100
3101                return cls(loaded_data, os.path.splitext(os.path.basename(path))[0])
3102
3103            else:
3104                raise ValueError("\nInvalid path!")
3105
3106        else:
3107            raise ValueError(
3108                "\nInvalid dictionary to load. It must contain a .nuc extension!"
3109            )

Initialize a NucleiDataManagement object from a JSON dictionary file.

The loaded data must be previously saved using the save_nuc_project() method.

Parameters

path : str Path to the *.nuc JSON file containing nuclei data.

def add_IS_data(self, IS_data: pandas.core.frame.DataFrame, IS_features: list = []):
3222    def add_IS_data(self, IS_data: pd.DataFrame, IS_features: list = []):
3223        """
3224        Merge Image Stream (IS) data with nuclei analysis data.
3225
3226        This method concatenates IS (Image Stream, https://cytekbio.com/pages/imagestream)
3227        results with the nuclei data stored in the object. The merge is performed based
3228        on object IDs, allowing joint analysis of nuclei features and IS features.
3229
3230        Parameters
3231        ----------
3232        IS_data : pd.DataFrame
3233            DataFrame containing IS data results.
3234
3235        IS_features : list, optional
3236            List of features to extract from the IS data. Default is an empty list.
3237
3238        Notes
3239        -----
3240        The merged data will be stored in the attribute `self.nuclei_IS_data`.
3241        """
3242
3243        nuclei_data = self._get_df()
3244
3245        IS_data["set"] = self.experiment_name
3246
3247        if len(IS_features) > 0:
3248            IS_features = list(set(IS_features + ["Object Number", "set"]))
3249            IS_data = IS_data[IS_features]
3250
3251        nuclei_data["id"] = (
3252            nuclei_data["id_name"].astype(str) + "_" + nuclei_data["set"]
3253        )
3254        IS_data["id"] = IS_data["Object Number"].astype(str) + "_" + IS_data["set"]
3255
3256        merged_data = pd.merge(nuclei_data, IS_data, on="id", how="left")
3257        merged_data.pop("set_x")
3258        merged_data = merged_data.rename(columns={"set_y": "set"})
3259
3260        self.nuclei_IS_data = merged_data

Merge Image Stream (IS) data with nuclei analysis data.

This method concatenates IS (Image Stream, https://cytekbio.com/pages/imagestream) results with the nuclei data stored in the object. The merge is performed based on object IDs, allowing joint analysis of nuclei features and IS features.

Parameters

IS_data : pd.DataFrame DataFrame containing IS data results.

IS_features : list, optional List of features to extract from the IS data. Default is an empty list.

Notes

The merged data will be stored in the attribute self.nuclei_IS_data.

def get_data_with_IS(self):
3275    def get_data_with_IS(self):
3276        """
3277        Retrieve nuclei results for a single project including IS data.
3278
3279        Returns
3280        -------
3281        pd.DataFrame or None
3282            DataFrame containing nuclei results merged with IS (Image Stream) data
3283            added via `self.add_IS_data()`. Returns None if no IS data has been added.
3284        """
3285
3286        if self.nuclei_IS_data is None:
3287            print("\nNothing to return!")
3288        return self.nuclei_IS_data

Retrieve nuclei results for a single project including IS data.

Returns

pd.DataFrame or None DataFrame containing nuclei results merged with IS (Image Stream) data added via self.add_IS_data(). Returns None if no IS data has been added.

def get_data(self):
3290    def get_data(self):
3291        """
3292        Retrieve nuclei results for a single project as a pandas DataFrame.
3293
3294        Returns
3295        -------
3296        pd.DataFrame
3297            DataFrame containing nuclei analysis results for the experiment.
3298        """
3299
3300        return self._get_df()

Retrieve nuclei results for a single project as a pandas DataFrame.

Returns

pd.DataFrame DataFrame containing nuclei analysis results for the experiment.

def save_nuc_project(self, path: str = ''):
3302    def save_nuc_project(self, path: str = ""):
3303        """
3304        Save nuclei results as a JSON file with a *.nuc extension.
3305
3306        The saved data can later be loaded using the `cls.load_nuc_dict()` method.
3307        Results must be obtained from the `NucleiFinder` class using
3308        `series_analysis_nuclei()` or `series_analysis_chromatinization()` methods.
3309
3310        Parameters
3311        ----------
3312        path : str, optional
3313            Directory where the results will be saved. Default is the current working directory.
3314        """
3315
3316        data = self.nuceli_data
3317
3318        if len(data.keys()) > 0:
3319            full_path = os.path.join(path, self.experiment_name)
3320
3321            with open(full_path + ".nuc", "w") as json_file:
3322                json.dump(data, json_file, indent=4)
3323        else:
3324            print("\nData not provided!")

Save nuclei results as a JSON file with a *.nuc extension.

The saved data can later be loaded using the cls.load_nuc_dict() method. Results must be obtained from the NucleiFinder class using series_analysis_nuclei() or series_analysis_chromatinization() methods.

Parameters

path : str, optional Directory where the results will be saved. Default is the current working directory.

def save_results_df(self, path: str = ''):
3326    def save_results_df(self, path: str = ""):
3327        """
3328        Save nuclei results for a single project as a CSV file.
3329
3330        Results must be obtained from the `NucleiFinder` class using
3331        `series_analysis_nuclei()` or `series_analysis_chromatinization()` methods.
3332
3333        Parameters
3334        ----------
3335        path : str, optional
3336            Directory where the CSV file will be saved. Default is the current working directory.
3337        """
3338
3339        data = self.get_data()
3340
3341        full_path = os.path.join(path, f"{self.experiment_name}.csv")
3342
3343        data.to_csv(full_path, index=False)

Save nuclei results for a single project as a CSV file.

Results must be obtained from the NucleiFinder class using series_analysis_nuclei() or series_analysis_chromatinization() methods.

Parameters

path : str, optional Directory where the CSV file will be saved. Default is the current working directory.

def save_results_df_with_IS(self, path: str = ''):
3345    def save_results_df_with_IS(self, path: str = ""):
3346        """
3347        Save nuclei results with IS data for a single project as a CSV file.
3348
3349        Results must be obtained from the `NucleiFinder` class using
3350        `series_analysis_nuclei()` or `series_analysis_chromatinization()` methods.
3351        IS data should have been added via `self.add_IS_data()`.
3352
3353        Parameters
3354        ----------
3355        path : str, optional
3356            Directory where the CSV file will be saved. Default is the current working directory.
3357        """
3358
3359        data = self.get_data_with_IS()
3360
3361        if data is None:
3362            raise ValueError("There was nothing to save.")
3363
3364        full_path = os.path.join(path, f"{self.experiment_name}_IS.csv")
3365        data.to_csv(full_path, index=False)

Save nuclei results with IS data for a single project as a CSV file.

Results must be obtained from the NucleiFinder class using series_analysis_nuclei() or series_analysis_chromatinization() methods. IS data should have been added via self.add_IS_data().

Parameters

path : str, optional Directory where the CSV file will be saved. Default is the current working directory.

def add_experiment(self, data_list: list):
3367    def add_experiment(self, data_list: list):
3368        """
3369        Add additional NucleiDataManagement objects from other experiments for concatenation.
3370
3371        Parameters
3372        ----------
3373        data_list : list
3374            List of `NucleiDataManagement` objects from separate experiments to be added.
3375        """
3376
3377        valid_class = []
3378        for obj in data_list:
3379            if isinstance(obj, self.__class__):
3380                valid_class.append(obj)
3381            else:
3382                print(f"Object {obj} is invalid type.")
3383
3384        self.concat_data = valid_class

Add additional NucleiDataManagement objects from other experiments for concatenation.

Parameters

data_list : list List of NucleiDataManagement objects from separate experiments to be added.

def get_mutual_experiments_data(self, inc_is: bool = False):
3386    def get_mutual_experiments_data(self, inc_is: bool = False):
3387        """
3388        Retrieve concatenated NucleiDataManagement data from other added experiments.
3389
3390        Parameters
3391        ----------
3392        inc_is : bool, optional
3393            Whether to include IS (Image Stream) data, if it was added to each experiment. Default is False.
3394
3395        Returns
3396        -------
3397        pd.DataFrame
3398            Concatenated nuclei data (with or without IS data) from all added experiments.
3399        """
3400
3401        if self.concat_data is not None:
3402            if inc_is:
3403
3404                try:
3405                    final_df = pd.concat(
3406                        [x.get_data_with_IS() for x in self.concat_data]
3407                        + [self.get_data_with_IS()]
3408                    )
3409                except:
3410                    raise ValueError(
3411                        "Lack of IS data in some object. Check if the IS data was added to each project."
3412                    )
3413
3414            else:
3415                final_df = pd.concat(
3416                    [x.get_data() for x in self.concat_data] + [self.get_data()]
3417                )
3418
3419            return final_df
3420
3421        raise ValueError("No object to concatenate. Nothing to return!")

Retrieve concatenated NucleiDataManagement data from other added experiments.

Parameters

inc_is : bool, optional Whether to include IS (Image Stream) data, if it was added to each experiment. Default is False.

Returns

pd.DataFrame Concatenated nuclei data (with or without IS data) from all added experiments.

def save_mutual_experiments(self, path: str = '', inc_is: bool = False):
3423    def save_mutual_experiments(self, path: str = "", inc_is: bool = False):
3424        """
3425        Save concatenated NucleiDataManagement data from added experiments as a CSV file.
3426
3427        Parameters
3428        ----------
3429        inc_is : bool, optional
3430            Whether to include IS (Image Stream) data, if it was added to each experiment. Default is False.
3431        """
3432
3433        dt = self.get_mutual_experiments_data(inc_is=inc_is)
3434
3435        experimets = [self.experiment_name] + [
3436            n.experiment_name for n in self.concat_data
3437        ]
3438
3439        experimets_names = "_".join(experimets)
3440
3441        if inc_is:
3442            full_path = os.path.join(path, f"{experimets_names}_IS.csv")
3443        else:
3444            full_path = os.path.join(path, f"{experimets_names}.csv")
3445
3446        dt.to_csv(full_path, index=False)

Save concatenated NucleiDataManagement data from added experiments as a CSV file.

Parameters

path : str, optional Directory where the CSV file will be saved. Default is the current working directory.

inc_is : bool, optional Whether to include IS (Image Stream) data, if it was added to each experiment. Default is False.

class GroupAnalysis:
3449class GroupAnalysis:
3450    """
3451    A class for performing multivariate analysis, dimensionality reduction,
3452    clustering, and differential feature analysis (DFA) on biological or
3453    experimental datasets.
3454
3455    This class provides tools for:
3456    - Scaling and PCA of input data
3457    - UMAP embedding and DBSCAN clustering
3458    - Differential Feature Analysis across groups
3459    - Proportion analysis and plotting
3460    - Data selection and merging with metadata
3461
3462    Attributes
3463    ----------
3464    input_data : pd.DataFrame
3465        The primary dataset containing features for analysis.
3466
3467    input_metadata : pd.DataFrame
3468        Metadata corresponding to the input data, including identifiers and group labels.
3469
3470    tmp_data : pd.DataFrame
3471        Temporary copy of the input data, used for feature selection and filtering.
3472
3473    tmp_metadata : pd.DataFrame
3474        Temporary copy of metadata, used for filtered or subsetted operations.
3475
3476    scaled_data : np.ndarray or None
3477        Scaled version of the temporary dataset (`tmp_data`), updated after `data_scale()`.
3478
3479    PCA_results : np.ndarray or None
3480        Results of PCA transformation applied on scaled data.
3481
3482    var_data : np.ndarray or None
3483        Explained variance ratio from PCA.
3484
3485    knee_plot : matplotlib.figure.Figure or None
3486        Figure of cumulative explained variance for PCA components.
3487
3488    UMAP_data : np.ndarray or None
3489        Embedding results from UMAP dimensionality reduction.
3490
3491    UMAP_plot : dict
3492        Dictionary containing UMAP plots. Keys: 'static' (matplotlib) and 'html' (plotly).
3493
3494    dblabels : list or None
3495        Cluster labels assigned by DBSCAN after UMAP embedding.
3496
3497    explained_variance_ratio : np.ndarray or None
3498        Explained variance ratio of PCA components.
3499
3500    DFA_results : pd.DataFrame or None
3501        Results of Differential Feature Analysis (DFA).
3502
3503    proportion_stats : pd.DataFrame or None
3504        Statistics from proportion analysis.
3505
3506    proportion_plot : matplotlib.figure.Figure or None
3507        Figure of proportion analysis results.
3508
3509    Methods
3510    -------
3511    resest_project():
3512        Reset all temporary and analysis results to initial state.
3513
3514    load_data(data, ids_col='id_name', set_col='set'):
3515        Class method to load data and metadata and initialize the object.
3516
3517    groups:
3518        Property returning available groups in the metadata.
3519
3520    get_DFA(), get_PCA(), get_knee_plot(), get_var_data(), get_scaled_data():
3521        Methods to retrieve previously computed results.
3522
3523    UMAP(), db_scan(), UMAP_on_clusters():
3524        Methods for dimensionality reduction and clustering visualization.
3525
3526    DFA(meta_group_by='sets', sets={}, n_proc=5):
3527        Perform Differential Feature Analysis.
3528
3529    proportion_analysis(grouping_col='sets', val_col='nuclei_per_img', ...):
3530        Perform and plot proportion analysis across groups.
3531    """
3532
3533    def __init__(
3534        self,
3535        input_data,
3536        input_metadata,
3537    ):
3538        """
3539        Initialize a GroupAnalysis instance with data and metadata.
3540
3541        Parameters
3542        ----------
3543        input_data : pd.DataFrame
3544            Dataset containing features for analysis. Rows represent samples and columns represent features.
3545
3546        input_metadata : pd.DataFrame
3547            Metadata corresponding to `input_data`, including sample identifiers and group labels.
3548        """
3549
3550        self.input_data = input_data
3551        """Stored input dataset for analysis."""
3552
3553        self.input_metadata = input_metadata
3554        """Stored metadata associated with `input_data`."""
3555
3556        self.tmp_metadata = input_metadata
3557        """Temporary copy of `input_data` used for filtering, selection, or scaling."""
3558
3559        self.tmp_data = input_data
3560        """Temporary copy of `input_metadata` used for filtered operations."""
3561
3562        self.scaled_data = None
3563        """Stored scaled version of `tmp_data` after normalization or standardization."""
3564
3565        self.PCA_results = None
3566        """ Stored results of PCA transformation applied on `scaled_data`."""
3567
3568        self.var_data = None
3569        """Sotred explained variance ratio for PCA components."""
3570
3571        self.knee_plot = None
3572        """Figure showing cumulative explained variance for PCA."""
3573
3574        self.UMAP_data = None
3575        """Stored embedding coordinates from UMAP dimensionality reduction."""
3576
3577        self.UMAP_plot = {"static": {}, "html": {}}
3578        """Stored dictionary containing UMAP plots: 'static' (matplotlib) and 'html' (plotly)."""
3579
3580        self.dblabels = None
3581        """Stored cluster labels assigned by DBSCAN after UMAP embedding."""
3582
3583        self.explained_variance_ratio = None
3584        """Stored explained variance ratio of PCA components."""
3585
3586        self.DFA_results = None
3587        """Stored Differential Feature Analysis (DFA) results."""
3588
3589        self.proportion_stats = None
3590        """Stored statistics from proportion analysis of groups."""
3591
3592        self.proportion_plot = None
3593        """Figure visualizing proportion analysis results."""
3594
3595    def resest_project(self):
3596        """
3597        Resets the project state by clearing or reinitializing various attributes.
3598
3599        This method resets the following attributes to initial values:
3600        - `tmp_metadata`
3601        - `tmp_data`
3602        - `scaled_data`
3603        - `PCA_results`
3604        - `var_data`
3605        - `knee_plot`
3606        - `UMAP_data`
3607        - `UMAP_plot`
3608        - `dblabels`
3609        - `explained_variance_ratio`
3610        - `DFA_results`
3611
3612        This method is typically called to reinitialize the project data and results, preparing the system for new computations or project resets.
3613        """
3614
3615        self.tmp_metadata = self.input_metadata
3616        self.tmp_data = self.input_data
3617        self.scaled_data = None
3618        self.PCA_results = None
3619        self.var_data = None
3620        self.knee_plot = None
3621        self.UMAP_data = None
3622        self.UMAP_plot = {"static": {}, "html": {}}
3623        self.dblabels = None
3624        self.explained_variance_ratio = None
3625        self.DFA_results = None
3626        self.proportion_stats = None
3627        self.proportion_plot = None
3628
3629    @classmethod
3630    def load_data(cls, data, ids_col: str = "id_name", set_col: str = "set"):
3631        """
3632        Load data and initialize the class by storing both the feature data and metadata.
3633
3634        Parameters
3635        ----------
3636        data : pd.DataFrame
3637            Input dataset used for group analysis. Must contain both feature columns and
3638            metadata columns specified by `ids_col` and `set_col`.
3639
3640        ids_col : str, optional
3641            Name of the column containing unique object identifiers.
3642            Default is ``'id_name'``.
3643
3644        set_col : str, optional
3645            Name of the column specifying group or set assignment for each object.
3646            Default is ``'set'``.
3647
3648        Notes
3649        -----
3650        This method performs in-place initialization of the class and does not return
3651        a separate object. All loaded data and metadata become available through the
3652        class attributes for downstream analysis.
3653
3654        This method updates internal class attributes:
3655
3656        - **input_data** : pd.DataFrame
3657          Cleaned feature table with index set to object IDs.
3658
3659        - **tmp_data** : pd.DataFrame
3660          Copy of `input_data` used for temporary operations.
3661
3662        - **input_metadata** : pd.DataFrame
3663          Metadata containing object IDs and group assignments.
3664
3665        - **tmp_metadata** : pd.DataFrame
3666          Copy of `input_metadata` for temporary operations.
3667        """
3668
3669        data = data.dropna()
3670
3671        metadata = pd.DataFrame()
3672        metadata["id"] = data[ids_col]
3673        metadata["sets"] = data[set_col]
3674
3675        data.index = data[ids_col]
3676
3677        try:
3678            data.pop("id_name")
3679        except:
3680            None
3681
3682        try:
3683            data.pop("Object Number")
3684        except:
3685            None
3686
3687        return cls(data, metadata)
3688
3689    @property
3690    def groups(self):
3691        """
3692        Return information about available groups in the metadata for ``self.DFA``.
3693
3694        Returns
3695        -------
3696        dict
3697            Dictionary mapping each metadata column name to a list of unique groups
3698            available in that column.
3699        """
3700
3701        try:
3702            return {
3703                "sets": set(self.tmp_metadata["sets"]),
3704                "full_name": set(self.tmp_metadata["full_name"]),
3705            }
3706        except:
3707            return {"sets": set(self.tmp_metadata["sets"])}
3708
3709    def get_DFA(self):
3710        """
3711        Retrieve the DFA results produced by the ``DFA()`` method.
3712
3713        Returns
3714        -------
3715        pd.DataFrame
3716            The DFA results stored in ``self.DFA_results``.
3717        """
3718
3719        if None in self.DFA_results:
3720            print("\nNo results to return! Please run the DFA() method first.")
3721        else:
3722            return self.DFA_results
3723
3724    def get_PCA(self):
3725        """
3726        Retrieve the PCA results produced by the ``PCA()`` method.
3727
3728        Returns
3729        -------
3730        np.ndarray
3731            The PCA results stored in ``self.PCA_results``.
3732        """
3733
3734        if None in self.PCA_results:
3735            print("\nNo results to return! Please run the PCA() method first.")
3736        else:
3737            return self.PCA_results
3738
3739    def get_knee_plot(self, show: bool = True):
3740        """
3741        Retrieve the knee plot of cumulative explained variance generated by the ``var_plot()`` method.
3742
3743        Parameters
3744        ----------
3745        show : bool, optional
3746            If ``True`` (default), the knee plot is displayed.
3747
3748        Returns
3749        -------
3750        matplotlib.figure.Figure
3751            The figure object containing the knee plot.
3752        """
3753
3754        if self.knee_plot is None:
3755            print("\nNo results to return! Please run the var_plot() method first.")
3756        else:
3757            if cfg._DISPLAY_MODE:
3758                if show is True:
3759                    self.knee_plot
3760                    try:
3761                        display(self.knee_plot)
3762                    except:
3763                        None
3764
3765            return self.knee_plot
3766
3767    def get_var_data(self):
3768        """
3769        Retrieve the explained variance data from the ``var_plot()`` method.
3770
3771        Returns
3772        -------
3773        np.ndarray
3774            Array containing the explained variance values stored in ``self.var_data``.
3775        """
3776
3777        if None in self.var_data:
3778            print("\nNo results to return! Please run the var_plot() method first.")
3779        else:
3780            return self.var_data
3781
3782    def get_scaled_data(self):
3783        """
3784        Retrieve the scaled data produced by the ``data_scale()`` method.
3785
3786        Returns
3787        -------
3788        np.ndarray
3789            Scaled data stored in ``self.scaled_data``.
3790        """
3791
3792        if None in self.scaled_data:
3793            print("\nNo results to return! Please run the data_scale() method first.")
3794        else:
3795            return self.scaled_data
3796
3797    def get_UMAP_data(self):
3798        """
3799        Retrieve the UMAP-transformed data generated by the ``UMAP()`` method.
3800
3801        Returns
3802        -------
3803        np.ndarray
3804            UMAP-embedded data stored in ``self.UMAP_data``.
3805        """
3806
3807        if None in self.UMAP_data:
3808            print("\nNo results to return! Please run the UMAP() method first.")
3809        else:
3810            return self.UMAP_data
3811
3812    def get_UMAP_plots(self, plot_type: str = "static", show: bool = True):
3813        """
3814        Retrieve UMAP plots generated by the ``UMAP()`` and/or ``UMAP_on_clusters()`` methods.
3815
3816        Parameters
3817        ----------
3818        show : bool, optional
3819            Whether to display the UMAP plots. Default is True.
3820
3821        Returns
3822        -------
3823        dict of matplotlib.figure.Figure
3824            A dictionary containing the UMAP plots. Keys correspond to plot names, and values are the figure objects.
3825        """
3826
3827        if plot_type == "html":
3828
3829            if len(self.UMAP_plot["html"].keys()) == 0:
3830                print(
3831                    "\nNo results to return! Please run the UMAP() and / or UMAP_on_clusters() methods first."
3832                )
3833            else:
3834                if cfg._DISPLAY_MODE:
3835                    if show:
3836                        for k in self.UMAP_plot["html"].keys():
3837                            self.UMAP_plot["html"][k]
3838                            try:
3839                                display(self.UMAP_plot["html"][k])
3840                            except:
3841                                None
3842
3843                return self.UMAP_plot["html"]
3844
3845        else:
3846
3847            if len(self.UMAP_plot["static"].keys()) == 0:
3848                print(
3849                    "\nNo results to return! Please run the UMAP() and / or UMAP_on_clusters() methods first."
3850                )
3851            else:
3852                if cfg._DISPLAY_MODE:
3853                    if show:
3854                        for k in self.UMAP_plot["static"].keys():
3855                            self.UMAP_plot["static"][k]
3856                            try:
3857                                display(self.UMAP_plot["static"][k])
3858                            except:
3859                                None
3860
3861                return self.UMAP_plot["static"]
3862
3863    def select_data(self, features_list: list = []):
3864        """
3865        Select specific features (columns) from the dataset for further analysis.
3866
3867        Parameters
3868        ----------
3869        features_list : list of str, optional
3870            List of feature names (column names) to select from the dataset. Default is an empty list, which selects no features.
3871
3872        Notes
3873        -----
3874        Modifies the `self.tmp_data` attribute to contain only the selected features from `self.input_data`.
3875        """
3876
3877        dat = self.input_data.copy()
3878
3879        not_in_columns = [name for name in features_list if name not in dat.columns]
3880
3881        if not_in_columns:
3882            print("These names are not in data", not_in_columns)
3883        else:
3884            print("All names are present in data.")
3885
3886        in_columns = [name for name in features_list if name in dat.columns]
3887
3888        dat = dat[in_columns]
3889
3890        self.tmp_data = dat
3891
3892    def data_scale(self):
3893        """
3894        Scale the data using standardization (z-score normalization).
3895
3896        This method applies `StandardScaler` from scikit-learn to the temporary dataset (`self.tmp_data`) and stores the scaled data.
3897
3898        Notes
3899        -----
3900        Modifies the `self.scaled_data` attribute to contain the standardized version of `self.tmp_data`.
3901        """
3902
3903        if None not in self.tmp_data:
3904
3905            def is_id_column(name: str):
3906                name_lower = name.lower()
3907                return name_lower == "id" or "id_" in name_lower or "_id" in name_lower
3908
3909            tmp = self.tmp_data
3910
3911            cols_with_strings = [
3912                c
3913                for c in tmp.columns
3914                if tmp[c].apply(lambda x: isinstance(x, str)).any()
3915            ]
3916
3917            cols_id_pattern = [c for c in tmp.columns if is_id_column(c)]
3918
3919            cols_to_drop = list(set(cols_id_pattern + cols_with_strings))
3920
3921            tmp = tmp.drop(columns=cols_to_drop)
3922
3923            scaler = StandardScaler()
3924
3925            self.scaled_data = scaler.fit_transform(tmp)
3926
3927        else:
3928            print(
3929                "\nNo data to scale. Please use the load_data() method first, and optionally the select_data() method."
3930            )
3931
3932    def PCA(self):
3933        """
3934        Perform Principal Component Analysis (PCA) on the scaled data.
3935
3936        This method reduces the dimensionality of `self.scaled_data` while retaining the maximum variance.
3937
3938        Notes
3939        -----
3940        Modifies the `self.PCA_results` attribute with the PCA-transformed data.
3941        """
3942
3943        if None not in self.scaled_data:
3944            pca = PCA(n_components=self.scaled_data.shape[1])
3945            self.PCA_results = pca.fit_transform(self.scaled_data)
3946            self.explained_variance_ratio = pca.explained_variance_ratio_
3947        else:
3948            print("\nNo data for PCA. Please use the data_scale() method first.")
3949
3950    def var_plot(self):
3951        """
3952        Plot the cumulative explained variance of the principal components from PCA.
3953
3954        This method visualizes the cumulative explained variance to help determine how many components capture most of the variance.
3955
3956        Notes
3957        -----
3958        Stores results in the following attributes:
3959        - `self.var_data` (np.ndarray): Explained variance ratio for each principal component.
3960        - `self.knee_plot` (matplotlib.figure.Figure): Figure of the cumulative explained variance plot.
3961        """
3962
3963        if None not in self.PCA_results:
3964
3965            fig, _ = plt.subplots(figsize=(15, 7))
3966            explained_var = self.explained_variance_ratio
3967
3968            cumulative_var = np.cumsum(explained_var)
3969
3970            # Plot the cumulative explained variance as a function of the number of components
3971            plt.plot(cumulative_var)
3972            plt.xlabel("Number of Components")
3973            plt.ylabel("Cumulative Explained Variance")
3974            plt.title("Explained variance of PCs")
3975            plt.xticks(np.arange(0, len(cumulative_var) + 1, step=1))
3976
3977            self.var_data = explained_var
3978            self.knee_plot = fig
3979
3980        else:
3981
3982            print(
3983                "\nNo data for variance explanation analysis. Please use the PCA() method first."
3984            )
3985
    def UMAP(
        self,
        PC_num: int = 5,
        factorize_with_metadata: bool = False,
        harmonize_sets: bool = True,
        n_neighbors: int = 25,
        min_dist: float = 0.01,
        n_components: int = 2,
        width: int = 8,
        height: int = 6,
    ):
        """
        Perform UMAP (Uniform Manifold Approximation and Projection) dimensionality reduction on PCA results.

        UMAP is applied to the top principal components, optionally harmonized
        across data sets and/or supervised with metadata labels. Generates both
        a static and an interactive visualization of the embedding.

        Parameters
        ----------
        PC_num : int, optional
            Number of top principal components to use for UMAP embedding. Default is 5.

        factorize_with_metadata : bool, optional
            Whether to use metadata (e.g., 'sets') to factorize UMAP embedding. Default is False.

        harmonize_sets : bool, optional
            If True, applies harmonization across data sets before computing the UMAP embedding.
            Default is True.

        n_neighbors : int, optional
            Number of neighbors for UMAP to compute local structure. Default is 25.

        min_dist : float, optional
            Minimum distance between points in the low-dimensional embedding. Default is 0.01.

        n_components : int, optional
            Number of dimensions for the UMAP embedding. Default is 2.

        width : int, optional
            Width of the generated matplotlib figures (in inches). Default is 8.

        height : int, optional
            Height of the generated matplotlib figures (in inches). Default is 6.

        Notes
        -----
        Stores results in the following attributes:
        - `self.UMAP_data` (np.ndarray): UMAP-transformed data.
        - `self.UMAP_plot['static']['PrimaryUMAP']` (matplotlib.figure.Figure): Static visualization of UMAP embedding.
        - `self.UMAP_plot['html']['PrimaryUMAP']` (plotly.graph_objs.Figure): Interactive Plotly visualization of UMAP embedding.
        """

        if None not in self.PCA_results:

            # Fixed random_state keeps the embedding reproducible across runs.
            reducer = umap.UMAP(
                n_neighbors=n_neighbors,
                min_dist=min_dist,
                n_components=n_components,
                random_state=42,
            )

            pca_res = self.PCA_results

            if harmonize_sets:

                pca_res = np.array(pca_res)

                # Harmony batch-corrects the PCA space across 'sets';
                # Z_corr is transposed back to samples-by-components here.
                pca_res = np.array(
                    harmonize.run_harmony(
                        pca_res, self.input_metadata, vars_use="sets"
                    ).Z_corr
                ).T

            if factorize_with_metadata:
                # Supervised UMAP: encode the set labels as integer codes.
                numeric_labels = pd.Categorical(self.tmp_metadata["sets"]).codes

                # NOTE(review): this slices PC_num + 1 leading components,
                # i.e. one more than `PC_num` — confirm the off-by-one is intended.
                umap_result = reducer.fit_transform(
                    pca_res[:, : PC_num + 1], y=numeric_labels
                )

            else:
                umap_result = reducer.fit_transform(pca_res[:, : PC_num + 1])

            umap_result_plot = pd.DataFrame(umap_result.copy())

            # Color the embedding by set membership.
            umap_result_plot["clusters"] = list(self.tmp_metadata["sets"])

            static_fig = umap_static(umap_result_plot, width=width, height=height)

            html_fig = umap_html(
                umap_result_plot, width=width * 100, height=height * 100
            )

            self.UMAP_data = umap_result

            self.UMAP_plot["static"]["PrimaryUMAP"] = static_fig
            self.UMAP_plot["html"]["PrimaryUMAP"] = html_fig

        else:

            print("\nNo data for UMAP. Please use the PCA() method first.")
4086
4087    def db_scan(self, eps=0.5, min_samples: int = 10):
4088        """
4089        Perform DBSCAN clustering on UMAP-transformed data.
4090
4091        DBSCAN identifies clusters based on density, labeling points in dense regions as clusters and others as noise.
4092
4093        Parameters
4094        ----------
4095        eps : float, optional
4096            Maximum distance between two points to be considered neighbors. Default is 0.5.
4097
4098        min_samples : int, optional
4099            Minimum number of points required to form a dense region (cluster). Default is 10.
4100
4101        Notes
4102        -----
4103        Stores the results in the following attribute:
4104        - `self.dblabels` (list of str): Cluster labels assigned by DBSCAN for each point in the UMAP embedding.
4105        """
4106
4107        if None not in self.UMAP_data:
4108
4109            dbscan = DBSCAN(eps=eps, min_samples=min_samples)
4110            dbscan_labels = dbscan.fit_predict(self.UMAP_data)
4111            self.dblabels = [str(x) for x in dbscan_labels]
4112
4113        else:
4114
4115            print("\nNo data for DBSCAN. Please use the UMAP() method first.")
4116
    def UMAP_on_clusters(
        self,
        min_entities: int = 50,
        width: int = 8,
        height: int = 6,
        n_per_col: int = 20,
    ):
        """
        Generate UMAP visualizations for clusters filtered by a minimum entity threshold.

        This method removes clusters containing fewer than `min_entities` observations
        and produces two UMAP visualizations:

        1. **Cluster UMAP** – points colored by cluster assignment only.
        2. **Cluster × Set UMAP** – points colored by the combination of cluster and set identifier.

        Parameters
        ----------
        min_entities : int, optional
            Minimum number of entities required for a cluster to be included
            in the visualization. Default is 50.

        width : int, optional
            Width of the generated matplotlib figures (in inches). Default is 8.

        height : int, optional
            Height of the generated matplotlib figures (in inches). Default is 6.

        n_per_col : int, optional
            Maximum number of legend entries per column. Default is 20.

        Notes
        -----
        This method updates the following attributes:

        - `self.UMAP_plot['static']['ClusterUMAP']`
          Static matplotlib figure of the filtered cluster-only UMAP.

        - `self.UMAP_plot['html']['ClusterUMAP']`
          Interactive HTML version of the cluster-only UMAP.

        - `self.UMAP_plot['static']['ClusterXSetsUMAP']`
          Static matplotlib figure showing clusters combined with set identifiers.

        - `self.UMAP_plot['html']['ClusterXSetsUMAP']`
          Interactive HTML version of the cluster × set visualization.

        - `self.tmp_data`
          Dataset filtered to include only clusters meeting the `min_entities` threshold.

        - `self.tmp_metadata`
          Metadata corresponding to the filtered dataset.
        """

        if None not in self.UMAP_data:

            # Restore the pre-filter data/metadata saved by a previous call,
            # so repeated invocations always start from the same state.
            if hasattr(self, "_tmp_data_old"):
                self.tmp_data = self._tmp_data_old

            if hasattr(self, "_tmp_metadata_old"):
                self.tmp_metadata = self._tmp_metadata_old

            # Attach DBSCAN labels and drop noise points (label "-1").
            umap_result = pd.DataFrame(self.UMAP_data.copy())
            umap_result["id"] = self.tmp_metadata.index
            umap_result["clusters"] = self.dblabels
            umap_result = umap_result[umap_result["clusters"] != "-1"]
            tmp_metadata = self.tmp_metadata.copy()
            tmp_metadata["clusters"] = self.dblabels
            tmp_metadata = tmp_metadata[tmp_metadata["clusters"] != "-1"]
            tmp_data = self.tmp_data.copy()
            tmp_data.index = self.dblabels
            tmp_data = tmp_data[tmp_data.index != "-1"]

            # Count cluster sizes to enforce the min_entities threshold.
            label_counts_dict = Counter(self.dblabels)

            label_counts = pd.DataFrame.from_dict(
                label_counts_dict, orient="index", columns=["count"]
            )

            filtered_counts = label_counts[label_counts["count"] > min_entities]

            # Build a unique per-observation id: "<id> # <set>".
            tmp_metadata["full_id"] = list(
                tmp_metadata["id"].astype(str) + " # " + tmp_metadata["sets"]
            )

            tmp_data.index = tmp_metadata["full_id"]
            umap_result["full_id"] = list(tmp_metadata["full_id"])

            # Keep only clusters that passed the size threshold.
            umap_result = umap_result[
                umap_result["clusters"].isin(np.array(filtered_counts.index))
            ]
            tmp_metadata = tmp_metadata[
                tmp_metadata["clusters"].isin(np.array(filtered_counts.index))
            ]

            # Numeric sort of the string labels keeps the legend ordered.
            umap_result = umap_result.sort_values(
                by="clusters", key=lambda x: x.astype(int)
            )

            tmp_data = tmp_data[tmp_data.index.isin(np.array(tmp_metadata["full_id"]))]

            # First visualization: clusters only.
            static_fig = umap_static(
                umap_result, width=width, height=height, n_per_col=n_per_col
            )

            html_fig = umap_html(umap_result, width=width * 100, height=height * 100)

            self.UMAP_plot["static"]["ClusterUMAP"] = static_fig
            self.UMAP_plot["html"]["ClusterUMAP"] = html_fig

            # Second pass: combine cluster and set into "<cluster> # <set>"
            # and re-apply the size threshold on the combined labels.
            tmp_metadata["full_name"] = list(
                tmp_metadata["clusters"] + " # " + tmp_metadata["sets"]
            )

            label_counts_dict = Counter(list(tmp_metadata["full_name"]))

            label_counts = pd.DataFrame.from_dict(
                label_counts_dict, orient="index", columns=["count"]
            )

            filtered_counts = label_counts[label_counts["count"] > min_entities]

            tmp_data.index = tmp_metadata["full_name"]
            umap_result["clusters"] = list(tmp_metadata["full_name"])

            umap_result = umap_result[
                umap_result["clusters"].isin(np.array(filtered_counts.index))
            ]

            tmp_metadata = tmp_metadata[
                tmp_metadata["full_name"].isin(np.array(filtered_counts.index))
            ]

            tmp_data = tmp_data[tmp_data.index.isin(np.array(filtered_counts.index))]

            # Second visualization: cluster × set combinations.
            static_fig = umap_static(
                umap_result, width=width, height=height, n_per_col=n_per_col
            )

            html_fig = umap_html(umap_result, width=width * 100, height=height * 100)

            self.UMAP_plot["static"]["ClusterXSetsUMAP"] = static_fig

            self.UMAP_plot["html"]["ClusterXSetsUMAP"] = html_fig

            # Save the unfiltered state so the next call can restore it.
            self._tmp_data_old = self.tmp_data
            self._tmp_metadata_old = self.tmp_metadata

            self.tmp_data = tmp_data
            self.tmp_metadata = tmp_metadata

        else:
            print(
                "\nNo data for visualization. Please use the UMAP() and db_scan() methods first."
            )
4272
4273    ## save data
4274    def full_info(self):
4275        """
4276        Merge data with metadata based on the 'full_id' column.
4277
4278        This method combines `self.tmp_data` and `self.tmp_metadata` into a single DataFrame if the metadata contains a 'full_id' column. If 'full_id' is not present, the method prints a warning to complete the preprocessing pipeline.
4279
4280        Returns
4281        -------
4282        pd.DataFrame or None
4283            Merged DataFrame containing both data and metadata if 'full_id' exists; otherwise, None.
4284        """
4285
4286        tmp_data = self.tmp_data.copy()
4287        tmp_metadata = self.tmp_metadata.copy()
4288
4289        if "full_id" in tmp_metadata.columns:
4290            tmp_data.index = tmp_metadata["full_id"]
4291
4292            merged_df = tmp_data.merge(
4293                tmp_metadata, left_index=True, right_on="full_id", how="left"
4294            )
4295
4296            return merged_df
4297
4298        else:
4299
4300            print("\nMetadata is not completed!")
4301
4302        #################################################################################
4303
4304    def DFA(self, meta_group_by: str = "sets", sets: dict = {}, n_proc=5):
4305        """
4306        Perform Differential Feature Analysis (DFA) on specified data groups.
4307
4308        This method conducts DFA using a grouping factor from metadata and a dictionary of sets for comparison. It allows for the identification of significant differences across defined sets.
4309
4310        The analysis includes:
4311        - Mann–Whitney U test
4312        - Percentage of non-zero values
4313        - Means and standard deviations
4314        - Effect size metric (ESM)
4315        - Benjamini–Hochberg FDR correction
4316        - Fold-change and log2 fold-change
4317
4318        Parameters
4319        ----------
4320        meta_group_by : str, optional
4321            Metadata column used for grouping during the analysis.
4322            Default is ``'sets'``.
4323            To view available grouping categories, use ``self.groups``.
4324
4325        sets : dict, optional
4326            Dictionary defining groups for pairwise comparison.
4327            Keys correspond to group names, and values are lists of labels
4328            belonging to each group.
4329
4330            Example
4331            -------
4332            >>> sets = {
4333            ...     'healthy': ['21q'],
4334            ...     'disease': ['71q', '77q', '109q']
4335            ... }
4336            In this configuration, the *healthy* group is compared against the
4337            aggregated *disease* groups.
4338
4339        n_proc : int, optional
4340            Number of CPU cores used for parallel processing.
4341            Default is ``5``.
4342
4343        Returns
4344        -------
4345        pandas.DataFrame or None
4346            A DataFrame containing statistical results for each feature, including:
4347
4348            - ``feature`` : str
4349            - ``p_val`` : float
4350            - ``adj_pval`` : float
4351            - ``pct_valid`` : float
4352            - ``pct_ctrl`` : float
4353            - ``avg_valid`` : float
4354            - ``avg_ctrl`` : float
4355            - ``sd_valid`` : float
4356            - ``sd_ctrl`` : float
4357            - ``esm`` : float
4358            - ``FC`` : float
4359            - ``log(FC)`` : float
4360            - ``norm_diff`` : float
4361            - ``valid_group`` : str
4362            - ``-log(p_val)`` : float
4363
4364            If ``sets`` is ``None``, results for each group are concatenated.
4365
4366            Returns ``None`` in case of errors or invalid parameters.
4367
4368        Notes
4369        -----
4370        - Columns containing only zeros are automatically removed.
4371        - p-values equal for both groups produce ``p_val = 1``.
4372        - Benjamini–Hochberg correction is applied separately within each group comparison.
4373        - Fold-change is stabilized using a small, data-derived ``low_factor``.
4374        - Uses ``Mann–Whitney U`` test with ``alternative='two-sided'``.
4375
4376        """
4377
4378        tmp_data = self.tmp_data.copy()
4379
4380        tmp_data = tmp_data.select_dtypes(include="number")
4381
4382        tmp_metadata = self.tmp_metadata.copy()
4383
4384        if len(sets.keys()) >= 2:
4385            print("\nAnalysis strated on provided sets dictionary and meta_group_by...")
4386            tmp_data.index = list(tmp_metadata[meta_group_by])
4387            tmp_metadata["sets"] = tmp_metadata[meta_group_by]
4388            results = statistic(
4389                tmp_data.transpose(), sets=sets, metadata=tmp_metadata, n_proc=n_proc
4390            )
4391
4392        else:
4393            print(
4394                "\nAnalysis strated on for all groups to each other in meta_group_by..."
4395            )
4396            tmp_data.index = list(tmp_metadata[meta_group_by])
4397            tmp_metadata["sets"] = tmp_metadata[meta_group_by]
4398            results = statistic(
4399                tmp_data.transpose(), sets=None, metadata=tmp_metadata, n_proc=n_proc
4400            )
4401
4402        self.DFA_results = results
4403
4404    def heatmap_DFA(
4405        self,
4406        p_value: float | int = 0.05,
4407        top_n: int = 5,
4408        scale: bool = False,
4409        clustering: str | None = "ward",
4410        figsize=(10, 5),
4411    ):
4412        """
4413        Generate a heatmap of the top DFA features filtered by p-value and log fold change.
4414
4415        Parameters
4416        ----------
4417        p_value : float or int, optional
4418            Significance threshold used to filter features by their p-value.
4419            Only features with p_val < p_value are included. Default is 0.05.
4420
4421        top_n : int, optional
4422            Number of top features selected per group based on the 'esm' score.
4423            Default is 5.
4424
4425        scale : bool, optional
4426            Whether to apply Min–Max scaling to heatmap values across features.
4427            Default is False.
4428
4429        clustering : str or None, optional
4430            Hierarchical clustering method applied to rows/columns of the heatmap.
4431            If None, clustering is disabled. Default is 'ward'.
4432
4433        figsize : tuple, optional
4434            Size of the resulting matplotlib figure. Default is (10, 5).
4435
4436        Notes
4437        -----
4438        - Only features with a positive log fold change ('log(FC)' > 0) are considered.
4439        - Heatmap values represent -log10(p_value) for visualization.
4440        - If `scale=True`, values are normalized using Min–Max scaling.
4441        - The generated figure is displayed and stored in `self.DFA_plot`.
4442        """
4443
4444        df_reduced = self.DFA_results.copy()
4445
4446        df_reduced = df_reduced[df_reduced["log(FC)"] > 0]
4447
4448        df_reduced = df_reduced[df_reduced["p_val"] < p_value]
4449
4450        df_reduced = (
4451            df_reduced.sort_values(["valid_group", "esm"], ascending=[True, False])
4452            .groupby("valid_group", as_index=False)
4453            .head(top_n)
4454        )
4455
4456        heatmap_data = df_reduced.pivot(
4457            index="feature", columns="valid_group", values="-log(p_val)"
4458        ).fillna(0)
4459
4460        label = "-log10(p_value)"
4461
4462        if scale:
4463            label = f"scaled({label})"
4464            scaler = MinMaxScaler()
4465            heatmap_data = pd.DataFrame(
4466                scaler.fit_transform(heatmap_data),
4467                index=heatmap_data.index,
4468                columns=heatmap_data.columns,
4469            )
4470
4471        if clustering is not None:
4472            Z_rows = linkage(heatmap_data.values, method=clustering)
4473            row_order = leaves_list(Z_rows)
4474
4475            Z_cols = linkage(heatmap_data.values.T, method=clustering)
4476            col_order = leaves_list(Z_cols)
4477
4478            heatmap_data = heatmap_data.iloc[row_order, col_order]
4479
4480        figure = plt.figure(figsize=figsize)
4481        sns.heatmap(
4482            heatmap_data,
4483            cmap="viridis",
4484            linewidths=0.5,
4485            linecolor="gray",
4486            cbar_kws={"label": label},
4487            fmt=".2f",
4488        )
4489        plt.ylabel("Feature")
4490        plt.xlabel("Cluster")
4491        plt.xticks(rotation=30, ha="right")
4492
4493        plt.tight_layout()
4494
4495        if cfg._DISPLAY_MODE:
4496            plt.show()
4497
4498        self.DFA_plot = figure
4499
4500    def get_DFA_plot(self, show: bool = True):
4501        """
4502        Retrieve the heatmap figure generated by `heatmap_DFA()`.
4503
4504        Parameters
4505        ----------
4506        show : bool, optional
4507            Whether to display the stored heatmap figure. Default is True.
4508
4509        Returns
4510        -------
4511        matplotlib.figure.Figure
4512            The figure object containing the DFA heatmap.
4513        """
4514
4515        if self.DFA_plot is None:
4516            print("\nNo results to return! Please run the heatmap_DFA() method first.")
4517        else:
4518            if cfg._DISPLAY_MODE:
4519                if show is True:
4520                    self.DFA_plot
4521                    try:
4522                        display(self.DFA_plot)
4523                    except:
4524                        None
4525
4526            return self.DFA_plot
4527
4528    def print_avaiable_features(self):
4529        """
4530        Print the available features (columns) in the current dataset.
4531
4532        This method lists all column names in `self.tmp_data` to help identify which features are available for analysis.
4533
4534        Example
4535        -------
4536        >>> group_analysis.print_avaiable_features()
4537        """
4538
4539        print("Avaiable features:")
4540        for cl in self.tmp_data.columns:
4541            print(cl)
4542
4543    def proportion_analysis(
4544        self,
4545        grouping_col: str = "sets",
4546        val_col: str = "nuclei_per_img",
4547        grouping_dict=None,
4548        omit=None,
4549    ):
4550        """
4551        Perform proportion analysis by comparing the distribution of values across groups.
4552
4553        This method analyzes the distribution of values (e.g., nuclei counts) across different groups defined in the dataset. It can optionally group categories, omit specific values, and produces both statistical results and a visualization.
4554
4555        Parameters
4556        ----------
4557        grouping_col : str, optional
4558            Column to group by. Default is 'sets'.
4559
4560        val_col : str, optional
4561            Column containing the values to analyze. Default is 'nuclei_per_img'.
4562
4563        grouping_dict : dict or None, optional
4564            Dictionary mapping new group names to categories in `grouping_col`. If None, analysis is based on the original groups.
4565
4566        omit : str, list, or None, optional
4567            Values to exclude from the analysis. Default is None.
4568
4569        Attributes
4570        ----------
4571        proportion_stats : pd.DataFrame
4572            DataFrame containing chi-square test results for pairwise group comparisons.
4573
4574        proportion_plot : matplotlib.figure.Figure
4575            Plot visualizing the proportions across groups.
4576
4577        Example
4578        -------
4579        >>> group_analysis.proportion_analysis(
4580        ...     grouping_col='sets',
4581        ...     val_col='nuclei_per_img',
4582        ...     grouping_dict={'Group A': [1, 2], 'Group B': [3, 4]},
4583        ...     omit=5
4584        ... )
4585        """
4586
4587        andata = self.tmp_data.copy()
4588
4589        andata[grouping_col] = list(self.tmp_metadata[grouping_col])
4590
4591        andata = andata[[grouping_col, val_col]]
4592
4593        if omit is not None:
4594            if isinstance(omit, list):
4595                andata = andata[~andata[val_col].isin(omit)]
4596            else:
4597                andata = andata[andata[val_col] != omit]
4598
4599        andata = andata.reset_index(drop=True)
4600        andata["index_col"] = andata.index
4601
4602        if isinstance(grouping_dict, dict):
4603            for k in grouping_dict.keys():
4604                andata.loc[
4605                    andata[grouping_col].isin(grouping_dict[k]), grouping_col
4606                ] = k
4607
4608        df_pivot = andata.pivot_table(
4609            index=val_col,
4610            columns=grouping_col,
4611            values="index_col",
4612            aggfunc="count",
4613            fill_value=0,
4614        )
4615
4616        Z_rows = linkage(df_pivot.values, method="ward")
4617        row_order = leaves_list(Z_rows)
4618
4619        Z_cols = linkage(df_pivot.values.T, method="ward")
4620        col_order = leaves_list(Z_cols)
4621
4622        df_pivot = df_pivot.iloc[row_order, col_order]
4623
4624        chi_df = chi_pairs(df_pivot)
4625
4626        self.proportion_stats = chi_pairs(df_pivot)
4627
4628        chi_df["Significance_Label"] = chi_df["p-value"].apply(get_significance_label)
4629
4630        self.proportion_plot = prop_plot(df_pivot, chi_df)
4631
4632    def get_proportion_plot(self, show: bool = True):
4633        """
4634        Retrieve the proportion bar plot generated by the `proportion_analysis()` method.
4635
4636        Parameters
4637        ----------
4638        show : bool, optional
4639            Whether to display the proportion bar plot. Default is True.
4640
4641        Returns
4642        -------
4643        matplotlib.figure.Figure
4644            The figure object containing the proportion bar plot.
4645        """
4646
4647        if self.proportion_plot is None:
4648            print(
4649                "\nNo results to return! Please run the proportion_analysis() method first."
4650            )
4651        else:
4652            if cfg._DISPLAY_MODE:
4653                if show:
4654                    self.proportion_plot
4655                    try:
4656                        display(self.proportion_plot)
4657                    except:
4658                        None
4659
4660            return self.proportion_plot
4661
4662    def get_proportion_stats(self):
4663        """
4664        Retrieve the proportion statistics computed by the `proportion_analysis()` method.
4665
4666        Returns
4667        -------
4668        pd.DataFrame
4669            The proportion statistics stored in `self.proportion_stats`.
4670        """
4671
4672        if None in self.proportion_stats:
4673            print(
4674                "\nNo results to return! Please run the proportion_analysis() method first."
4675            )
4676        else:
4677            return self.proportion_stats

A class for performing multivariate analysis, dimensionality reduction, clustering, and differential feature analysis (DFA) on biological or experimental datasets.

This class provides tools for:

  • Scaling and PCA of input data
  • UMAP embedding and DBSCAN clustering
  • Differential Feature Analysis across groups
  • Proportion analysis and plotting
  • Data selection and merging with metadata

Attributes

input_data : pd.DataFrame The primary dataset containing features for analysis.

input_metadata : pd.DataFrame Metadata corresponding to the input data, including identifiers and group labels.

tmp_data : pd.DataFrame Temporary copy of the input data, used for feature selection and filtering.

tmp_metadata : pd.DataFrame Temporary copy of metadata, used for filtered or subsetted operations.

scaled_data : np.ndarray or None Scaled version of the temporary dataset (tmp_data), updated after data_scale().

PCA_results : np.ndarray or None Results of PCA transformation applied on scaled data.

var_data : np.ndarray or None Explained variance ratio from PCA.

knee_plot : matplotlib.figure.Figure or None Figure of cumulative explained variance for PCA components.

UMAP_data : np.ndarray or None Embedding results from UMAP dimensionality reduction.

UMAP_plot : dict Dictionary containing UMAP plots. Keys: 'static' (matplotlib) and 'html' (plotly).

dblabels : list or None Cluster labels assigned by DBSCAN after UMAP embedding.

explained_variance_ratio : np.ndarray or None Explained variance ratio of PCA components.

DFA_results : pd.DataFrame or None Results of Differential Feature Analysis (DFA).

proportion_stats : pd.DataFrame or None Statistics from proportion analysis.

proportion_plot : matplotlib.figure.Figure or None Figure of proportion analysis results.

Methods

resest_project(): Reset all temporary and analysis results to initial state.

load_data(data, ids_col='id_name', set_col='set'): Class method to load data and metadata and initialize the object.

groups: Property returning available groups in the metadata.

get_DFA(), get_PCA(), get_knee_plot(), get_var_data(), get_scaled_data(): Methods to retrieve previously computed results.

UMAP(), db_scan(), UMAP_on_clusters(): Methods for dimensionality reduction and clustering visualization.

DFA(meta_group_by='sets', sets={}, n_proc=5): Perform Differential Feature Analysis.

proportion_analysis(grouping_col='sets', val_col='nuclei_per_img', ...): Perform and plot proportion analysis across groups.

GroupAnalysis(input_data, input_metadata)
    def __init__(
        self,
        input_data,
        input_metadata,
    ):
        """
        Initialize a GroupAnalysis instance with data and metadata.

        Parameters
        ----------
        input_data : pd.DataFrame
            Dataset containing features for analysis. Rows represent samples and columns represent features.

        input_metadata : pd.DataFrame
            Metadata corresponding to `input_data`, including sample identifiers and group labels.
        """

        # NOTE: the tmp_* attributes are direct references to the inputs (no
        # .copy() is made here); select_data() later rebinds tmp_data to a
        # fresh copy.
        self.input_data = input_data
        """Stored input dataset for analysis."""

        self.input_metadata = input_metadata
        """Stored metadata associated with `input_data`."""

        self.tmp_metadata = input_metadata
        """Temporary copy of `input_metadata` used for filtered or subsetted operations."""

        self.tmp_data = input_data
        """Temporary copy of `input_data` used for filtering, selection, or scaling."""

        self.scaled_data = None
        """Stored scaled version of `tmp_data` after normalization or standardization."""

        self.PCA_results = None
        """Stored results of PCA transformation applied on `scaled_data`."""

        self.var_data = None
        """Stored explained variance ratio for PCA components."""

        self.knee_plot = None
        """Figure showing cumulative explained variance for PCA."""

        self.UMAP_data = None
        """Stored embedding coordinates from UMAP dimensionality reduction."""

        self.UMAP_plot = {"static": {}, "html": {}}
        """Stored dictionary containing UMAP plots: 'static' (matplotlib) and 'html' (plotly)."""

        self.dblabels = None
        """Stored cluster labels assigned by DBSCAN after UMAP embedding."""

        self.explained_variance_ratio = None
        """Stored explained variance ratio of PCA components."""

        self.DFA_results = None
        """Stored Differential Feature Analysis (DFA) results."""

        self.proportion_stats = None
        """Stored statistics from proportion analysis of groups."""

        self.proportion_plot = None
        """Figure visualizing proportion analysis results."""

Initialize a GroupAnalysis instance with data and metadata.

Parameters

input_data : pd.DataFrame Dataset containing features for analysis. Rows represent samples and columns represent features.

input_metadata : pd.DataFrame Metadata corresponding to input_data, including sample identifiers and group labels.

input_data

Stored input dataset for analysis.

input_metadata

Stored metadata associated with input_data.

tmp_metadata

Temporary copy of input_metadata used for filtered or subsetted operations.

tmp_data

Temporary copy of input_data used for filtering, selection, or scaling.

scaled_data

Stored scaled version of tmp_data after normalization or standardization.

PCA_results

Stored results of PCA transformation applied on scaled_data.

var_data

Stored explained variance ratio for PCA components.

knee_plot

Figure showing cumulative explained variance for PCA.

UMAP_data

Stored embedding coordinates from UMAP dimensionality reduction.

UMAP_plot

Stored dictionary containing UMAP plots: 'static' (matplotlib) and 'html' (plotly).

dblabels

Stored cluster labels assigned by DBSCAN after UMAP embedding.

explained_variance_ratio

Stored explained variance ratio of PCA components.

DFA_results

Stored Differential Feature Analysis (DFA) results.

proportion_stats

Stored statistics from proportion analysis of groups.

proportion_plot

Figure visualizing proportion analysis results.

    def resest_project(self):
        """
        Reset the project state by clearing or reinitializing analysis attributes.

        The following attributes are restored to their initial values:
        - `tmp_metadata`, `tmp_data` (rebound to the original inputs)
        - `scaled_data`
        - `PCA_results`
        - `var_data`
        - `knee_plot`
        - `UMAP_data`
        - `UMAP_plot`
        - `dblabels`
        - `explained_variance_ratio`
        - `DFA_results`
        - `proportion_stats`
        - `proportion_plot`

        Typically called to reinitialize the project data and results,
        preparing the system for new computations or project resets.

        NOTE(review): the method name contains a typo ("resest"); it is kept
        unchanged because it is part of the public interface.
        """

        self.tmp_metadata = self.input_metadata
        self.tmp_data = self.input_data
        self.scaled_data = None
        self.PCA_results = None
        self.var_data = None
        self.knee_plot = None
        self.UMAP_data = None
        self.UMAP_plot = {"static": {}, "html": {}}
        self.dblabels = None
        self.explained_variance_ratio = None
        self.DFA_results = None
        self.proportion_stats = None
        self.proportion_plot = None

Resets the project state by clearing or reinitializing various attributes.

This method resets the following attributes to initial values:

This method is typically called to reinitialize the project data and results, preparing the system for new computations or project resets.

@classmethod
def load_data(cls, data, ids_col: str = 'id_name', set_col: str = 'set'):
3629    @classmethod
3630    def load_data(cls, data, ids_col: str = "id_name", set_col: str = "set"):
3631        """
3632        Load data and initialize the class by storing both the feature data and metadata.
3633
3634        Parameters
3635        ----------
3636        data : pd.DataFrame
3637            Input dataset used for group analysis. Must contain both feature columns and
3638            metadata columns specified by `ids_col` and `set_col`.
3639
3640        ids_col : str, optional
3641            Name of the column containing unique object identifiers.
3642            Default is ``'id_name'``.
3643
3644        set_col : str, optional
3645            Name of the column specifying group or set assignment for each object.
3646            Default is ``'set'``.
3647
3648        Notes
3649        -----
3650        This method performs in-place initialization of the class and does not return
3651        a separate object. All loaded data and metadata become available through the
3652        class attributes for downstream analysis.
3653
3654        This method updates internal class attributes:
3655
3656        - **input_data** : pd.DataFrame
3657          Cleaned feature table with index set to object IDs.
3658
3659        - **tmp_data** : pd.DataFrame
3660          Copy of `input_data` used for temporary operations.
3661
3662        - **input_metadata** : pd.DataFrame
3663          Metadata containing object IDs and group assignments.
3664
3665        - **tmp_metadata** : pd.DataFrame
3666          Copy of `input_metadata` for temporary operations.
3667        """
3668
3669        data = data.dropna()
3670
3671        metadata = pd.DataFrame()
3672        metadata["id"] = data[ids_col]
3673        metadata["sets"] = data[set_col]
3674
3675        data.index = data[ids_col]
3676
3677        try:
3678            data.pop("id_name")
3679        except:
3680            None
3681
3682        try:
3683            data.pop("Object Number")
3684        except:
3685            None
3686
3687        return cls(data, metadata)

Load data and initialize the class by storing both the feature data and metadata.

Parameters

data : pd.DataFrame Input dataset used for group analysis. Must contain both feature columns and metadata columns specified by ids_col and set_col.

ids_col : str, optional Name of the column containing unique object identifiers. Default is 'id_name'.

set_col : str, optional Name of the column specifying group or set assignment for each object. Default is 'set'.

Notes

This method performs in-place initialization of the class and does not return a separate object. All loaded data and metadata become available through the class attributes for downstream analysis.

This method updates internal class attributes:

  • input_data : pd.DataFrame Cleaned feature table with index set to object IDs.

  • tmp_data : pd.DataFrame Copy of input_data used for temporary operations.

  • input_metadata : pd.DataFrame Metadata containing object IDs and group assignments.

  • tmp_metadata : pd.DataFrame Copy of input_metadata for temporary operations.

groups
3689    @property
3690    def groups(self):
3691        """
3692        Return information about available groups in the metadata for ``self.DFA``.
3693
3694        Returns
3695        -------
3696        dict
3697            Dictionary mapping each metadata column name to a list of unique groups
3698            available in that column.
3699        """
3700
3701        try:
3702            return {
3703                "sets": set(self.tmp_metadata["sets"]),
3704                "full_name": set(self.tmp_metadata["full_name"]),
3705            }
3706        except:
3707            return {"sets": set(self.tmp_metadata["sets"])}

Return information about available groups in the metadata for self.DFA.

Returns

dict Dictionary mapping each metadata column name to a list of unique groups available in that column.

def get_DFA(self):
3709    def get_DFA(self):
3710        """
3711        Retrieve the DFA results produced by the ``DFA()`` method.
3712
3713        Returns
3714        -------
3715        pd.DataFrame
3716            The DFA results stored in ``self.DFA_results``.
3717        """
3718
3719        if None in self.DFA_results:
3720            print("\nNo results to return! Please run the DFA() method first.")
3721        else:
3722            return self.DFA_results

Retrieve the DFA results produced by the DFA() method.

Returns

pd.DataFrame The DFA results stored in self.DFA_results.

def get_PCA(self):
3724    def get_PCA(self):
3725        """
3726        Retrieve the PCA results produced by the ``PCA()`` method.
3727
3728        Returns
3729        -------
3730        np.ndarray
3731            The PCA results stored in ``self.PCA_results``.
3732        """
3733
3734        if None in self.PCA_results:
3735            print("\nNo results to return! Please run the PCA() method first.")
3736        else:
3737            return self.PCA_results

Retrieve the PCA results produced by the PCA() method.

Returns

np.ndarray The PCA results stored in self.PCA_results.

def get_knee_plot(self, show: bool = True):
3739    def get_knee_plot(self, show: bool = True):
3740        """
3741        Retrieve the knee plot of cumulative explained variance generated by the ``var_plot()`` method.
3742
3743        Parameters
3744        ----------
3745        show : bool, optional
3746            If ``True`` (default), the knee plot is displayed.
3747
3748        Returns
3749        -------
3750        matplotlib.figure.Figure
3751            The figure object containing the knee plot.
3752        """
3753
3754        if self.knee_plot is None:
3755            print("\nNo results to return! Please run the var_plot() method first.")
3756        else:
3757            if cfg._DISPLAY_MODE:
3758                if show is True:
3759                    self.knee_plot
3760                    try:
3761                        display(self.knee_plot)
3762                    except:
3763                        None
3764
3765            return self.knee_plot

Retrieve the knee plot of cumulative explained variance generated by the var_plot() method.

Parameters

show : bool, optional If True (default), the knee plot is displayed.

Returns

matplotlib.figure.Figure The figure object containing the knee plot.

def get_var_data(self):
3767    def get_var_data(self):
3768        """
3769        Retrieve the explained variance data from the ``var_plot()`` method.
3770
3771        Returns
3772        -------
3773        np.ndarray
3774            Array containing the explained variance values stored in ``self.var_data``.
3775        """
3776
3777        if None in self.var_data:
3778            print("\nNo results to return! Please run the var_plot() method first.")
3779        else:
3780            return self.var_data

Retrieve the explained variance data from the var_plot() method.

Returns

np.ndarray Array containing the explained variance values stored in self.var_data.

def get_scaled_data(self):
3782    def get_scaled_data(self):
3783        """
3784        Retrieve the scaled data produced by the ``data_scale()`` method.
3785
3786        Returns
3787        -------
3788        np.ndarray
3789            Scaled data stored in ``self.scaled_data``.
3790        """
3791
3792        if None in self.scaled_data:
3793            print("\nNo results to return! Please run the data_scale() method first.")
3794        else:
3795            return self.scaled_data

Retrieve the scaled data produced by the data_scale() method.

Returns

np.ndarray Scaled data stored in self.scaled_data.

def get_UMAP_data(self):
3797    def get_UMAP_data(self):
3798        """
3799        Retrieve the UMAP-transformed data generated by the ``UMAP()`` method.
3800
3801        Returns
3802        -------
3803        np.ndarray
3804            UMAP-embedded data stored in ``self.UMAP_data``.
3805        """
3806
3807        if None in self.UMAP_data:
3808            print("\nNo results to return! Please run the UMAP() method first.")
3809        else:
3810            return self.UMAP_data

Retrieve the UMAP-transformed data generated by the UMAP() method.

Returns

np.ndarray UMAP-embedded data stored in self.UMAP_data.

def get_UMAP_plots(self, plot_type: str = 'static', show: bool = True):
3812    def get_UMAP_plots(self, plot_type: str = "static", show: bool = True):
3813        """
3814        Retrieve UMAP plots generated by the ``UMAP()`` and/or ``UMAP_on_clusters()`` methods.
3815
3816        Parameters
3817        ----------
3818        show : bool, optional
3819            Whether to display the UMAP plots. Default is True.
3820
3821        Returns
3822        -------
3823        dict of matplotlib.figure.Figure
3824            A dictionary containing the UMAP plots. Keys correspond to plot names, and values are the figure objects.
3825        """
3826
3827        if plot_type == "html":
3828
3829            if len(self.UMAP_plot["html"].keys()) == 0:
3830                print(
3831                    "\nNo results to return! Please run the UMAP() and / or UMAP_on_clusters() methods first."
3832                )
3833            else:
3834                if cfg._DISPLAY_MODE:
3835                    if show:
3836                        for k in self.UMAP_plot["html"].keys():
3837                            self.UMAP_plot["html"][k]
3838                            try:
3839                                display(self.UMAP_plot["html"][k])
3840                            except:
3841                                None
3842
3843                return self.UMAP_plot["html"]
3844
3845        else:
3846
3847            if len(self.UMAP_plot["static"].keys()) == 0:
3848                print(
3849                    "\nNo results to return! Please run the UMAP() and / or UMAP_on_clusters() methods first."
3850                )
3851            else:
3852                if cfg._DISPLAY_MODE:
3853                    if show:
3854                        for k in self.UMAP_plot["static"].keys():
3855                            self.UMAP_plot["static"][k]
3856                            try:
3857                                display(self.UMAP_plot["static"][k])
3858                            except:
3859                                None
3860
3861                return self.UMAP_plot["static"]

Retrieve UMAP plots generated by the UMAP() and/or UMAP_on_clusters() methods.

Parameters

show : bool, optional Whether to display the UMAP plots. Default is True.

Returns

dict of matplotlib.figure.Figure A dictionary containing the UMAP plots. Keys correspond to plot names, and values are the figure objects.

def select_data(self, features_list: list = []):
3863    def select_data(self, features_list: list = []):
3864        """
3865        Select specific features (columns) from the dataset for further analysis.
3866
3867        Parameters
3868        ----------
3869        features_list : list of str, optional
3870            List of feature names (column names) to select from the dataset. Default is an empty list, which selects no features.
3871
3872        Notes
3873        -----
3874        Modifies the `self.tmp_data` attribute to contain only the selected features from `self.input_data`.
3875        """
3876
3877        dat = self.input_data.copy()
3878
3879        not_in_columns = [name for name in features_list if name not in dat.columns]
3880
3881        if not_in_columns:
3882            print("These names are not in data", not_in_columns)
3883        else:
3884            print("All names are present in data.")
3885
3886        in_columns = [name for name in features_list if name in dat.columns]
3887
3888        dat = dat[in_columns]
3889
3890        self.tmp_data = dat

Select specific features (columns) from the dataset for further analysis.

Parameters

features_list : list of str, optional List of feature names (column names) to select from the dataset. Default is an empty list, which selects no features.

Notes

Modifies the self.tmp_data attribute to contain only the selected features from self.input_data.

def data_scale(self):
3892    def data_scale(self):
3893        """
3894        Scale the data using standardization (z-score normalization).
3895
3896        This method applies `StandardScaler` from scikit-learn to the temporary dataset (`self.tmp_data`) and stores the scaled data.
3897
3898        Notes
3899        -----
3900        Modifies the `self.scaled_data` attribute to contain the standardized version of `self.tmp_data`.
3901        """
3902
3903        if None not in self.tmp_data:
3904
3905            def is_id_column(name: str):
3906                name_lower = name.lower()
3907                return name_lower == "id" or "id_" in name_lower or "_id" in name_lower
3908
3909            tmp = self.tmp_data
3910
3911            cols_with_strings = [
3912                c
3913                for c in tmp.columns
3914                if tmp[c].apply(lambda x: isinstance(x, str)).any()
3915            ]
3916
3917            cols_id_pattern = [c for c in tmp.columns if is_id_column(c)]
3918
3919            cols_to_drop = list(set(cols_id_pattern + cols_with_strings))
3920
3921            tmp = tmp.drop(columns=cols_to_drop)
3922
3923            scaler = StandardScaler()
3924
3925            self.scaled_data = scaler.fit_transform(tmp)
3926
3927        else:
3928            print(
3929                "\nNo data to scale. Please use the load_data() method first, and optionally the select_data() method."
3930            )

Scale the data using standardization (z-score normalization).

This method applies StandardScaler from scikit-learn to the temporary dataset (self.tmp_data) and stores the scaled data.

Notes

Modifies the self.scaled_data attribute to contain the standardized version of self.tmp_data.

def PCA(self):
3932    def PCA(self):
3933        """
3934        Perform Principal Component Analysis (PCA) on the scaled data.
3935
3936        This method reduces the dimensionality of `self.scaled_data` while retaining the maximum variance.
3937
3938        Notes
3939        -----
3940        Modifies the `self.PCA_results` attribute with the PCA-transformed data.
3941        """
3942
3943        if None not in self.scaled_data:
3944            pca = PCA(n_components=self.scaled_data.shape[1])
3945            self.PCA_results = pca.fit_transform(self.scaled_data)
3946            self.explained_variance_ratio = pca.explained_variance_ratio_
3947        else:
3948            print("\nNo data for PCA. Please use the data_scale() method first.")

Perform Principal Component Analysis (PCA) on the scaled data.

This method reduces the dimensionality of self.scaled_data while retaining the maximum variance.

Notes

Modifies the self.PCA_results attribute with the PCA-transformed data.

def var_plot(self):
3950    def var_plot(self):
3951        """
3952        Plot the cumulative explained variance of the principal components from PCA.
3953
3954        This method visualizes the cumulative explained variance to help determine how many components capture most of the variance.
3955
3956        Notes
3957        -----
3958        Stores results in the following attributes:
3959        - `self.var_data` (np.ndarray): Explained variance ratio for each principal component.
3960        - `self.knee_plot` (matplotlib.figure.Figure): Figure of the cumulative explained variance plot.
3961        """
3962
3963        if None not in self.PCA_results:
3964
3965            fig, _ = plt.subplots(figsize=(15, 7))
3966            explained_var = self.explained_variance_ratio
3967
3968            cumulative_var = np.cumsum(explained_var)
3969
3970            # Plot the cumulative explained variance as a function of the number of components
3971            plt.plot(cumulative_var)
3972            plt.xlabel("Number of Components")
3973            plt.ylabel("Cumulative Explained Variance")
3974            plt.title("Explained variance of PCs")
3975            plt.xticks(np.arange(0, len(cumulative_var) + 1, step=1))
3976
3977            self.var_data = explained_var
3978            self.knee_plot = fig
3979
3980        else:
3981
3982            print(
3983                "\nNo data for variance explanation analysis. Please use the PCA() method first."
3984            )

Plot the cumulative explained variance of the principal components from PCA.

This method visualizes the cumulative explained variance to help determine how many components capture most of the variance.

Notes

Stores results in the following attributes:

  • self.var_data (np.ndarray): Explained variance ratio for each principal component.
  • self.knee_plot (matplotlib.figure.Figure): Figure of the cumulative explained variance plot.
def UMAP(
    self,
    PC_num: int = 5,
    factorize_with_metadata: bool = False,
    harmonize_sets: bool = True,
    n_neighbors: int = 25,
    min_dist: float = 0.01,
    n_components: int = 2,
    width: int = 8,
    height: int = 6,
):
    """
    Perform UMAP (Uniform Manifold Approximation and Projection) on PCA results.

    The embedding is computed from the top principal components, optionally
    supervised by the 'sets' metadata labels and/or batch-corrected with
    Harmony across sets. Produces both static and interactive plots.

    Parameters
    ----------
    PC_num : int, optional
        Number of top principal components used for the embedding. Default is 5.

    factorize_with_metadata : bool, optional
        If True, the 'sets' metadata labels supervise the UMAP fit. Default is False.

    harmonize_sets : bool, optional
        If True, Harmony batch correction over 'sets' is applied to the PCA
        scores before embedding. Default is True.

    n_neighbors : int, optional
        UMAP local-neighborhood size. Default is 25.

    min_dist : float, optional
        Minimum distance between embedded points. Default is 0.01.

    n_components : int, optional
        Dimensionality of the embedding. Default is 2.

    width : int, optional
        Figure width in inches (x100 pixels for the HTML plot). Default is 8.

    height : int, optional
        Figure height in inches (x100 pixels for the HTML plot). Default is 6.

    Notes
    -----
    Stores results in the following attributes:
    - `self.UMAP_data` (np.ndarray): UMAP-transformed data.
    - `self.UMAP_plot['static']['PrimaryUMAP']` (matplotlib.figure.Figure): static plot.
    - `self.UMAP_plot['html']['PrimaryUMAP']` (plotly.graph_objs.Figure): interactive plot.
    """

    # Guard: PCA() must have been run first.
    if None in self.PCA_results:
        print("\nNo data for UMAP. Please use the PCA() method first.")
        return

    embedder = umap.UMAP(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        n_components=n_components,
        random_state=42,  # fixed seed for reproducible embeddings
    )

    components = self.PCA_results

    if harmonize_sets:
        # Harmony batch-corrects the PCA scores across 'sets'; Z_corr comes
        # back transposed (features x samples), hence the final .T.
        components = np.array(components)
        harmony_run = harmonize.run_harmony(
            components, self.input_metadata, vars_use="sets"
        )
        components = np.array(harmony_run.Z_corr).T

    selected = components[:, : PC_num + 1]

    if factorize_with_metadata:
        # Supervised UMAP: encode set labels as integer codes.
        set_codes = pd.Categorical(self.tmp_metadata["sets"]).codes
        embedding = embedder.fit_transform(selected, y=set_codes)
    else:
        embedding = embedder.fit_transform(selected)

    plot_frame = pd.DataFrame(embedding.copy())
    plot_frame["clusters"] = list(self.tmp_metadata["sets"])

    static_fig = umap_static(plot_frame, width=width, height=height)
    html_fig = umap_html(plot_frame, width=width * 100, height=height * 100)

    self.UMAP_data = embedding
    self.UMAP_plot["static"]["PrimaryUMAP"] = static_fig
    self.UMAP_plot["html"]["PrimaryUMAP"] = html_fig

Perform UMAP (Uniform Manifold Approximation and Projection) dimensionality reduction on PCA results.

UMAP is applied to the top principal components, optionally using metadata labels to influence the embedding. Generates both 2D/3D embeddings and visualizations.

Parameters


PC_num : int, optional Number of top principal components to use for UMAP embedding. Default is 5.

factorize_with_metadata : bool, optional Whether to use metadata (e.g., 'sets') to factorize UMAP embedding. Default is False.

harmonize_sets : bool, optional If True, applies harmonization across data sets before computing the UMAP embedding. Default is True.

n_neighbors : int, optional Number of neighbors for UMAP to compute local structure. Default is 25.

min_dist : float, optional Minimum distance between points in the low-dimensional embedding. Default is 0.01.

n_components : int, optional Number of dimensions for the UMAP embedding. Default is 2.

width : int, optional Width of the generated matplotlib figures (in inches). Default is 8.

height : int, optional Height of the generated matplotlib figures (in inches). Default is 6.

Notes


Stores results in the following attributes:

  • self.UMAP_data (np.ndarray): UMAP-transformed data.
  • self.UMAP_plot['static']['PrimaryUMAP'] (matplotlib.figure.Figure): Static visualization of UMAP embedding.
  • self.UMAP_plot['html']['PrimaryUMAP'] (plotly.graph_objs.Figure): Interactive Plotly visualization of UMAP embedding.
def db_scan(self, eps=0.5, min_samples: int = 10):
    """
    Perform DBSCAN clustering on UMAP-transformed data.

    DBSCAN groups points in dense regions into clusters and marks sparse
    points as noise (label -1).

    Parameters
    ----------
    eps : float, optional
        Maximum distance between two points to count as neighbors. Default is 0.5.

    min_samples : int, optional
        Minimum number of points required to form a dense region. Default is 10.

    Notes
    -----
    Stores the results in the following attribute:
    - `self.dblabels` (list of str): DBSCAN cluster label for each embedded point.
    """

    # Guard: UMAP() must have been run first.
    if None in self.UMAP_data:
        print("\nNo data for DBSCAN. Please use the UMAP() method first.")
        return

    model = DBSCAN(eps=eps, min_samples=min_samples)
    assignments = model.fit_predict(self.UMAP_data)
    # Labels are kept as strings ("-1" marks noise) for downstream filtering.
    self.dblabels = [str(label) for label in assignments]

Perform DBSCAN clustering on UMAP-transformed data.

DBSCAN identifies clusters based on density, labeling points in dense regions as clusters and others as noise.

Parameters

eps : float, optional Maximum distance between two points to be considered neighbors. Default is 0.5.

min_samples : int, optional Minimum number of points required to form a dense region (cluster). Default is 10.

Notes

Stores the results in the following attribute:

  • self.dblabels (list of str): Cluster labels assigned by DBSCAN for each point in the UMAP embedding.
def UMAP_on_clusters( self, min_entities: int = 50, width: int = 8, height: int = 6, n_per_col: int = 20):
4117    def UMAP_on_clusters(
4118        self,
4119        min_entities: int = 50,
4120        width: int = 8,
4121        height: int = 6,
4122        n_per_col: int = 20,
4123    ):
4124        """
4125        Generate UMAP visualizations for clusters filtered by a minimum entity threshold.
4126
4127        This method removes clusters containing fewer than `min_entities` observations
4128        and produces two UMAP visualizations:
4129
4130        1. **Cluster UMAP** – points colored by cluster assignment only.
4131        2. **Cluster × Set UMAP** – points colored by the combination of cluster and set identifier.
4132
4133        Parameters
4134        ----------
4135        min_entities : int, optional
4136            Minimum number of entities required for a cluster to be included
4137            in the visualization. Default is 50.
4138
4139        width : int, optional
4140            Width of the generated matplotlib figures (in inches). Default is 8.
4141
4142        height : int, optional
4143            Height of the generated matplotlib figures (in inches). Default is 6.
4144
4145        n_per_col : int, optional
4146            Maximum number of legend entries per column. Default is 20.
4147
4148        Notes
4149        -----
4150        This method updates the following attributes:
4151
4152        - `self.UMAP_plot['static']['ClusterUMAP']`
4153          Static matplotlib figure of the filtered cluster-only UMAP.
4154
4155        - `self.UMAP_plot['html']['ClusterUMAP']`
4156          Interactive HTML version of the cluster-only UMAP.
4157
4158        - `self.UMAP_plot['static']['ClusterXSetsUMAP']`
4159          Static matplotlib figure showing clusters combined with set identifiers.
4160
4161        - `self.UMAP_plot['html']['ClusterXSetsUMAP']`
4162          Interactive HTML version of the cluster × set visualization.
4163
4164        - `self.tmp_data`
4165          Dataset filtered to include only clusters meeting the `min_entities` threshold.
4166
4167        - `self.tmp_metadata`
4168          Metadata corresponding to the filtered dataset.
4169        """
4170
4171        if None not in self.UMAP_data:
4172
                # Restore the unfiltered data/metadata stashed by a previous call,
                # so repeated invocations always filter from the original dataset.
4173            if hasattr(self, "_tmp_data_old"):
4174                self.tmp_data = self._tmp_data_old
4175
4176            if hasattr(self, "_tmp_metadata_old"):
4177                self.tmp_metadata = self._tmp_metadata_old
4178
                # Attach observation ids and DBSCAN labels to the embedding, the
                # metadata, and the data; drop noise points (DBSCAN label "-1").
4179            umap_result = pd.DataFrame(self.UMAP_data.copy())
4180            umap_result["id"] = self.tmp_metadata.index
4181            umap_result["clusters"] = self.dblabels
4182            umap_result = umap_result[umap_result["clusters"] != "-1"]
4183            tmp_metadata = self.tmp_metadata.copy()
4184            tmp_metadata["clusters"] = self.dblabels
4185            tmp_metadata = tmp_metadata[tmp_metadata["clusters"] != "-1"]
4186            tmp_data = self.tmp_data.copy()
4187            tmp_data.index = self.dblabels
4188            tmp_data = tmp_data[tmp_data.index != "-1"]
4189
                # Count observations per cluster so undersized clusters can be dropped.
                # NOTE(review): counts include the noise label "-1" as well — it was
                # already excluded above, so its count never matters downstream.
4190            label_counts_dict = Counter(self.dblabels)
4191
4192            label_counts = pd.DataFrame.from_dict(
4193                label_counts_dict, orient="index", columns=["count"]
4194            )
4195
4196            filtered_counts = label_counts[label_counts["count"] > min_entities]
4197
                # Build a unique per-observation key "<id> # <set>".
                # Assumes tmp_metadata carries an 'id' column — TODO confirm upstream.
4198            tmp_metadata["full_id"] = list(
4199                tmp_metadata["id"].astype(str) + " # " + tmp_metadata["sets"]
4200            )
4201
4202            tmp_data.index = tmp_metadata["full_id"]
4203            umap_result["full_id"] = list(tmp_metadata["full_id"])
4204
                # Keep only observations whose cluster passed the size threshold.
4205            umap_result = umap_result[
4206                umap_result["clusters"].isin(np.array(filtered_counts.index))
4207            ]
4208            tmp_metadata = tmp_metadata[
4209                tmp_metadata["clusters"].isin(np.array(filtered_counts.index))
4210            ]
4211
                # Numeric sort so legend/colour ordering is stable across runs.
4212            umap_result = umap_result.sort_values(
4213                by="clusters", key=lambda x: x.astype(int)
4214            )
4215
4216            tmp_data = tmp_data[tmp_data.index.isin(np.array(tmp_metadata["full_id"]))]
4217
                # First visualization: clusters only.
4218            static_fig = umap_static(
4219                umap_result, width=width, height=height, n_per_col=n_per_col
4220            )
4221
4222            html_fig = umap_html(umap_result, width=width * 100, height=height * 100)
4223
4224            self.UMAP_plot["static"]["ClusterUMAP"] = static_fig
4225            self.UMAP_plot["html"]["ClusterUMAP"] = html_fig
4226
                # Second pass: re-label by "<cluster> # <set>" and re-apply the
                # size threshold on the combined labels.
4227            tmp_metadata["full_name"] = list(
4228                tmp_metadata["clusters"] + " # " + tmp_metadata["sets"]
4229            )
4230
4231            label_counts_dict = Counter(list(tmp_metadata["full_name"]))
4232
4233            label_counts = pd.DataFrame.from_dict(
4234                label_counts_dict, orient="index", columns=["count"]
4235            )
4236
4237            filtered_counts = label_counts[label_counts["count"] > min_entities]
4238
4239            tmp_data.index = tmp_metadata["full_name"]
4240            umap_result["clusters"] = list(tmp_metadata["full_name"])
4241
4242            umap_result = umap_result[
4243                umap_result["clusters"].isin(np.array(filtered_counts.index))
4244            ]
4245
4246            tmp_metadata = tmp_metadata[
4247                tmp_metadata["full_name"].isin(np.array(filtered_counts.index))
4248            ]
4249
4250            tmp_data = tmp_data[tmp_data.index.isin(np.array(filtered_counts.index))]
4251
                # Second visualization: cluster × set combinations.
4252            static_fig = umap_static(
4253                umap_result, width=width, height=height, n_per_col=n_per_col
4254            )
4255
4256            html_fig = umap_html(umap_result, width=width * 100, height=height * 100)
4257
4258            self.UMAP_plot["static"]["ClusterXSetsUMAP"] = static_fig
4259
4260            self.UMAP_plot["html"]["ClusterXSetsUMAP"] = html_fig
4261
                # Stash the unfiltered originals so the next call can start over,
                # then expose the filtered views as the working dataset.
4262            self._tmp_data_old = self.tmp_data
4263            self._tmp_metadata_old = self.tmp_metadata
4264
4265            self.tmp_data = tmp_data
4266            self.tmp_metadata = tmp_metadata
4267
4268        else:
4269            print(
4270                "\nNo data for visualization. Please use the UMAP() and db_scan() methods first."
4271            )

Generate UMAP visualizations for clusters filtered by a minimum entity threshold.

This method removes clusters containing fewer than min_entities observations and produces two UMAP visualizations:

  1. Cluster UMAP – points colored by cluster assignment only.
  2. Cluster × Set UMAP – points colored by the combination of cluster and set identifier.

Parameters

min_entities : int, optional Minimum number of entities required for a cluster to be included in the visualization. Default is 50.

width : int, optional Width of the generated matplotlib figures (in inches). Default is 8.

height : int, optional Height of the generated matplotlib figures (in inches). Default is 6.

n_per_col : int, optional Maximum number of legend entries per column. Default is 20.

Notes

This method updates the following attributes:

  • self.UMAP_plot['static']['ClusterUMAP'] Static matplotlib figure of the filtered cluster-only UMAP.

  • self.UMAP_plot['html']['ClusterUMAP'] Interactive HTML version of the cluster-only UMAP.

  • self.UMAP_plot['static']['ClusterXSetsUMAP'] Static matplotlib figure showing clusters combined with set identifiers.

  • self.UMAP_plot['html']['ClusterXSetsUMAP'] Interactive HTML version of the cluster × set visualization.

  • self.tmp_data Dataset filtered to include only clusters meeting the min_entities threshold.

  • self.tmp_metadata Metadata corresponding to the filtered dataset.

def full_info(self):
    """
    Merge data with metadata based on the 'full_id' column.

    Combines `self.tmp_data` and `self.tmp_metadata` into one DataFrame when
    the metadata carries a 'full_id' column; otherwise warns that the
    preprocessing pipeline has not been completed.

    Returns
    -------
    pd.DataFrame or None
        Merged DataFrame containing both data and metadata if 'full_id'
        exists; otherwise, None.
    """

    data = self.tmp_data.copy()
    metadata = self.tmp_metadata.copy()

    # 'full_id' is only present after the clustering/filtering pipeline ran.
    if "full_id" not in metadata.columns:
        print("\nMetadata is not completed!")
        return None

    # Align rows on 'full_id' before joining metadata columns onto the data.
    data.index = metadata["full_id"]
    return data.merge(metadata, left_index=True, right_on="full_id", how="left")

    #################################################################################

Merge data with metadata based on the 'full_id' column.

This method combines self.tmp_data and self.tmp_metadata into a single DataFrame if the metadata contains a 'full_id' column. If 'full_id' is not present, the method prints a warning to complete the preprocessing pipeline.

Returns

pd.DataFrame or None Merged DataFrame containing both data and metadata if 'full_id' exists; otherwise, None.

def DFA(self, meta_group_by: str = "sets", sets: dict | None = None, n_proc=5):
    """
    Perform Differential Feature Analysis (DFA) on specified data groups.

    Conducts DFA using a grouping factor from metadata and an optional
    dictionary of sets for comparison, identifying significant differences
    across the defined groups.

    The analysis includes:
    - Mann–Whitney U test
    - Percentage of non-zero values
    - Means and standard deviations
    - Effect size metric (ESM)
    - Benjamini–Hochberg FDR correction
    - Fold-change and log2 fold-change

    Parameters
    ----------
    meta_group_by : str, optional
        Metadata column used for grouping during the analysis.
        Default is ``'sets'``. To view available grouping categories,
        use ``self.groups``.

    sets : dict or None, optional
        Dictionary defining groups for pairwise comparison; keys are group
        names and values are lists of labels belonging to each group.
        When ``None`` (or fewer than two groups are provided), every group
        in ``meta_group_by`` is compared against each other. Default is None.

        Example
        -------
        >>> sets = {
        ...     'healthy': ['21q'],
        ...     'disease': ['71q', '77q', '109q']
        ... }
        Here the *healthy* group is compared against the aggregated
        *disease* groups.

    n_proc : int, optional
        Number of CPU cores used for parallel processing. Default is ``5``.

    Notes
    -----
    Stores the result table in ``self.DFA_results`` (pandas.DataFrame) with
    columns such as ``feature``, ``p_val``, ``adj_pval``, ``pct_valid``,
    ``pct_ctrl``, ``avg_valid``, ``avg_ctrl``, ``sd_valid``, ``sd_ctrl``,
    ``esm``, ``FC``, ``log(FC)``, ``norm_diff``, ``valid_group`` and
    ``-log(p_val)``. When ``sets`` is not provided, results for each group
    comparison are concatenated.
    """

    tmp_data = self.tmp_data.copy()
    # Statistics only make sense on numeric feature columns.
    tmp_data = tmp_data.select_dtypes(include="number")
    tmp_metadata = self.tmp_metadata.copy()

    # Common preparation: group rows by the chosen metadata column and
    # expose it under the canonical 'sets' name expected by statistic().
    tmp_data.index = list(tmp_metadata[meta_group_by])
    tmp_metadata["sets"] = tmp_metadata[meta_group_by]

    # A meaningful comparison needs at least two user-defined groups;
    # otherwise fall back to all-vs-all within meta_group_by.
    if sets is not None and len(sets) >= 2:
        print("\nAnalysis started on provided sets dictionary and meta_group_by...")
        results = statistic(
            tmp_data.transpose(), sets=sets, metadata=tmp_metadata, n_proc=n_proc
        )
    else:
        print("\nAnalysis started for all groups against each other in meta_group_by...")
        results = statistic(
            tmp_data.transpose(), sets=None, metadata=tmp_metadata, n_proc=n_proc
        )

    self.DFA_results = results

Perform Differential Feature Analysis (DFA) on specified data groups.

This method conducts DFA using a grouping factor from metadata and a dictionary of sets for comparison. It allows for the identification of significant differences across defined sets.

The analysis includes:

  • Mann–Whitney U test
  • Percentage of non-zero values
  • Means and standard deviations
  • Effect size metric (ESM)
  • Benjamini–Hochberg FDR correction
  • Fold-change and log2 fold-change

Parameters

meta_group_by : str, optional Metadata column used for grouping during the analysis. Default is 'sets'. To view available grouping categories, use self.groups.

sets : dict, optional Dictionary defining groups for pairwise comparison. Keys correspond to group names, and values are lists of labels belonging to each group.

Example
-------
>>> sets = {
...     'healthy': ['21q'],
...     'disease': ['71q', '77q', '109q']
... }
In this configuration, the *healthy* group is compared against the
aggregated *disease* groups.

n_proc : int, optional Number of CPU cores used for parallel processing. Default is 5.

Returns

pandas.DataFrame or None A DataFrame containing statistical results for each feature, including:

- ``feature`` : str
- ``p_val`` : float
- ``adj_pval`` : float
- ``pct_valid`` : float
- ``pct_ctrl`` : float
- ``avg_valid`` : float
- ``avg_ctrl`` : float
- ``sd_valid`` : float
- ``sd_ctrl`` : float
- ``esm`` : float
- ``FC`` : float
- ``log(FC)`` : float
- ``norm_diff`` : float
- ``valid_group`` : str
- ``-log(p_val)`` : float

If ``sets`` is ``None``, results for each group are concatenated.

Returns ``None`` in case of errors or invalid parameters.

Notes

  • Columns containing only zeros are automatically removed.
  • p-values equal for both groups produce p_val = 1.
  • Benjamini–Hochberg correction is applied separately within each group comparison.
  • Fold-change is stabilized using a small, data-derived low_factor.
  • Uses Mann–Whitney U test with alternative='two-sided'.
def heatmap_DFA(
    self,
    p_value: float | int = 0.05,
    top_n: int = 5,
    scale: bool = False,
    clustering: str | None = "ward",
    figsize=(10, 5),
):
    """
    Generate a heatmap of the top DFA features filtered by p-value and log fold change.

    Parameters
    ----------
    p_value : float or int, optional
        Significance threshold; only features with p_val < p_value are kept.
        Default is 0.05.

    top_n : int, optional
        Number of top features kept per group, ranked by the 'esm' score.
        Default is 5.

    scale : bool, optional
        Apply Min–Max scaling to the heatmap values across features.
        Default is False.

    clustering : str or None, optional
        Hierarchical clustering method for ordering rows/columns; None
        disables clustering. Default is 'ward'.

    figsize : tuple, optional
        Size of the resulting matplotlib figure. Default is (10, 5).

    Notes
    -----
    - Only features with a positive log fold change ('log(FC)' > 0) are considered.
    - Cell values are -log10(p_value), optionally Min–Max scaled.
    - The generated figure is displayed and stored in `self.DFA_plot`.
    """

    results = self.DFA_results.copy()

    # Keep significant, up-regulated features only.
    results = results[(results["log(FC)"] > 0) & (results["p_val"] < p_value)]

    # Top-N features per group, ranked by effect size.
    ordered = results.sort_values(["valid_group", "esm"], ascending=[True, False])
    top_features = ordered.groupby("valid_group", as_index=False).head(top_n)

    matrix = top_features.pivot(
        index="feature", columns="valid_group", values="-log(p_val)"
    ).fillna(0)

    colorbar_label = "-log10(p_value)"

    if scale:
        colorbar_label = f"scaled({colorbar_label})"
        matrix = pd.DataFrame(
            MinMaxScaler().fit_transform(matrix),
            index=matrix.index,
            columns=matrix.columns,
        )

    if clustering is not None:
        # Reorder rows and columns by hierarchical-clustering leaf order.
        row_order = leaves_list(linkage(matrix.values, method=clustering))
        col_order = leaves_list(linkage(matrix.values.T, method=clustering))
        matrix = matrix.iloc[row_order, col_order]

    figure = plt.figure(figsize=figsize)
    sns.heatmap(
        matrix,
        cmap="viridis",
        linewidths=0.5,
        linecolor="gray",
        cbar_kws={"label": colorbar_label},
        fmt=".2f",
    )
    plt.ylabel("Feature")
    plt.xlabel("Cluster")
    plt.xticks(rotation=30, ha="right")
    plt.tight_layout()

    if cfg._DISPLAY_MODE:
        plt.show()

    self.DFA_plot = figure

Generate a heatmap of the top DFA features filtered by p-value and log fold change.

Parameters

p_value : float or int, optional Significance threshold used to filter features by their p-value. Only features with p_val < p_value are included. Default is 0.05.

top_n : int, optional Number of top features selected per group based on the 'esm' score. Default is 5.

scale : bool, optional Whether to apply Min–Max scaling to heatmap values across features. Default is False.

clustering : str or None, optional Hierarchical clustering method applied to rows/columns of the heatmap. If None, clustering is disabled. Default is 'ward'.

figsize : tuple, optional Size of the resulting matplotlib figure. Default is (10, 5).

Notes

  • Only features with a positive log fold change ('log(FC)' > 0) are considered.
  • Heatmap values represent -log10(p_value) for visualization.
  • If scale=True, values are normalized using Min–Max scaling.
  • The generated figure is displayed and stored in self.DFA_plot.
def get_DFA_plot(self, show: bool = True):
    """
    Retrieve the heatmap figure generated by `heatmap_DFA()`.

    Parameters
    ----------
    show : bool, optional
        Whether to display the stored heatmap figure. Default is True.

    Returns
    -------
    matplotlib.figure.Figure or None
        The figure object containing the DFA heatmap, or None when no
        heatmap has been generated yet.
    """

    if self.DFA_plot is None:
        print("\nNo results to return! Please run the heatmap_DFA() method first.")
        return None

    if cfg._DISPLAY_MODE and show is True:
        # display() exists only in IPython/Jupyter sessions; elsewhere the
        # figure is simply returned without being rendered inline.
        try:
            display(self.DFA_plot)
        except Exception:
            pass

    return self.DFA_plot

Retrieve the heatmap figure generated by heatmap_DFA().

Parameters

show : bool, optional Whether to display the stored heatmap figure. Default is True.

Returns

matplotlib.figure.Figure The figure object containing the DFA heatmap.

def print_avaiable_features(self):
    """
    Print the available features (columns) in the current dataset.

    Lists all column names in `self.tmp_data` to help identify which
    features are available for analysis.

    Example
    -------
    >>> group_analysis.print_avaiable_features()
    """

    # NOTE: the method name keeps its historical misspelling for backward
    # compatibility with existing callers; only the printed header is fixed.
    print("Available features:")
    for column in self.tmp_data.columns:
        print(column)

Print the available features (columns) in the current dataset.

This method lists all column names in self.tmp_data to help identify which features are available for analysis.

Example

>>> group_analysis.print_avaiable_features()
def proportion_analysis(
    self,
    grouping_col: str = "sets",
    val_col: str = "nuclei_per_img",
    grouping_dict=None,
    omit=None,
):
    """
    Perform proportion analysis by comparing the distribution of values across groups.

    Analyzes the distribution of values (e.g., nuclei counts) across the
    groups defined in the dataset. Categories can optionally be merged via
    `grouping_dict` and specific values excluded via `omit`. Produces both
    statistical results and a visualization.

    Parameters
    ----------
    grouping_col : str, optional
        Column to group by. Default is 'sets'.

    val_col : str, optional
        Column containing the values to analyze. Default is 'nuclei_per_img'.

    grouping_dict : dict or None, optional
        Dictionary mapping new group names to categories in `grouping_col`.
        If None, analysis is based on the original groups.

    omit : str, list, or None, optional
        Values to exclude from the analysis. Default is None.

    Attributes
    ----------
    proportion_stats : pd.DataFrame
        DataFrame containing chi-square test results for pairwise group comparisons.

    proportion_plot : matplotlib.figure.Figure
        Plot visualizing the proportions across groups.

    Example
    -------
    >>> group_analysis.proportion_analysis(
    ...     grouping_col='sets',
    ...     val_col='nuclei_per_img',
    ...     grouping_dict={'Group A': [1, 2], 'Group B': [3, 4]},
    ...     omit=5
    ... )
    """

    andata = self.tmp_data.copy()

    # Attach the grouping labels from the parallel metadata table
    # (assumes tmp_metadata rows align 1:1 with tmp_data rows).
    andata[grouping_col] = list(self.tmp_metadata[grouping_col])
    andata = andata[[grouping_col, val_col]]

    # Drop excluded values (scalar or list form).
    if omit is not None:
        if isinstance(omit, list):
            andata = andata[~andata[val_col].isin(omit)]
        else:
            andata = andata[andata[val_col] != omit]

    andata = andata.reset_index(drop=True)
    andata["index_col"] = andata.index

    # Optionally merge original categories into user-defined super-groups.
    if isinstance(grouping_dict, dict):
        for new_group, members in grouping_dict.items():
            andata.loc[andata[grouping_col].isin(members), grouping_col] = new_group

    # Contingency table: count of occurrences of each value per group.
    df_pivot = andata.pivot_table(
        index=val_col,
        columns=grouping_col,
        values="index_col",
        aggfunc="count",
        fill_value=0,
    )

    # Hierarchically cluster rows and columns for a tidy display order.
    row_order = leaves_list(linkage(df_pivot.values, method="ward"))
    col_order = leaves_list(linkage(df_pivot.values.T, method="ward"))
    df_pivot = df_pivot.iloc[row_order, col_order]

    # Pairwise chi-square tests between groups. Compute once and reuse —
    # the original called chi_pairs() twice on the same table. A copy is
    # stored so proportion_stats stays free of the plot-only label column.
    chi_df = chi_pairs(df_pivot)
    self.proportion_stats = chi_df.copy()

    chi_df["Significance_Label"] = chi_df["p-value"].apply(get_significance_label)

    self.proportion_plot = prop_plot(df_pivot, chi_df)

Perform proportion analysis by comparing the distribution of values across groups.

This method analyzes the distribution of values (e.g., nuclei counts) across different groups defined in the dataset. It can optionally group categories, omit specific values, and produces both statistical results and a visualization.

Parameters

grouping_col : str, optional Column to group by. Default is 'sets'.

val_col : str, optional Column containing the values to analyze. Default is 'nuclei_per_img'.

grouping_dict : dict or None, optional Dictionary mapping new group names to categories in grouping_col. If None, analysis is based on the original groups.

omit : str, list, or None, optional Values to exclude from the analysis. Default is None.

Attributes

proportion_stats : pd.DataFrame DataFrame containing chi-square test results for pairwise group comparisons.

proportion_plot : matplotlib.figure.Figure Plot visualizing the proportions across groups.

Example

>>> group_analysis.proportion_analysis(
...     grouping_col='sets',
...     val_col='nuclei_per_img',
...     grouping_dict={'Group A': [1, 2], 'Group B': [3, 4]},
...     omit=5
... )
def get_proportion_plot(self, show: bool = True):
    """
    Retrieve the proportion bar plot generated by the `proportion_analysis()` method.

    Parameters
    ----------
    show : bool, optional
        Whether to display the proportion bar plot. Default is True.

    Returns
    -------
    matplotlib.figure.Figure or None
        The figure object containing the proportion bar plot, or None if
        `proportion_analysis()` has not been run yet.
    """

    if self.proportion_plot is None:
        print(
            "\nNo results to return! Please run the proportion_analysis() method first."
        )
        return None

    if cfg._DISPLAY_MODE and show:
        # `display` exists only in IPython/Jupyter sessions; rendering is
        # best-effort, so failures are swallowed (narrowed from a bare
        # `except:` to `except Exception` so KeyboardInterrupt still works).
        try:
            display(self.proportion_plot)
        except Exception:
            pass

    return self.proportion_plot

Retrieve the proportion bar plot generated by the proportion_analysis() method.

Parameters

show : bool, optional Whether to display the proportion bar plot. Default is True.

Returns

matplotlib.figure.Figure The figure object containing the proportion bar plot.

def get_proportion_stats(self):
    """
    Retrieve the proportion statistics computed by the `proportion_analysis()` method.

    Returns
    -------
    pd.DataFrame or None
        The proportion statistics stored in `self.proportion_stats`, or
        None if `proportion_analysis()` has not been run yet.
    """

    # Bug fix: the original used `if None in self.proportion_stats`, a
    # membership test that raises TypeError when the attribute is None —
    # exactly the "no results yet" case it was meant to detect.
    if self.proportion_stats is None:
        print(
            "\nNo results to return! Please run the proportion_analysis() method first."
        )
        return None

    return self.proportion_stats

Retrieve the proportion statistics computed by the proportion_analysis() method.

Returns

pd.DataFrame The proportion statistics stored in self.proportion_stats.