"""jimg_ncd.nuclei

Nuclei detection, repair, and image-management utilities built around the
StarDist segmentation model.
"""

import base64
import copy
import glob
import json
import os
import random
import re
import tempfile
import webbrowser
from collections import Counter
from io import BytesIO

import cv2
import harmonypy as harmonize
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.offline as pyo
import seaborn as sns
import skimage
import umap
from csbdeep.utils import normalize
from scipy.cluster.hierarchy import leaves_list, linkage
from skimage import measure
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from stardist.models import StarDist2D
from stardist.plot import render_label
from tqdm import tqdm

pio.renderers.default = "browser"

import jimg_ncd.config as cfg

# NOTE: the star import is relied upon throughout this module — it is assumed
# to supply cKDTree, ConvexHull, ImageTools, load_image, merge_images,
# equalizeHist_16bit, clahe_16bit and adjust_img_16bit (TODO confirm).
from .utils import *

random.seed(42)


# new features (nuclei adjustment and repair images)


class RepTools:
    """
    Mixin with utilities for processing and repairing nuclei data.

    Provides methods for extracting subsets, removing outliers, computing
    geometrical features, and merging/splitting nuclei based on spatial and
    intensity criteria.

    Notes
    -----
    ``repairing_nuclei`` reads ``self.hyperparameter_nuclei``, which is
    supplied by the inheriting class (e.g. ``NucleiFinder``); ``RepTools`` is
    not intended to be instantiated on its own.
    """

    def extract_dict_by_indices(self, d, indices):
        """
        Extract elements from all dictionary lists using provided indices.

        Parameters
        ----------
        d : dict
            Dictionary with list values.

        indices : list
            List of indices to extract from each dictionary entry.

        Returns
        -------
        dict
            Dictionary containing only the selected elements.

        Notes
        -----
        Indices that are out of range for a given list are silently skipped,
        so value lists of unequal length may yield asymmetric results.
        """

        return {
            key: [values[i] for i in indices if i < len(values)]
            for key, values in d.items()
        }

    def drop_outlires(self, row, diff_FC_intensity=0.95, circ=0.6):
        """
        Identify indices of nuclei considered outliers based on circularity
        and relative mean intensity.

        A nucleus is reported as an outlier when either:

        * its circularity is not above ``circ``, or
        * its mean intensity divided by the mean intensity of all
          circularity-passing nuclei falls below ``diff_FC_intensity``.

        Parameters
        ----------
        row : dict
            Per-nucleus properties; must contain parallel lists under the
            keys 'circularity' and 'intensity_mean'.

        diff_FC_intensity : float
            Fraction of the mean intensity below which a nucleus is
            considered an outlier. Default 0.95.

        circ : float
            Minimum circularity threshold. Default 0.6.

        Returns
        -------
        list
            Sorted list of indices *into the original, unfiltered lists* to
            drop as outliers.
        """

        # Positions that pass the circularity filter.
        keep = [n for n, c in enumerate(row["circularity"]) if c > circ]

        # FIX: low-circularity nuclei are outliers by the documented contract
        # (previously they were only excluded from the intensity statistics
        # and never reported for dropping).
        drop = set(range(len(row["circularity"]))) - set(keep)

        # Reference mean intensity is computed over the circularity-passing
        # subset only, as in the original implementation.
        passing = self.extract_dict_by_indices(row, keep)

        if passing["intensity_mean"]:
            is_mean = np.mean(passing["intensity_mean"])

            for pos, orig_idx in enumerate(keep):
                # FIX: returned indices are mapped back to the original list.
                # Previously they referred to the filtered subset, while the
                # caller applied them to the unfiltered dictionary.
                FC_mean = passing["intensity_mean"][pos] / is_mean
                if FC_mean < diff_FC_intensity:
                    drop.add(orig_idx)

        return sorted(drop)

    def nn(self, coords, threshold: float = 2):
        """
        Count close neighbours between every ordered pair of nuclei.

        Parameters
        ----------
        coords : list
            List of numpy arrays, each containing the pixel coordinates of
            one nucleus.

        threshold : float
            Maximum distance (in pixels) for two points to be considered
            close neighbours. Default 2 (previously hard-coded inside the
            loop; kept as the default for backward compatibility).

        Returns
        -------
        dict
            Mapping ``"i --> j"`` to the number of points of nucleus ``j``
            lying within ``threshold`` of nucleus ``i``.
        """

        # Build one KD-tree per nucleus up front instead of rebuilding the
        # tree for nucleus ``i`` inside the inner loop (was O(n^2) builds).
        # cKDTree is assumed to come from ``.utils`` (scipy.spatial).
        trees = [cKDTree(c) for c in coords]

        full_list = {}
        for i in range(len(coords)):
            for j in range(len(coords)):
                if i != j:
                    distances, _ = trees[i].query(coords[j])
                    full_list[f"{i} --> {j}"] = np.sum(distances < threshold)

        return full_list

    def compute_axes_length(self, contour):
        """
        Compute major and minor axis lengths of a nucleus from its contour.

        The axes are derived from the eigenvalues of the covariance matrix of
        the contour points (PCA-style fit).

        Parameters
        ----------
        contour : np.ndarray
            Coordinates of nucleus contour points, shape (N, 2).

        Returns
        -------
        tuple
            (axis_major_length, axis_minor_length).
        """

        cov = np.cov(contour.T)
        eigvals, _ = np.linalg.eigh(cov)

        axis_major_length = 2 * np.sqrt(eigvals.max())
        axis_minor_length = 2 * np.sqrt(eigvals.min())

        return axis_major_length, axis_minor_length

    def compute_eccentricity(self, contour):
        """
        Compute eccentricity of a nucleus from its contour.

        Parameters
        ----------
        contour : np.ndarray
            Coordinates of nucleus contour points, shape (N, 2).

        Returns
        -------
        float
            Eccentricity in [0, 1); 0 corresponds to a circle.
        """

        cov = np.cov(contour.T)
        eigvals, _ = np.linalg.eigh(cov)

        eccentricity = np.sqrt(1 - (eigvals.min() / eigvals.max()))
        return eccentricity

    def compute_feret_diameter(self, contour):
        """
        Compute an approximation of the maximum Feret diameter of a contour.

        Implemented as the longer side of the minimum-area bounding rectangle
        (``cv2.minAreaRect``). This approximates, but does not in general
        equal, the true maximum pairwise point distance.

        Parameters
        ----------
        contour : np.ndarray
            Array of shape (N, 2) representing the contour coordinates.
            NOTE(review): cv2.minAreaRect requires an int32/float32 point
            array — TODO confirm the dtype supplied by callers.

        Returns
        -------
        float
            Longer side of the minimum-area enclosing rectangle.
        """

        rect = cv2.minAreaRect(contour)
        (w, h) = rect[1]
        return max(w, h)

    def compute_perimeter(self, contour):
        """
        Compute the perimeter of a contour.

        The perimeter is the sum of Euclidean distances between consecutive
        points in the contour (the closing segment is not included).

        Parameters
        ----------
        contour : np.ndarray
            Array of shape (N, 2) representing the contour coordinates.

        Returns
        -------
        float
            Perimeter length of the contour.
        """

        return np.sum(np.linalg.norm(np.diff(contour, axis=0), axis=1))

    def compute_circularity(self, contour):
        """
        Compute the circularity of a contour.

        Circularity is ``4 * pi * area / perimeter**2``, where the area is
        taken from the convex hull of the points (``ConvexHull.volume`` is
        the area in 2-D).

        Parameters
        ----------
        contour : np.ndarray
            Array of shape (N, 2) representing the contour coordinates.

        Returns
        -------
        float
            Circularity of the contour; 1 indicates a perfect circle.
        """

        perimeter = self.compute_perimeter(contour)
        hull = ConvexHull(contour)
        area = hull.volume  # 2-D ConvexHull: .volume is the enclosed area

        return (4 * np.pi * area) / (perimeter**2)

    def repairing_nuclei(self, results):
        """
        Repair nuclei segmentation results by merging or removing outlier nuclei.

        Adjusts nuclei detection results based on global and local thresholds,
        circularity and nearest-neighbour relationships, merges fragmented
        nuclei when appropriate, and recalculates key morphological properties
        for merged nuclei.

        Parameters
        ----------
        results : dict
            Keys are image identifiers; values are dictionaries of detected
            nuclei properties (e.g. 'area', 'coords', 'label', 'circularity').

        Returns
        -------
        dict
            Same structure as ``results`` with repaired nuclei information.

        Notes
        -----
        NOTE(review): images matching neither repair branch below (e.g. zero
        nuclei, or a total area ratio above 'FC_diff_global') are silently
        excluded from the returned dictionary — TODO confirm intended.
        """

        # Per-image totals used for the global area criterion.
        mean_sum_area = []
        im = []
        n = []
        for r in tqdm(results.keys()):
            mean_sum_area.append(np.sum(results[r]["area"]))
            n.append(len(results[r]["area"]))
            im.append(r)

        mean_sum_area_sum = np.mean(mean_sum_area)

        results_dict = {}

        print("\nImage repairing:\n\n")

        for i, m in tqdm(zip(im, n), total=len(im)):

            if (
                m > 1
                and np.sum(results[i]["area"]) / mean_sum_area_sum
                < self.hyperparameter_nuclei["FC_diff_global"]
            ):
                # adjustment to global changes

                temporary_dict = results[i]

                # Outlier indices (circularity / relative intensity), in
                # original coordinates.
                check_drop = self.drop_outlires(
                    temporary_dict,
                    diff_FC_intensity=self.hyperparameter_nuclei[
                        "FC_diff_local_intensity"
                    ],
                    circ=self.hyperparameter_nuclei["circularity"],
                )

                to_final = [
                    x
                    for x in range(len(temporary_dict["area"]))
                    if x not in check_drop
                ]

                tmp = self.extract_dict_by_indices(temporary_dict, to_final)

                # Nuclei with many mutual close neighbours are candidates for
                # merging (likely fragments of one nucleus).
                to_concat = []

                if len(tmp["coords"]) > 1:

                    results_nn = self.nn(tmp["coords"])

                    for kn in results_nn.keys():
                        if results_nn[kn] > self.hyperparameter_nuclei["nn_min"]:
                            to_concat.append(int(re.sub(" --> .*", "", kn)))
                            to_concat.append(int(re.sub(".* --> ", "", kn)))

                    to_concat = list(set(to_concat))

                to_rest = [
                    x for x in range(len(tmp["area"])) if x not in to_concat
                ]

                if len(to_concat) > 1:
                    # NOTE(review): all flagged nuclei are merged into a
                    # single nucleus, even if they form several disjoint
                    # clusters — TODO confirm intended.
                    to_concat_dict = self.extract_dict_by_indices(tmp, to_concat)
                    to_concat_dict["coords"] = [np.vstack(to_concat_dict["coords"])]
                    to_concat_dict["label"] = [min(to_concat_dict["label"])]
                    to_concat_dict["area"] = [np.sum(to_concat_dict["area"])]
                    to_concat_dict["area_bbox"] = [np.sum(to_concat_dict["area_bbox"])]
                    to_concat_dict["area_convex"] = [
                        np.sum(to_concat_dict["area_convex"])
                    ]
                    to_concat_dict["area_filled"] = [
                        np.sum(to_concat_dict["area_filled"])
                    ]

                    to_concat_dict["intensity_max"] = [
                        np.max(to_concat_dict["intensity_max"])
                    ]
                    to_concat_dict["intensity_mean"] = [
                        np.mean(to_concat_dict["intensity_mean"])
                    ]
                    to_concat_dict["intensity_min"] = [
                        np.min(to_concat_dict["intensity_min"])
                    ]

                    # Recompute shape descriptors on the merged point cloud.
                    major, minor = self.compute_axes_length(to_concat_dict["coords"][0])
                    to_concat_dict["axis_major_length"] = [major]
                    to_concat_dict["axis_minor_length"] = [minor]
                    to_concat_dict["ratio"] = [minor / major]

                    ecc = self.compute_eccentricity(to_concat_dict["coords"][0])
                    to_concat_dict["eccentricity"] = [ecc]
                    to_concat_dict["equivalent_diameter_area"] = [
                        np.sum(to_concat_dict["equivalent_diameter_area"])
                    ]

                    feret_diameter = self.compute_feret_diameter(
                        to_concat_dict["coords"][0]
                    )
                    to_concat_dict["feret_diameter_max"] = [feret_diameter]
                    to_concat_dict["solidity"] = [np.mean(to_concat_dict["solidity"])]
                    to_concat_dict["perimeter"] = [np.sum(to_concat_dict["perimeter"])]
                    to_concat_dict["perimeter_crofton"] = [
                        np.sum(to_concat_dict["perimeter_crofton"])
                    ]
                    to_concat_dict["circularity"] = [
                        np.mean(to_concat_dict["circularity"])
                    ]

                    # Append the merged nucleus after the untouched ones.
                    to_rest_dict = self.extract_dict_by_indices(tmp, to_rest)

                    for ik in to_rest_dict.keys():
                        to_rest_dict[ik] = to_rest_dict[ik] + to_concat_dict[ik]

                    results_dict[i] = to_rest_dict

                else:
                    results_dict[i] = tmp

            elif (
                m == 1
                and results[i]["circularity"][0]
                > self.hyperparameter_nuclei["circularity"]
            ):
                # Single sufficiently-circular nucleus: keep unchanged.
                results_dict[i] = results[i]

        return results_dict
class ImagesManagement:
    """
    A class for managing, preprocessing, merging, stitching, saving, and loading
    microscopy or flow cytometry images used in NucleiFinder-based analyses.

    This class provides a unified interface for:

    - loading image data,
    - selecting images by IDs,
    - preprocessing images (equalization, CLAHE, gamma/contrast/brightness adjustment),
    - merging images with user-defined intensity ratios,
    - stitching images horizontally,
    - retrieving and saving processed image sets.

    The class stores original or loaded data in the ``results_images`` attribute,
    and all processed images in ``prepared_images`` under user-defined acronyms.
    These acronyms allow flexible retrieval with ``get_prepared_images()``
    and exporting via ``save_prepared_images()``.

    Parameters
    ----------
    images_ids : list[int]
        List of selected image identifiers.

    result_dict : dict or None
        Dictionary containing raw or preprocessed images.
        If ``None``, images may later be loaded or processed from file.

    experiment_name : str
        Name of the experiment. Used for saving and structuring output.

    Attributes
    ----------
    images_ids : list[int]
        IDs of images managed by the class.

    results_images : dict or None
        Dictionary containing raw or analysis-derived images.

    experiment_name : str
        Name of the experiment. Used in saved filenames.

    prepared_images : dict
        Container for processed/adjusted/merged/stitched images,
        indexed by user-defined acronyms.

    Notes
    -----
    Processed images are stored only in memory until saved explicitly with
    ``save_prepared_images()``.

    Raw images loaded from NucleiFinder analyses can be saved for later reuse
    in a serialized `.inuc` format using ``save_raw()``.

    Examples
    --------
    Load image results from an analysis:

    >>> manager = ImagesManagement.load_experimental_images(results, "experiment1")

    Adjust selected images:

    >>> manager.adjust_images(
    ...     acronyme="adj",
    ...     path_to_images="path/to/imgs",
    ...     eq=True,
    ...     clahe=True
    ... )

    Merge multiple prepared sets:

    >>> manager.image_merging(["adj", "other"], ratio_list=[0.7, 0.3])

    Retrieve processed images:

    >>> imgs = manager.get_prepared_images("adj")

    Save stitched images to disk:

    >>> manager.save_prepared_images("stitched_adj_other", "./output/")
    """

    def __init__(self, images_ids, result_dict, experiment_name):
        """
        Initialize the ImagesManagement object.

        Parameters
        ----------
        images_ids : list[int]
            List of image identifiers.

        result_dict : dict or None
            Dictionary containing processed images.

        experiment_name : str
            Name of the experiment.
        """

        # List of image IDs managed by this instance.
        self.images_ids = images_ids
        # Dictionary of raw / analysis-derived images (or None).
        self.results_images = result_dict
        # Experiment name used for file naming.
        self.experiment_name = experiment_name
        # Processed images (adjusted / merged / stitched), keyed by acronym.
        self.prepared_images = {}

    @classmethod
    def load_from_dict(cls, path: str, experiment_name: str):
        """
        Load an ImagesManagement instance from a `.inuc` serialized dictionary.

        Parameters
        ----------
        path : str
            Path to the `.inuc` file exported with `save_raw()`.

        experiment_name : str
            Name of the experiment.

        Returns
        -------
        ImagesManagement
            A reconstructed ImagesManagement object.

        Raises
        ------
        ValueError
            If the path lacks the `.inuc` extension or does not exist.
        """

        if ".inuc" in path:

            if os.path.exists(path):

                # A `.inuc` file is an npz archive written by save_raw().
                loaded_data = np.load(path)
                data_dict = {key: loaded_data[key] for key in loaded_data}

                # Image IDs are the prefix of each key before the first '_'.
                id_list = [re.sub("_.*", "", k) for k in data_dict.keys()]

                return cls(id_list, data_dict, experiment_name)

            else:
                raise ValueError("\nInvalid path!")

        else:
            raise ValueError(
                "\nInvalid dictionary to load. It must contain a .inuc extension!"
            )

    @classmethod
    def load_experimental_images(cls, results_dict: dict, experiment_name: str):
        """
        Load results exported from NucleiFinder series analysis.

        Initialize the object with results from series_analysis_nuclei()
        or series_analysis_chromatinization() of the NucleiFinder class.

        Parameters
        ----------
        results_dict : dict
            Dictionary returned by `series_analysis_nuclei()` or
            `series_analysis_chromatinization()`; each value must contain
            exactly the keys {'stats', 'img'}.

        experiment_name : str
            Name of the experiment.

        Returns
        -------
        ImagesManagement

        Raises
        ------
        ValueError
            If the dictionary entries do not have the expected structure.
        """

        res_dict = {}
        id_list = []

        # Validate structure using the first entry only (assumes homogeneity).
        if set(results_dict[list(results_dict.keys())[0]].keys()) != set(
            ["stats", "img"]
        ):
            raise ValueError(
                "Incorrect data provided. The data must come from series_analysis_nuclei() "
                "or series_analysis_chromatinization() of the NucleiFinder class."
            )

        for k in results_dict.keys():
            res_dict[k] = results_dict[k]["img"]
            id_list.append(re.sub("_.*", "", k))

        return cls(id_list, res_dict, experiment_name)

    @classmethod
    def load_images_ids(cls, images_ids: list, experiment_name: str):
        """
        Initialize the object with a list of image IDs for processing.

        Parameters
        ----------
        images_ids : list[int]
            List of selected image IDs.

        experiment_name : str
            Name of the experiment.

        Returns
        -------
        ImagesManagement

        Raises
        ------
        ValueError
            If `images_ids` is empty.
        """

        if len(images_ids) == 0:
            raise ValueError(
                "Incorrect data provided. There must be a list of image IDs."
            )

        return cls(images_ids, None, experiment_name)

    def get_included_acronyms(self):
        """
        Print the data acronyms for adjusted images, processed using the
        self.adjust_images(), self.image_merging(), and self.image_stitching() methods.

        Acronym information is essential for retrieving and saving data using
        the self.get_prepared_images() and self.save_prepared_images() methods.

        Notes
        -----
        This method prints the list of available acronyms but does not return it.
        """

        if len(self.prepared_images.keys()) > 0:
            print("\nAvaiable stored images:\n")
            for kd in self.prepared_images.keys():
                print(kd)

        else:
            print("Nothing to return!")

    def get_prepared_images(self, acronyme=None):
        """
        Retrieve the prepared images (returned from adjust_images()) stored in the object.

        Parameters
        ----------
        acronyme : str or None
            Acronym identifying a processed image set. If None, prints available keys.

        Returns
        -------
        dict
            Dictionary of prepared images (None when `acronyme` is None).

        Raises
        ------
        ValueError
            If `acronyme` is not a stored key.
        """

        if acronyme is None:

            self.get_included_acronyms()

        else:

            if acronyme in list(self.prepared_images.keys()):
                return self.prepared_images[acronyme]

            raise ValueError("Incorrect acronyme!")

    def save_prepared_images(self, acronyme: str, path_to_save: str = ""):
        """
        Save prepared images (returned from adjust_images() method) to the specified directory.

        Parameters
        ----------
        acronyme : str
            Acronym identifying the processed image set to save.
            If None, available acronyms are printed instead.

        path_to_save : str
            Directory path where the images will be saved. Default is the current working directory.

        Raises
        ------
        ValueError
            If `acronyme` is not a stored key.
        """

        if acronyme is None:

            self.get_included_acronyms()

        else:

            if acronyme in list(self.prepared_images.keys()):

                path_to_save = os.path.join(
                    path_to_save, f"{self.experiment_name}_{acronyme}"
                )

                if not os.path.exists(path_to_save):
                    os.makedirs(path_to_save, exist_ok=True)

                for i in tqdm(self.prepared_images[acronyme].keys()):
                    cv2.imwrite(
                        os.path.join(path_to_save, i + ".png"),
                        self.prepared_images[acronyme][i],
                    )

            else:
                raise ValueError("Incorrect acronyme!")

    def adjust_images(
        self,
        acronyme: str,
        path_to_images: str,
        file_extension: str = "tif",
        eq: bool = True,
        clahe: bool = True,
        kernal: tuple = (50, 50),
        fille_name_part: str = "",
        color: str = "gray",
        max_intensity: int = 65535,
        min_intenisty: int = 0,
        brightness: int = 1000,
        contrast: float = 1.0,
        gamma: float = 1.0,
        img_n: int = 0,
    ):
        """
        Prepare selected images for processing, applying histogram equalization and CLAHE, if required.

        Parameters
        ----------
        acronyme : str
            Name of images being adjusted in this run.

        path_to_images : str
            Path to the directory containing images.

        file_extension : str
            Image file extension. Default is 'tif'.

        eq : bool
            Whether to apply histogram equalization. Default is True.

        clahe : bool
            Whether to apply CLAHE. Default is True.

        kernal : tuple
            Kernel size for CLAHE. Default is (50, 50).

        fille_name_part : str
            Part of the file name to filter images. Default is an empty string.

        color : str
            Color space to use. Default is 'gray'.

        max_intensity : int
            Maximum intensity for image adjustment. Default is 65535.

        min_intenisty : int
            Minimum intensity for image adjustment. Default is 0.

        brightness : int
            Brightness adjustment value. Default is 1000.

        contrast : float
            Contrast adjustment factor. Default is 1.0.

        gamma : float
            Gamma correction factor. Default is 1.0.

        img_n : int
            Number of images to process. Default is 0, which means all images.

        Notes
        -----
        To access the processed images, use the ``get_prepared_images()`` method.

        To save the processed images to disk, use the ``save_prepared_images()`` method.
        """

        results_dict = {}

        files = glob.glob(os.path.join(path_to_images, "*." + file_extension))

        if len(fille_name_part) > 0:
            files = [x for x in files if fille_name_part.lower() in x.lower()]

        # Restrict to files whose name prefix (before first '_') is a
        # selected image ID.
        selected_id = self.images_ids

        if len(selected_id) > 0:
            selected_id = [str(x) for x in selected_id]
            files = [
                x
                for x in files
                if re.sub("_.*", "", os.path.basename(x)) in selected_id
            ]

        if img_n > 0:
            # Randomly subsample img_n files (random seeded at module level).
            files = random.sample(files, img_n)

        for file in tqdm(files):

            image = load_image(file)

            # FIX: narrowed the bare `except:` — cvtColor raises cv2.error
            # when the image is already single-channel; keep best-effort
            # grayscale conversion without swallowing unrelated errors.
            try:
                image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            except cv2.error:
                pass

            if eq:
                image = equalizeHist_16bit(image)

            if clahe:
                image = clahe_16bit(image, kernal=kernal)

            image = adjust_img_16bit(
                img=image,
                color=color,
                max_intensity=max_intensity,
                min_intenisty=min_intenisty,
                brightness=brightness,
                contrast=contrast,
                gamma=gamma,
            )

            results_dict[os.path.basename(file)] = image

        self.prepared_images[acronyme] = results_dict

    def image_merging(self, acronyms: list, ratio_list: list):
        """
        Merge previously prepared images stored in `self.prepared_images`,
        adjusted based on the image ratios. The used ratios adjust relative image intensity.

        Parameters
        ----------
        acronyms : list
            List of image-set acronyms to be merged.

        ratio_list : list[float]
            List of ratio intensity values (0.0-1.0) for the merged image.
            The `acronyms` list and `ratio_list` must be of the same length.

        Raises
        ------
        ValueError
            If any acronym is not a stored key.

        Notes
        -----
        To access the processed images, use the ``get_prepared_images()`` method.

        To save the processed images to disk, use the ``save_prepared_images()`` method.
        """

        for a in acronyms:
            if a not in list(self.prepared_images.keys()):
                raise ValueError(f"Incorrect {a} acronyme!")

        results_img = {}
        for k in self.images_ids:
            img_list = []
            for a in acronyms:
                # Match prepared image whose name prefix equals the ID.
                nam = [
                    x
                    for x in self.prepared_images[a].keys()
                    if str(k) == re.sub("_.*", "", x)
                ]
                if len(nam) == 0:
                    print(f"There were not images for {k} ids")
                    break

                img_list.append(self.prepared_images[a][nam[0]])

            # Merge only when every requested set contributed an image.
            if len(img_list) == len(acronyms):
                results_img[f'{k}_{"_".join(acronyms)}'] = merge_images(
                    img_list, ratio_list
                )

        self.prepared_images[f'merged_{"_".join(acronyms)}'] = results_img

        print(f'Images stored in self.prepared_images["merged_{"_".join(acronyms)}"]')

    def image_stitching(self, acronyms: list, to_results_image: bool = False):
        """
        Stitch (horizontally) previously prepared images stored in `self.prepared_images`.

        Parameters
        ----------
        acronyms : list
            List of image-set acronyms to be stitched.

        to_results_image : bool
            Boolean value indicating whether images obtained from the
            `series_analysis_nuclei()` or `series_analysis_chromatinization()`
            methods of the `NucleiFinder` class should be stitched to the right
            side of the images in the `acronyms` list.

        Raises
        ------
        ValueError
            If any acronym is not a stored key.

        Notes
        -----
        To access the processed images, use the ``get_prepared_images()`` method.

        To save the processed images to disk, use the ``save_prepared_images()`` method.
        """

        for a in acronyms:
            if a not in list(self.prepared_images.keys()):
                raise ValueError(f"Incorrect {a} acronyme!")

        results_img = {}
        for k in tqdm(self.images_ids):
            img_list = []
            for a in acronyms:
                nam = [
                    x
                    for x in self.prepared_images[a].keys()
                    if str(k) == re.sub("_.*", "", x)
                ]
                if len(nam) == 0:
                    print(f"There were not images for {k} ids")
                    break

                img_list.append(self.prepared_images[a][nam[0]])

            if to_results_image:
                nam = [
                    x
                    for x in self.results_images.keys()
                    if str(k) == re.sub("_.*", "", x)
                ]
                if len(nam) != 0:
                    img_list.append(self.results_images[nam[0]])

            # FIX: previously, when `to_results_image` was False the storing
            # branch was guarded by `elif to_results_image is not False:`,
            # which can never be true, so no stitched image was ever stored.
            # Store whenever all requested panels were collected.
            if to_results_image and len(img_list) == len(acronyms) + 1:
                results_img[f'{k}_{"_".join(acronyms)}_res'] = cv2.hconcat(img_list)

            elif len(img_list) == len(acronyms):
                results_img[f'{k}_{"_".join(acronyms)}'] = cv2.hconcat(img_list)

        self.prepared_images[f'stitched_{"_".join(acronyms)}'] = results_img

        print(f'Images stored in self.prepared_images["stitched_{"_".join(acronyms)}"]')
954 """ 955 956 full_path = os.path.join(path_to_save, f"{self.experiment_name}.inuc") 957 958 np.savez(full_path, **self.results_images) 959 960 961class NucleiFinder(ImageTools, RepTools): 962 """ 963 Implements a comprehensive pipeline for automated segmentation, 964 selection, and analysis of cell nuclei and their internal chromatin structure 965 in microscopy images. 966 967 It utilizes a pre-trained deep learning model (StarDist2D) for initial 968 nuclear identification, followed by the application of advanced morphological 969 and intensity filters, and a dedicated algorithm for quantifying chromatinization. 970 The class provides detailed control over the hyperparameters for both the 971 segmentation process and image preprocessing stages. 972 973 Parameters 974 ---------- 975 image : np.ndarray, optional 976 The input image (typically 16-bit) for analysis. 977 978 test_results : list, optional 979 Plots resulting from parameter testing (e.g., NMS/Prob combinations). 980 981 hyperparameter_nuclei : dict, optional 982 Parameters for nuclei segmentation and filtering (e.g., 'nms', 'prob', 'min_size', 'circularity'). 983 984 hyperparameter_chromatinization : dict, optional 985 Parameters for segmenting and filtering chromatin spots (e.g., 'cut_point', 'ratio'). 986 987 img_adj_par_chrom : dict, optional 988 Image adjustment parameters (gamma, contrast) specifically for chromatin analysis. 989 990 img_adj_par : dict, optional 991 Image adjustment parameters for nuclei segmentation. 992 993 show_plots : bool, optional 994 Flag controlling the automatic display of visual results. 995 996 nuclei_results : dict, optional 997 A dictionary storing numerical data (features) extracted from the nuclei. 998 999 images : dict, optional 1000 A dictionary storing output images and masks. 1001 1002 Attributes 1003 ---------- 1004 image : np.ndarray 1005 The currently loaded image for analysis. 
1006 1007 test_results : list 1008 The visual outcomes of NMS/Prob parameter tests. 1009 1010 hyperparameter_nuclei : dict 1011 A dictionary of active parameters used by the `find_nuclei()` and `select_nuclei()` methods. 1012 1013 hyperparameter_chromatinization : dict 1014 A dictionary of active parameters used by the `nuclei_chromatinization()` method. 1015 1016 img_adj_par : dict 1017 Image correction parameters for nuclei segmentation. 1018 1019 img_adj_par_chrom : dict 1020 Image correction parameters for chromatin analysis. 1021 1022 show_plots : bool 1023 The state of the plot display flag. 1024 1025 nuclei_results : dict 1026 Stores feature dictionaries for: all detected ('nuclei'), selected ('nuclei_reduced'), 1027 and chromatinization data ('nuclei_chromatinization'). 1028 1029 images : dict 1030 Stores masks and images visualizing the results. 1031 1032 series_im : bool 1033 Flag indicating if the class is operating in a batch or series processing mode. 1034 1035 Methods 1036 ------- 1037 set_nms(nms) 1038 Sets the Non-Maximum Suppression (NMS) threshold. 1039 1040 set_prob(prob) 1041 Sets the segmentation probability threshold. 1042 1043 set_nuclei_circularity(circ) 1044 Sets the minimum required circularity for a nucleus. 1045 1046 set_nuclei_local_intenisty_FC(local_FC) 1047 Sets the factor used for removing false positives based on local intensity differences. 1048 1049 set_nuclei_global_area_FC(global_FC) 1050 Sets the factor used for removing size-based outlier false positives. 1051 1052 set_nuclei_size(size) 1053 Sets the minimum and maximum area (in pixels) for nuclei selection. 1054 1055 set_nuclei_min_mean_intensity(intensity) 1056 Sets the minimum required mean intensity value for a nucleus. 1057 1058 set_chromatinization_size(size) 1059 Sets the minimum and maximum area (in pixels) for chromatin spot selection. 
1060 1061 set_chromatinization_cut_point(cut_point) 1062 Sets the factor used to adjust the chromatin segmentation threshold (Otsu's method). 1063 1064 set_adj_image_gamma(gamma) 1065 Sets the gamma correction for the nuclei image. 1066 1067 set_adj_chrom_contrast(contrast) 1068 Sets the contrast adjustment for the chromatinization image. 1069 1070 current_parameters_nuclei (property) 1071 Returns the active nuclei segmentation and filtering parameters. 1072 1073 find_nuclei() 1074 Performs nuclei segmentation using StarDist and extracts initial features. 1075 1076 select_nuclei() 1077 Filters the detected nuclei based on set morphological and intensity criteria. 1078 1079 nuclei_chromatinization() 1080 Performs quantitative and morphological analysis of chromatin spots in selected nuclei. 1081 1082 get_features(model_out, image) 1083 Calculates geometric and intensity features from a segmented mask (label image). 1084 1085 Notes 1086 ----- 1087 The typical analysis workflow follows this order: 1088 1. `input_image()` 1089 2. `find_nuclei()` 1090 3. `select_nuclei()` (Optional) 1091 4. `nuclei_chromatinization()` (Optional) 1092 """ 1093 1094 def __init__( 1095 self, 1096 image=None, 1097 test_results=None, 1098 hyperparameter_nuclei=None, 1099 hyperparameter_chromatinization=None, 1100 img_adj_par_chrom=None, 1101 img_adj_par=None, 1102 show_plots=None, 1103 nuclei_results=None, 1104 images=None, 1105 ): 1106 """ 1107 The main class for the detection and analysis of cell nuclei and their chromatinization 1108 in microscopy or flow cytometry images, utilizing the StarDist segmentation model. 1109 1110 This class inherits functionality for image processing (ImageTools) and 1111 results handling (RepTools). 1112 1113 Parameters 1114 ---------- 1115 image : np.ndarray, optional 1116 The input image for analysis. 1117 Default: None. 1118 1119 test_results : list, optional 1120 A list of plots or images resulting from parameter testing. 1121 Default: None. 
1122 1123 hyperparameter_nuclei : dict, optional 1124 The segmentation parameters for nuclei detection. 1125 Default: 1126 {'nms': 0.8, 'prob': 0.4, 'max_size': 1000, 'min_size': 20, 1127 'circularity': 0.6, 'intensity_mean': 6553.5, 'nn_min': 10, 1128 'FC_diff_global': 1.5, 'FC_diff_local_intensity': 0.6} 1129 1130 hyperparameter_chromatinization : dict, optional 1131 The analysis parameters for chromatin spots within the nuclei. 1132 Default: 1133 {'max_size': 800, 'min_size': 2, 'ratio': 0.1, 'cut_point': 0.95} 1134 1135 img_adj_par_chrom : dict, optional 1136 Image adjustment parameters (gamma, contrast, brightness) for chromatin analysis. 1137 Default: {'gamma': 0.25, 'contrast': 5, 'brightness': 950} 1138 1139 img_adj_par : dict, optional 1140 Image adjustment parameters (gamma, contrast, brightness) for nuclei segmentation. 1141 Default: {'gamma': 0.9, 'contrast': 2, 'brightness': 1000} 1142 1143 show_plots : bool, optional 1144 Flag to determine whether results and plots should be displayed automatically. 1145 Default: True. 1146 1147 nuclei_results : dict, optional 1148 A dictionary storing the numerical results of the analysis. 1149 Default: {'nuclei': None, 'nuclei_reduced': None, 'nuclei_chromatinization': None} 1150 1151 images : dict, optional 1152 A dictionary storing the output images (e.g., masks). 1153 Default: {'nuclei': None, 'nuclei_reduced': None, 'nuclei_chromatinization': None} 1154 1155 Attributes 1156 ---------- 1157 image : np.ndarray 1158 The currently loaded image for analysis. 1159 1160 hyperparameter_nuclei : dict 1161 Active nuclei segmentation parameters. 1162 1163 hyperparameter_chromatinization : dict 1164 Active chromatinization analysis parameters. 1165 1166 img_adj_par : dict 1167 Active image correction parameters for nuclei segmentation. 1168 1169 img_adj_par_chrom : dict 1170 Active image correction parameters for chromatin analysis. 1171 1172 show_plots : bool 1173 The current state of the plot display flag. 
1174 1175 series_im : bool 1176 Flag indicating if a series of images is being processed. 1177 1178 Notes 1179 ----- 1180 The default value for 'intensity_mean' in hyperparameter_nuclei is calculated 1181 as $(2^{16} - 1) / 10$, which represents 10% of the maximum 16-bit value (65535 / 10 = 6553.5). 1182 1183 The image adjustment parameters are crucial for optimizing contrast and brightness 1184 to improve the performance of both the StarDist model and the subsequent 1185 chromatin thresholding. 1186 """ 1187 1188 # Use default values if parameters are None 1189 self.image = image or None 1190 """Loaded input image.""" 1191 self.test_results = test_results or None 1192 """Results of parameter tests. 1193 1194 This attribute or method stores the outcomes of parameter testing procedures. 1195 For interactive browsing and inspection of the results, use the 1196 `browser_test(self)` method.""" 1197 1198 self.hyperparameter_nuclei = hyperparameter_nuclei or { 1199 "nms": 0.8, 1200 "prob": 0.4, 1201 "max_size": 1000, 1202 "min_size": 20, 1203 "circularity": 0.6, 1204 "intensity_mean": (2**16 - 1) / 10, 1205 "nn_min": 10, 1206 "FC_diff_global": 1.5, 1207 "FC_diff_local_intensity": 0.6, 1208 } 1209 """Active nuclei segmentation/filter parameters.""" 1210 1211 self.hyperparameter_chromatinization = hyperparameter_chromatinization or { 1212 "max_size": 800, 1213 "min_size": 2, 1214 "ratio": 0.1, 1215 "cut_point": 0.95, 1216 } 1217 """Active chromatin analysis parameters.""" 1218 1219 self.img_adj_par_chrom = img_adj_par_chrom or { 1220 "gamma": 0.25, 1221 "contrast": 5, 1222 "brightness": 950, 1223 } 1224 """Image adjustment for chromatin analysis.""" 1225 1226 self.img_adj_par = img_adj_par or { 1227 "gamma": 0.9, 1228 "contrast": 2, 1229 "brightness": 1000, 1230 } 1231 """Image adjustment for nuclei segmentation.""" 1232 1233 self.show_plots = show_plots or True 1234 """Flag controlling plot display.""" 1235 1236 self.nuclei_results = nuclei_results or { 1237 "nuclei": 
None, 1238 "nuclei_reduced": None, 1239 "nuclei_chromatinization": None, 1240 } 1241 """Stored dictionary of nuclei analysis results.""" 1242 1243 self.images = images or { 1244 "nuclei": None, 1245 "nuclei_reduced": None, 1246 "nuclei_chromatinization": None, 1247 } 1248 """Stored dictionary of images from nuclei analysis.""" 1249 1250 # sereies images 1251 self.series_im = False 1252 """Flag for batch/series image processing.""" 1253 1254 def set_nms(self, nms: float): 1255 """ 1256 Set the Non-Maximum Suppression (NMS) threshold. 1257 1258 The NMS threshold controls how aggressively overlapping detections are suppressed. 1259 A lower value reduces the probability of overlapping nuclei being kept. 1260 1261 Parameters 1262 ---------- 1263 nms : float 1264 The NMS IoU threshold value. 1265 """ 1266 1267 self.hyperparameter_nuclei["nms"] = nms 1268 1269 def set_prob(self, prob: float): 1270 """ 1271 Set the probability threshold used in segmentation. 1272 1273 The probability threshold determines the minimum confidence required for an object 1274 (e.g., a nucleus) to be classified as a segmented entity. Higher values result in 1275 fewer segmented objects, as only detections with strong confidence scores are kept. 1276 This may lead to omission of weaker or less distinct structures. 1277 1278 Because optimal values depend on image characteristics, it is important to visually 1279 inspect segmentation results produced with different thresholds to determine the 1280 most suitable setting. 1281 1282 Parameters 1283 ---------- 1284 prob : float 1285 The probability threshold value. 1286 """ 1287 1288 self.hyperparameter_nuclei["prob"] = prob 1289 1290 def set_nuclei_circularity(self, circ: float): 1291 """ 1292 This method sets 'circ' parameter. The circ is a parameter used for adjust minimal nucleus circularity. 1293 1294 Parameters 1295 ---------- 1296 circ : float 1297 Nuclei circularity value. 
1298 """ 1299 1300 self.hyperparameter_nuclei["circularity"] = circ 1301 1302 def set_nuclei_local_intenisty_FC(self, local_FC: float): 1303 """ 1304 This method sets the 'FC_diff_local_intensity' parameter. The 'local_FC' is used to remove false positive multiple nuclei that were detected in single image. 1305 1306 Parameters 1307 ---------- 1308 local_FC : float 1309 local_FC value. 1310 """ 1311 1312 self.hyperparameter_nuclei["FC_diff_local_intensity"] = local_FC 1313 1314 # change 1315 def set_nuclei_global_area_FC(self, global_FC: float): 1316 """ 1317 This method sets the 'FC_diff_global' parameter. The 'global_FC' is used to remove false positive multiple nuclei that were detected in a single image and are outliers from the global mean area size. 1318 1319 Parameters 1320 ---------- 1321 FC_diff_global : float 1322 global_FC value. 1323 """ 1324 1325 self.hyperparameter_nuclei["FC_diff_global"] = global_FC 1326 1327 def set_nuclei_size(self, size: tuple): 1328 """ 1329 This method sets 'size' parameter. The size is a parameter used for adjust minimal and maximal nucleus area (px). 1330 1331 Parameters 1332 ---------- 1333 size : tuple 1334 (min_value, max_value) 1335 """ 1336 1337 self.hyperparameter_nuclei["min_size"] = size[0] 1338 self.hyperparameter_nuclei["max_size"] = size[1] 1339 1340 def set_nuclei_min_mean_intensity(self, intensity: int): 1341 """ 1342 This method sets 'intensity' parameter. The 'intensity' parameter is used to adjust the minimum mean intensity of all pixel intensities within the nucleus. 1343 1344 Parameters 1345 ---------- 1346 intensity : int 1347 intensity value. 1348 """ 1349 1350 self.hyperparameter_nuclei["intensity_mean"] = intensity 1351 1352 def set_chromatinization_size(self, size: tuple): 1353 """ 1354 This method sets 'size' parameter. The size is a parameter used for adjust minimal and maximal chromanitization spot area (px) within the nucleus. 

        Parameters
        ----------
        size : tuple
            (min_value, max_value)
        """

        self.hyperparameter_chromatinization["min_size"] = size[0]
        self.hyperparameter_chromatinization["max_size"] = size[1]

    # annotation corrected: the default ratio is 0.1, so this is a float
    def set_chromatinization_ratio(self, ratio: float):
        """
        Set the 'ratio' parameter. In this case, the 'ratio' parameter is similar to
        'circularity' as it describes the ratio between the maximum lengths in the x
        and y dimensions of the nucleus chromatinization.

        Parameters
        ----------
        ratio : float
            ratio value.
        """

        self.hyperparameter_chromatinization["ratio"] = ratio

    # annotation corrected: the default cut_point is 0.95, so this is a float
    def set_chromatinization_cut_point(self, cut_point: float):
        """
        Set the 'cut_point' parameter. The 'cut_point' parameter is a factor used to
        adjust the threshold for separating the background from chromatin spots.

        Parameters
        ----------
        cut_point : float
            cut_point value.
        """

        self.hyperparameter_chromatinization["cut_point"] = cut_point

    #

    def set_adj_image_gamma(self, gamma: float):
        """
        Set the 'gamma' parameter, used to adjust the gamma of the nucleus image.

        Parameters
        ----------
        gamma : float
            gamma value.
        """

        self.img_adj_par["gamma"] = gamma

    def set_adj_image_contrast(self, contrast: float):
        """
        Set the 'contrast' parameter, used to adjust the contrast of the nucleus image.

        Parameters
        ----------
        contrast : float
            contrast value.
        """

        self.img_adj_par["contrast"] = contrast

    def set_adj_image_brightness(self, brightness: float):
        """
        Set the 'brightness' parameter, used to adjust the brightness of the nucleus image.

        Parameters
        ----------
        brightness : float
            brightness value.
1423 """ 1424 1425 self.img_adj_par["brightness"] = brightness 1426 1427 # 1428 1429 def set_adj_chrom_gamma(self, gamma: float): 1430 """ 1431 This method sets 'gamma' parameter. The gamma is a parameter used for adjust gamma of the nucleus chromatinization image. 1432 1433 Parameters 1434 ---------- 1435 gamma : float 1436 gamma value. 1437 """ 1438 1439 self.img_adj_par_chrom["gamma"] = gamma 1440 1441 def set_adj_chrom_contrast(self, contrast: float): 1442 """ 1443 This method sets 'contrast' parameter. The contrast is a parameter used for adjust contrast of the nucleus chromatinization image. 1444 1445 Parameters 1446 ---------- 1447 contrast : float 1448 contrast value. 1449 """ 1450 1451 self.img_adj_par_chrom["contrast"] = contrast 1452 1453 def set_adj_chrom_brightness(self, brightness: float): 1454 """ 1455 This method sets 'brightness' parameter. The brightness is a parameter used for adjust brightness of the nucleus chromatinization image. 1456 1457 Parameters 1458 ---------- 1459 brightness : float 1460 brightness value. 1461 """ 1462 1463 self.img_adj_par_chrom["brightness"] = brightness 1464 1465 @property 1466 def current_parameters_nuclei(self): 1467 """ 1468 This method returns current nuclei analysis parameters. 1469 1470 Returns 1471 ------- 1472 dict 1473 Nuclei analysis parameters. 1474 """ 1475 print(self.hyperparameter_nuclei) 1476 return self.hyperparameter_nuclei 1477 1478 @property 1479 def current_parameters_chromatinization(self): 1480 """ 1481 This method returns current nuclei chromatinization analysis parameters. 1482 1483 Returns 1484 ------- 1485 dict 1486 Nuclei chromatinization analysis parameters. 1487 """ 1488 1489 print(self.hyperparameter_chromatinization) 1490 return self.hyperparameter_chromatinization 1491 1492 @property 1493 def current_parameters_img_adj(self): 1494 """ 1495 This method returns current nuclei image setup. 1496 1497 Returns 1498 ------- 1499 dict 1500 Nuclei image setup. 
1501 """ 1502 1503 print(self.img_adj_par) 1504 return self.img_adj_par 1505 1506 @property 1507 def current_parameters_img_adj_chro(self): 1508 """ 1509 This method returns current nuclei chromatinization image setup. 1510 1511 Returns 1512 ------- 1513 dict 1514 Nuclei chromatinization image setup. 1515 """ 1516 1517 print(self.img_adj_par_chrom) 1518 return self.img_adj_par_chrom 1519 1520 def get_results_nuclei(self): 1521 """ 1522 This function returns nuclei analysis results. 1523 1524 Returns 1525 ------- 1526 dict 1527 Nuclei results in the dictionary format. 1528 """ 1529 1530 if self.images["nuclei"] is None: 1531 print("No results to return!") 1532 return None 1533 else: 1534 if cfg._DISPLAY_MODE: 1535 if self.show_plots: 1536 display_preview(self.resize_to_screen_img(self.images["nuclei"])) 1537 return self.nuclei_results["nuclei"], self.images["nuclei"] 1538 1539 def get_results_nuclei_selected(self): 1540 """ 1541 This function returns the results of the nuclei analysis following adjustments to the data selection thresholds. 1542 1543 Returns 1544 ------- 1545 dict 1546 Nuclei results in the dictionary format. 1547 """ 1548 1549 if self.images["nuclei_reduced"] is None: 1550 print("No results to return!") 1551 return None 1552 else: 1553 if cfg._DISPLAY_MODE: 1554 if self.show_plots: 1555 display_preview( 1556 self.resize_to_screen_img(self.images["nuclei_reduced"]) 1557 ) 1558 return self.nuclei_results["nuclei_reduced"], self.images["nuclei_reduced"] 1559 1560 def get_results_nuclei_chromatinization(self): 1561 """ 1562 This function returns the results of the nuclei chromatinization analysis. 1563 1564 Returns 1565 ------- 1566 dict 1567 Nuclei chromatinization results in the dictionary format. 
1568 """ 1569 1570 if self.images["nuclei_chromatinization"] is None: 1571 print("No results to return!") 1572 return None 1573 else: 1574 if cfg._DISPLAY_MODE: 1575 if self.show_plots: 1576 display_preview(self.images["nuclei_chromatinization"]) 1577 return ( 1578 self.nuclei_results["nuclei_chromatinization"], 1579 self.images["nuclei_chromatinization"], 1580 ) 1581 1582 def add_test(self, plots): 1583 self.test_results = plots 1584 1585 """ 1586 Helper method. 1587 """ 1588 1589 def input_image(self, img): 1590 """ 1591 This method adds the image to the class for nuclei and/or chromatinization analysis. 1592 1593 Parameters 1594 ---------- 1595 img : np.ndarray 1596 Input image. 1597 """ 1598 1599 self.image = img 1600 self.add_test(None) 1601 1602 def get_features(self, model_out, image): 1603 """ 1604 Extracts numerical feature descriptors from model output for a given image. 1605 1606 This method processes the output returned by a feature-extraction model 1607 (e.g., CNN, encoder network, statistical model) and converts it into a 1608 structured feature vector associated with the provided image. 1609 Typically used for downstream analysis, classification, or clustering. 1610 1611 Parameters 1612 ---------- 1613 model_out : any 1614 Output returned by the feature-extraction model. 1615 The expected format depends on the model (e.g., tensor, dict, list of arrays). 1616 1617 image : ndarray 1618 The input image (2D or 3D array) for which features are being extracted. 1619 Provided for reference or for combining raw image metrics with model features. 1620 1621 Returns 1622 ------- 1623 features : dict 1624 Dictionary containing extracted features. 1625 Keys correspond to feature names, and values are numerical descriptors. 
1626 """ 1627 1628 features = { 1629 "label": [], 1630 "area": [], 1631 "area_bbox": [], 1632 "area_convex": [], 1633 "area_filled": [], 1634 "axis_major_length": [], 1635 "axis_minor_length": [], 1636 "eccentricity": [], 1637 "equivalent_diameter_area": [], 1638 "feret_diameter_max": [], 1639 "solidity": [], 1640 "perimeter": [], 1641 "perimeter_crofton": [], 1642 "circularity": [], 1643 "intensity_max": [], 1644 "intensity_mean": [], 1645 "intensity_min": [], 1646 "ratio": [], 1647 "coords": [], 1648 } 1649 1650 for region in skimage.measure.regionprops(model_out, intensity_image=image): 1651 1652 # Compute circularity 1653 if region.perimeter > 0: 1654 circularity = 4 * np.pi * region.area / (region.perimeter**2) 1655 else: 1656 circularity = 0 1657 1658 features["area"].append(region.area) 1659 features["area_bbox"].append(region.area_bbox) 1660 features["area_convex"].append(region.area_convex) 1661 features["area_filled"].append(region.area_filled) 1662 features["axis_major_length"].append(region.axis_major_length) 1663 features["axis_minor_length"].append(region.axis_minor_length) 1664 features["eccentricity"].append(region.eccentricity) 1665 features["equivalent_diameter_area"].append(region.equivalent_diameter_area) 1666 features["feret_diameter_max"].append(region.feret_diameter_max) 1667 features["solidity"].append(region.solidity) 1668 features["perimeter"].append(region.perimeter) 1669 features["perimeter_crofton"].append(region.perimeter_crofton) 1670 features["label"].append(region.label) 1671 features["coords"].append(region.coords) 1672 features["circularity"].append(circularity) 1673 features["intensity_max"].append(np.max(region.intensity_max)) 1674 features["intensity_min"].append(np.max(region.intensity_min)) 1675 features["intensity_mean"].append(np.max(region.intensity_mean)) 1676 1677 ratios = [] 1678 1679 # Calculate the ratio for each pair of values 1680 for min_len, max_len in zip( 1681 features["axis_minor_length"], 
features["axis_major_length"] 1682 ): 1683 if max_len != 0: 1684 ratio = min_len / max_len 1685 ratios.append(ratio) 1686 else: 1687 ratios.append(float(0.0)) 1688 1689 features["ratio"] = ratios 1690 1691 return features 1692 1693 # repaired stat 1694 def nuclei_finder_test(self): 1695 """ 1696 This method performs testing analysis of parameters (specified 'nms' and 'prob' parameters) 1697 for the image provided by the input_image() method. 1698 1699 This method evaluates the performance of the internal NucleiFinder 1700 configuration using the currently loaded images, parameters, or model 1701 settings. It is typically used to check whether the detection, segmentation 1702 or preprocessing stages run correctly on sample data. 1703 1704 Examples 1705 -------- 1706 >>> nf.nuclei_finder_test() 1707 >>> nf.browser_test() 1708 """ 1709 1710 StarDist2D.from_pretrained() 1711 model = StarDist2D.from_pretrained("2D_versatile_fluo") 1712 1713 nmst = [0.1, 0.2, 0.6] 1714 probt = [0.1, 0.5, 0.9] 1715 1716 try: 1717 img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY) 1718 except: 1719 img = self.image 1720 1721 plot = [] 1722 1723 # adj img 1724 img = adjust_img_16bit( 1725 img, 1726 brightness=self.img_adj_par["brightness"], 1727 contrast=self.img_adj_par["contrast"], 1728 gamma=self.img_adj_par["gamma"], 1729 ) 1730 img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 1731 1732 fig = plt.figure(dpi=300) 1733 plt.imshow(img) 1734 plt.axis("off") 1735 plt.title("Original", fontsize=25) 1736 1737 if cfg._DISPLAY_MODE: 1738 if self.show_plots: 1739 plt.show() 1740 1741 plot.append(fig) 1742 1743 for n in tqdm(nmst, desc="Loop 1: nmst"): 1744 print(f"\n➡️ Starting outer loop for n = {n}") 1745 1746 for t in tqdm(probt, desc=f" ↳ Loop 2 for n={n}", leave=False): 1747 print(f" → Starting inner loop for t = {t}") 1748 1749 labels, _ = model.predict_instances( 1750 normalize(img.copy()), nms_thresh=n, prob_thresh=t 1751 ) 1752 1753 tmp = self.get_features(model_out=labels, image=img) 1754 
                fig = plt.figure(dpi=300)
                plt.imshow(render_label(labels, img=img))
                plt.axis("off")
                plt.title(
                    f"nms {n} & prob {t} \n detected nuc: {len(tmp['area'])}",
                    fontsize=25,
                )

                if cfg._DISPLAY_MODE:
                    if self.show_plots:
                        plt.show()

                plot.append(fig)

        # Store the full grid of preview figures for later browsing.
        self.add_test(plot)

    def find_nuclei(self):
        """
        Performs analysis on the image provided by the ``input_image()`` method
        using default or user-defined parameters.

        To show current parameters, use:
        - ``current_parameters_nuclei``
        - ``current_parameters_img_adj``

        To set new parameters, use:
        - ``set_nms()``
        - ``set_prob()``
        - ``set_adj_image_gamma()``
        - ``set_adj_image_contrast()``
        - ``set_adj_image_brightness()``

        To get analysis results, use:
        - ``get_results_nuclei()``
        """

        if isinstance(self.image, np.ndarray):

            model = StarDist2D.from_pretrained("2D_versatile_fluo")

            # NOTE(review): bare except — relies on cv2.cvtColor failing when the
            # image is already single-channel; a narrower `except cv2.error` would
            # be safer.
            try:
                img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
            except:
                img = self.image

            # Brightness/contrast/gamma correction before segmentation.
            img = adjust_img_16bit(
                img,
                brightness=self.img_adj_par["brightness"],
                contrast=self.img_adj_par["contrast"],
                gamma=self.img_adj_par["gamma"],
            )
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            labels, _ = model.predict_instances(
                normalize(img),
                nms_thresh=self.hyperparameter_nuclei["nms"],
                prob_thresh=self.hyperparameter_nuclei["prob"],
            )

            self.nuclei_results["nuclei"] = self.get_features(
                model_out=labels, image=img
            )

            if len(self.nuclei_results["nuclei"]["coords"]) > 0:

                oryginal = adjust_img_16bit(img, color="gray")

                # series repaired nuclesu
                if self.series_im is True:
                    # Batch mode: keep only the adjusted original (no mask overlay).
                    self.images["nuclei"] = oryginal
                else:
                    # Single-image mode: build a blue mask preview and show it
                    # side by side with the original.
                    nuclei_mask = adjust_img_16bit(
                        cv2.cvtColor(
                            self.create_mask(self.nuclei_results["nuclei"], oryginal),
                            cv2.COLOR_BGR2GRAY,
                        ),
                        color="blue",
                    )
                    concatenated_image = cv2.hconcat([oryginal, nuclei_mask])
                    self.images["nuclei"] = concatenated_image

                    if cfg._DISPLAY_MODE:
                        if self.show_plots:
                            display_preview(
                                self.resize_to_screen_img(self.images["nuclei"])
                            )

            else:

                # Reset all downstream results so stale data cannot leak through.
                self.nuclei_results["nuclei"] = None
                self.nuclei_results["nuclei_reduced"] = None
                self.nuclei_results["nuclei_chromatinization"] = None

                print("Nuclei not detected!")

        else:
            print("\nAdd image firstly!")

    def select_nuclei(self):
        """
        Selects data obtained from ``find_nuclei()`` based on the set threshold parameters.

        To show current parameters, use:
        - ``current_parameters_nuclei``

        To set new parameters, use:
        - ``set_nuclei_circularity()``
        - ``set_nuclei_size()``
        - ``set_nuclei_min_mean_intensity()``

        To get analysis results, use:
        - ``get_results_nuclei_selected()``
        """

        if self.nuclei_results["nuclei"] is not None:
            # Work on a deep copy so the raw detection results stay intact.
            input_in = copy.deepcopy(self.nuclei_results["nuclei"])

            # Apply the three filters in sequence: area lower bound, area upper
            # bound, then minimal mean intensity.
            nuclei_dictionary = self.drop_dict(
                input_in,
                key="area",
                var=self.hyperparameter_nuclei["min_size"],
                action=">",
            )
            nuclei_dictionary = self.drop_dict(
                nuclei_dictionary,
                key="area",
                var=self.hyperparameter_nuclei["max_size"],
                action="<",
            )
            nuclei_dictionary = self.drop_dict(
                nuclei_dictionary,
                key="intensity_mean",
                var=self.hyperparameter_nuclei["intensity_mean"],
                action=">",
            )

            if len(nuclei_dictionary["coords"]) > 0:

                self.nuclei_results["nuclei_reduced"] = nuclei_dictionary

                try:
                    img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
                except:
                    img = self.image

                oryginal = adjust_img_16bit(img, color="gray")

                # series repaired nuclesu
                if self.series_im is True:
                    self.images["nuclei_reduced"] = oryginal
                else:
                    # Blue mask of the surviving nuclei, shown next to the original.
                    nuclei_mask = adjust_img_16bit(
                        cv2.cvtColor(
                            self.create_mask(
                                self.nuclei_results["nuclei_reduced"], oryginal
                            ),
                            cv2.COLOR_BGR2GRAY,
                        ),
                        color="blue",
                    )
                    concatenated_image = cv2.hconcat([oryginal, nuclei_mask])

                    self.images["nuclei_reduced"] = concatenated_image

                if cfg._DISPLAY_MODE:
                    if self.show_plots:
                        display_preview(
                            self.resize_to_screen_img(self.images["nuclei_reduced"])
                        )

            else:
                # All nuclei were filtered out — reset every result slot.
                self.nuclei_results["nuclei"] = None
                self.nuclei_results["nuclei_reduced"] = None
                self.nuclei_results["nuclei_chromatinization"] = None

                print("Selected zero nuclei! Analysis stop!")

        else:
            print("Lack of nuclei data to select!")

    def nuclei_chromatinization(self):
        """
        Performs chromatinization analysis of nuclei using data obtained from
        ``find_nuclei()`` and/or ``select_nuclei()``.

        To show current parameters, use:
        - ``current_parameters_chromatinization``
        - ``current_parameters_img_adj_chro``

        To set new parameters, use:
        - ``set_chromatinization_size()``
        - ``set_chromatinization_ratio()``
        - ``set_chromatinization_cut_point()``
        - ``set_adj_chrom_gamma()``
        - ``set_adj_chrom_contrast()``
        - ``set_adj_chrom_brightness()``

        To get analysis results, use:
        - ``get_results_nuclei_chromatinization()``
        """

        def add_lists(f, g):
            # Element-wise string concatenation of two lists, padding the
            # shorter one with "" so no element is lost.

            result = []
            max_length = max(len(f), len(g))

            for i in range(max_length):
                f_elem = f[i] if i < len(f) else ""
                g_elem = g[i] if i < len(g) else ""
                result.append(f_elem + g_elem)

            return result

        def reverse_coords(image, x, y):
            # Return the coordinates of every pixel NOT in (x, y), i.e. the
            # complement of the nucleus mask within the image.

            zero = np.zeros(image.shape)

            zero[x, y] = 2**16

            zero_indices = np.where(zero == 0)

            return zero_indices[0], zero_indices[1]

        # Prefer the filtered nuclei when select_nuclei() has been run.
        if isinstance(self.nuclei_results["nuclei_reduced"], dict):
            nuclei_dictionary = self.nuclei_results["nuclei_reduced"]
        else:
            nuclei_dictionary = self.nuclei_results["nuclei"]

        if nuclei_dictionary is not None:
            arrays_list = copy.deepcopy(nuclei_dictionary["coords"])

            # Per-spot morphology accumulators (index-aligned lists).
            chromatione_info = {
                "area": [],
                "area_bbox": [],
                "area_convex": [],
                "area_filled": [],
                "axis_major_length": [],
                "axis_minor_length": [],
                "eccentricity": [],
                "equivalent_diameter_area": [],
                "feret_diameter_max": [],
                "solidity": [],
                "perimeter": [],
                "perimeter_crofton": [],
                "coords": [],
            }

            full_im = np.zeros(self.image.shape[0:2], dtype=np.uint16)
            full_im = adjust_img_16bit(full_im)

            # Process one nucleus at a time: mask out everything else, enhance,
            # Otsu-threshold inside the nucleus, and collect chromatin spots.
            for arr in arrays_list:
                x = list(arr[:, 0])
                y = list(arr[:, 1])

                x1, y1 = reverse_coords(self.image, x, y)

                regions_chro2 = self.image.copy()

                # Zero all pixels outside the current nucleus.
                regions_chro2[x1, y1] = 0

                regions_chro2 = regions_chro2.astype("uint16")

                # NOTE(review): bare except — tolerates images that are already
                # single-channel (cvtColor would raise).
                try:
                    regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY)
                except:
                    pass

                regions_chro2 = adjust_img_16bit(
                    regions_chro2,
                    brightness=self.img_adj_par_chrom["brightness"],
                    contrast=self.img_adj_par_chrom["contrast"],
                    gamma=self.img_adj_par_chrom["gamma"],
                )

                # Accumulate the enhanced nucleus into the composite preview.
                full_im = merge_images(
                    image_list=[full_im, regions_chro2], intensity_factors=[1, 1]
                )

                # Otsu threshold computed ONLY from pixels inside the nucleus.
                ret, thresh = cv2.threshold(
                    regions_chro2[x, y],
                    0,
                    2**16 - 1,
                    cv2.THRESH_BINARY + cv2.THRESH_OTSU,
                )

                # Suppress background below the (scaled) Otsu threshold.
                regions_chro2[
                    regions_chro2
                    <= ret * self.hyperparameter_chromatinization["cut_point"]
                ] = 0

                regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY)

                chromatione = regions_chro2 > 0

                labeled_cells = measure.label(chromatione)
                # NOTE(review): the first regionprops call is immediately
                # overwritten by the intensity-aware one below — dead statement.
                regions = measure.regionprops(labeled_cells)
                regions = measure.regionprops(
                    labeled_cells, intensity_image=regions_chro2
                )

                for region in regions:

                    chromatione_info["area"].append(region.area)
                    chromatione_info["area_bbox"].append(region.area_bbox)
                    chromatione_info["area_convex"].append(region.area_convex)
                    chromatione_info["area_filled"].append(region.area_filled)
                    chromatione_info["axis_major_length"].append(
                        region.axis_major_length
                    )
                    chromatione_info["axis_minor_length"].append(
                        region.axis_minor_length
                    )
                    chromatione_info["eccentricity"].append(region.eccentricity)
                    chromatione_info["equivalent_diameter_area"].append(
                        region.equivalent_diameter_area
                    )
                    chromatione_info["feret_diameter_max"].append(
                        region.feret_diameter_max
                    )
                    chromatione_info["solidity"].append(region.solidity)
                    chromatione_info["perimeter"].append(region.perimeter)
                    chromatione_info["perimeter_crofton"].append(
                        region.perimeter_crofton
                    )
                    chromatione_info["coords"].append(region.coords)

            ratios = []

            # Minor/major axis ratio per chromatin spot.
            for min_len, max_len in zip(
                chromatione_info["axis_minor_length"],
                chromatione_info["axis_major_length"],
            ):
                if max_len != 0:
                    ratio = min_len / max_len
                    ratios.append(ratio)
                else:
                    ratios.append(float(0.0))

            chromatione_info["ratio"] = ratios

            # Filter spots by area bounds and minimal axis ratio.
            chromation_dic = self.drop_dict(
                chromatione_info,
                key="area",
                var=self.hyperparameter_chromatinization["min_size"],
                action=">",
            )
            chromation_dic = self.drop_dict(
                chromation_dic,
                key="area",
                var=self.hyperparameter_chromatinization["max_size"],
                action="<",
            )
            chromation_dic = self.drop_dict(
                chromation_dic,
                key="ratio",
                var=self.hyperparameter_chromatinization["ratio"],
                action=">",
            )

            arrays_list2 = copy.deepcopy(chromation_dic["coords"])

            # Per-nucleus spot feature lists (one sub-list per nucleus).
            nuclei_dictionary["spot_size_area"] = []
            nuclei_dictionary["spot_size_area_bbox"] = []
            nuclei_dictionary["spot_size_area_convex"] = []
            nuclei_dictionary["spot_size_area_filled"] = []
            nuclei_dictionary["spot_axis_major_length"] = []
            nuclei_dictionary["spot_axis_minor_length"] = []
            nuclei_dictionary["spot_eccentricity"] = []
            nuclei_dictionary["spot_size_equivalent_diameter_area"] = []
            nuclei_dictionary["spot_feret_diameter_max"] = []
            nuclei_dictionary["spot_perimeter"] = []
            nuclei_dictionary["spot_perimeter_crofton"] = []

            # Assign each chromatin spot to the nucleus whose pixel set it
            # overlaps (>= 80% of the smaller coordinate set) and whose relative
            # size is between 2.5% and 50% of the nucleus.
            for i, arr in enumerate(arrays_list):

                spot_size_area = []
                spot_size_area_bbox = []
                spot_size_area_convex = []
                # NOTE(review): duplicated initialization of
                # spot_size_area_convex kept verbatim — harmless no-op.
                spot_size_area_convex = []
                spot_size_area_filled = []
                spot_axis_major_length = []
                spot_axis_minor_length = []
                spot_eccentricity = []
                spot_size_equivalent_diameter_area = []
                spot_feret_diameter_max = []
                spot_perimeter = []
                spot_perimeter_crofton = []

                # Flatten the array,
                # encode each (row, col) pixel as a single string key so
                # overlap can be measured with set intersection.
                df_tmp = pd.DataFrame(arr)
                df_tmp["duplicates"] = add_lists(
                    [str(x) for x in df_tmp[0]], [str(y) for y in df_tmp[1]]
                )

                counter_tmp = Counter(df_tmp["duplicates"])

                for j, arr2 in enumerate(arrays_list2):
                    df_tmp2 = pd.DataFrame(arr2)
                    df_tmp2["duplicates"] = add_lists(
                        [str(x) for x in df_tmp2[0]], [str(y) for y in df_tmp2[1]]
                    )

                    counter_tmp2 = Counter(df_tmp2["duplicates"])
                    intersection_length = len(counter_tmp.keys() & counter_tmp2.keys())
                    min_length = min(len(counter_tmp), len(counter_tmp2))

                    if intersection_length >= 0.8 * min_length:

                        if (
                            len(list(df_tmp2["duplicates"]))
                            / len(list(df_tmp["duplicates"]))
                        ) >= 0.025 and (
                            len(list(df_tmp2["duplicates"]))
                            / len(list(df_tmp["duplicates"]))
                        ) <= 0.5:
                            spot_size_area.append(chromation_dic["area"][j])
                            spot_size_area_bbox.append(chromation_dic["area_bbox"][j])
                            spot_size_area_convex.append(
                                chromation_dic["area_convex"][j]
                            )
                            spot_size_area_filled.append(
                                chromation_dic["area_filled"][j]
                            )
                            spot_axis_major_length.append(
                                chromation_dic["axis_major_length"][j]
                            )
                            spot_axis_minor_length.append(
                                chromation_dic["axis_minor_length"][j]
                            )
                            spot_eccentricity.append(chromation_dic["eccentricity"][j])
                            spot_size_equivalent_diameter_area.append(
                                chromation_dic["equivalent_diameter_area"][j]
                            )
                            spot_feret_diameter_max.append(
                                chromation_dic["feret_diameter_max"][j]
                            )
                            spot_perimeter.append(chromation_dic["perimeter"][j])
                            spot_perimeter_crofton.append(
                                chromation_dic["perimeter_crofton"][j]
                            )

                nuclei_dictionary["spot_size_area"].append(spot_size_area)
                nuclei_dictionary["spot_size_area_bbox"].append(spot_size_area_bbox)
                nuclei_dictionary["spot_size_area_convex"].append(spot_size_area_convex)
                nuclei_dictionary["spot_size_area_filled"].append(spot_size_area_filled)
                nuclei_dictionary["spot_axis_major_length"].append(
                    spot_axis_major_length
                )
                nuclei_dictionary["spot_axis_minor_length"].append(
                    spot_axis_minor_length
                )
                nuclei_dictionary["spot_eccentricity"].append(spot_eccentricity)
                nuclei_dictionary["spot_size_equivalent_diameter_area"].append(
                    spot_size_equivalent_diameter_area
                )
                nuclei_dictionary["spot_feret_diameter_max"].append(
                    spot_feret_diameter_max
                )
                nuclei_dictionary["spot_perimeter"].append(spot_perimeter)
                nuclei_dictionary["spot_perimeter_crofton"].append(
                    spot_perimeter_crofton
                )

            self.nuclei_results["chromatinization"] = chromation_dic
            self.nuclei_results["nuclei_chromatinization"] = nuclei_dictionary

            # NOTE(review): this assignment is overwritten by the concatenated
            # preview built below.
            self.images["nuclei_chromatinization"] = self.create_mask(
                chromation_dic, self.image
            )

            img_chrom = adjust_img_16bit(
                cv2.cvtColor(
                    self.create_mask(
                        self.nuclei_results["chromatinization"], self.image
                    ),
                    cv2.COLOR_BGR2GRAY,
                ),
                color="yellow",
            )

            # Blue nuclei mask from the reduced set when available, otherwise
            # from the raw detections.
            if isinstance(self.nuclei_results["nuclei_reduced"], dict):
                nuclei_mask = adjust_img_16bit(
                    cv2.cvtColor(
                        self.create_mask(
                            self.nuclei_results["nuclei_reduced"], self.image
                        ),
                        cv2.COLOR_BGR2GRAY,
                    ),
                    color="blue",
                )
            else:
                nuclei_mask = adjust_img_16bit(
                    cv2.cvtColor(
                        self.create_mask(self.nuclei_results["nuclei"], self.image),
                        cv2.COLOR_BGR2GRAY,
                    ),
                    color="blue",
                )

            # Overlay yellow chromatin spots on the blue nuclei mask.
            nuclei_mask = merge_images([nuclei_mask, img_chrom], [1, 1])

            try:
                img = cv2.cvtColor(full_im, cv2.COLOR_BGR2GRAY)
            except:
                img = full_im

            oryginal = adjust_img_16bit(img, color="gray")

            concatenated_image = cv2.hconcat([oryginal, nuclei_mask])

            self.images["nuclei_chromatinization"] = concatenated_image

            if cfg._DISPLAY_MODE:
                if self.show_plots:
                    display_preview(
                        self.resize_to_screen_img(
                            self.images["nuclei_chromatinization"]
                        )
                    )

        else:
            print("Lack of nuclei data to select!")

    # separate function for chromatinization

    def _nuclei_chromatinization_series(self, image, nuclei_data):
        """
        Helper method for performing chromatinization analysis on nuclei detected in the provided image.
2282 """ 2283 2284 def add_lists(f, g): 2285 result = [] 2286 max_length = max(len(f), len(g)) 2287 2288 for i in range(max_length): 2289 f_elem = f[i] if i < len(f) else "" 2290 g_elem = g[i] if i < len(g) else "" 2291 result.append(f_elem + g_elem) 2292 2293 return result 2294 2295 def reverse_coords(image, x, y): 2296 2297 zero = np.zeros(image.shape) 2298 2299 zero[x, y] = 2**16 2300 2301 zero_indices = np.where(zero == 0) 2302 2303 return zero_indices[0], zero_indices[1] 2304 2305 nuclei_dictionary = nuclei_data.copy() 2306 2307 if nuclei_dictionary is not None: 2308 arrays_list = copy.deepcopy(nuclei_dictionary["coords"]) 2309 2310 chromatione_info = { 2311 "area": [], 2312 "area_bbox": [], 2313 "area_convex": [], 2314 "area_filled": [], 2315 "axis_major_length": [], 2316 "axis_minor_length": [], 2317 "eccentricity": [], 2318 "equivalent_diameter_area": [], 2319 "feret_diameter_max": [], 2320 "solidity": [], 2321 "perimeter": [], 2322 "perimeter_crofton": [], 2323 "coords": [], 2324 } 2325 2326 full_im = np.zeros(image.shape[0:2], dtype=np.uint16) 2327 full_im = adjust_img_16bit(full_im) 2328 2329 for arr in arrays_list: 2330 x = list(arr[:, 0]) 2331 y = list(arr[:, 1]) 2332 2333 x1, y1 = reverse_coords(image, x, y) 2334 2335 regions_chro2 = image.copy() 2336 2337 regions_chro2[x1, y1] = 0 2338 2339 regions_chro2 = regions_chro2.astype("uint16") 2340 2341 try: 2342 regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY) 2343 except: 2344 pass 2345 2346 regions_chro2 = adjust_img_16bit( 2347 regions_chro2, 2348 brightness=self.img_adj_par_chrom["brightness"], 2349 contrast=self.img_adj_par_chrom["contrast"], 2350 gamma=self.img_adj_par_chrom["gamma"], 2351 ) 2352 2353 full_im = merge_images( 2354 image_list=[full_im, regions_chro2], intensity_factors=[1, 1] 2355 ) 2356 2357 ret, _ = cv2.threshold( 2358 regions_chro2[x, y], 2359 0, 2360 2**16 - 1, 2361 cv2.THRESH_BINARY + cv2.THRESH_OTSU, 2362 ) 2363 2364 regions_chro2[ 2365 regions_chro2 2366 <= ret * 
self.hyperparameter_chromatinization["cut_point"] 2367 ] = 0 2368 2369 regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY) 2370 2371 chromatione = regions_chro2 > 0 2372 2373 labeled_cells = measure.label(chromatione) 2374 regions = measure.regionprops(labeled_cells) 2375 regions = measure.regionprops( 2376 labeled_cells, intensity_image=regions_chro2 2377 ) 2378 2379 for region in regions: 2380 2381 chromatione_info["area"].append(region.area) 2382 chromatione_info["area_bbox"].append(region.area_bbox) 2383 chromatione_info["area_convex"].append(region.area_convex) 2384 chromatione_info["area_filled"].append(region.area_filled) 2385 chromatione_info["axis_major_length"].append( 2386 region.axis_major_length 2387 ) 2388 chromatione_info["axis_minor_length"].append( 2389 region.axis_minor_length 2390 ) 2391 chromatione_info["eccentricity"].append(region.eccentricity) 2392 chromatione_info["equivalent_diameter_area"].append( 2393 region.equivalent_diameter_area 2394 ) 2395 chromatione_info["feret_diameter_max"].append( 2396 region.feret_diameter_max 2397 ) 2398 chromatione_info["solidity"].append(region.solidity) 2399 chromatione_info["perimeter"].append(region.perimeter) 2400 chromatione_info["perimeter_crofton"].append( 2401 region.perimeter_crofton 2402 ) 2403 chromatione_info["coords"].append(region.coords) 2404 2405 ratios = [] 2406 2407 for min_len, max_len in zip( 2408 chromatione_info["axis_minor_length"], 2409 chromatione_info["axis_major_length"], 2410 ): 2411 if max_len != 0: 2412 ratio = min_len / max_len 2413 ratios.append(ratio) 2414 else: 2415 ratios.append(float(0.0)) 2416 2417 chromatione_info["ratio"] = ratios 2418 2419 chromation_dic = self.drop_dict( 2420 chromatione_info, 2421 key="area", 2422 var=self.hyperparameter_chromatinization["min_size"], 2423 action=">", 2424 ) 2425 chromation_dic = self.drop_dict( 2426 chromation_dic, 2427 key="area", 2428 var=self.hyperparameter_chromatinization["max_size"], 2429 action="<", 2430 ) 2431 
chromation_dic = self.drop_dict( 2432 chromation_dic, 2433 key="ratio", 2434 var=self.hyperparameter_chromatinization["ratio"], 2435 action=">", 2436 ) 2437 2438 arrays_list2 = copy.deepcopy(chromation_dic["coords"]) 2439 2440 nuclei_dictionary["spot_size_area"] = [] 2441 nuclei_dictionary["spot_size_area_bbox"] = [] 2442 nuclei_dictionary["spot_size_area_convex"] = [] 2443 nuclei_dictionary["spot_size_area_filled"] = [] 2444 nuclei_dictionary["spot_axis_major_length"] = [] 2445 nuclei_dictionary["spot_axis_minor_length"] = [] 2446 nuclei_dictionary["spot_eccentricity"] = [] 2447 nuclei_dictionary["spot_size_equivalent_diameter_area"] = [] 2448 nuclei_dictionary["spot_feret_diameter_max"] = [] 2449 nuclei_dictionary["spot_perimeter"] = [] 2450 nuclei_dictionary["spot_perimeter_crofton"] = [] 2451 2452 for arr in arrays_list: 2453 2454 spot_size_area = [] 2455 spot_size_area_bbox = [] 2456 spot_size_area_convex = [] 2457 spot_size_area_convex = [] 2458 spot_size_area_filled = [] 2459 spot_axis_major_length = [] 2460 spot_axis_minor_length = [] 2461 spot_eccentricity = [] 2462 spot_size_equivalent_diameter_area = [] 2463 spot_feret_diameter_max = [] 2464 spot_perimeter = [] 2465 spot_perimeter_crofton = [] 2466 2467 # Flatten the array, 2468 df_tmp = pd.DataFrame(arr) 2469 df_tmp["duplicates"] = add_lists( 2470 [str(x) for x in df_tmp[0]], [str(y) for y in df_tmp[1]] 2471 ) 2472 2473 counter_tmp = Counter(df_tmp["duplicates"]) 2474 2475 for j, arr2 in enumerate(arrays_list2): 2476 df_tmp2 = pd.DataFrame(arr2) 2477 df_tmp2["duplicates"] = add_lists( 2478 [str(x) for x in df_tmp2[0]], [str(y) for y in df_tmp2[1]] 2479 ) 2480 2481 counter_tmp2 = Counter(df_tmp2["duplicates"]) 2482 intersection_length = len(counter_tmp.keys() & counter_tmp2.keys()) 2483 min_length = min(len(counter_tmp), len(counter_tmp2)) 2484 2485 if intersection_length >= 0.8 * min_length: 2486 2487 if ( 2488 len(list(df_tmp2["duplicates"])) 2489 / len(list(df_tmp["duplicates"])) 2490 ) >= 0.025 and ( 
2491 len(list(df_tmp2["duplicates"])) 2492 / len(list(df_tmp["duplicates"])) 2493 ) <= 0.5: 2494 spot_size_area.append(chromation_dic["area"][j]) 2495 spot_size_area_bbox.append(chromation_dic["area_bbox"][j]) 2496 spot_size_area_convex.append( 2497 chromation_dic["area_convex"][j] 2498 ) 2499 spot_size_area_filled.append( 2500 chromation_dic["area_filled"][j] 2501 ) 2502 spot_axis_major_length.append( 2503 chromation_dic["axis_major_length"][j] 2504 ) 2505 spot_axis_minor_length.append( 2506 chromation_dic["axis_minor_length"][j] 2507 ) 2508 spot_eccentricity.append(chromation_dic["eccentricity"][j]) 2509 spot_size_equivalent_diameter_area.append( 2510 chromation_dic["equivalent_diameter_area"][j] 2511 ) 2512 spot_feret_diameter_max.append( 2513 chromation_dic["feret_diameter_max"][j] 2514 ) 2515 spot_perimeter.append(chromation_dic["perimeter"][j]) 2516 spot_perimeter_crofton.append( 2517 chromation_dic["perimeter_crofton"][j] 2518 ) 2519 2520 nuclei_dictionary["spot_size_area"].append(spot_size_area) 2521 nuclei_dictionary["spot_size_area_bbox"].append(spot_size_area_bbox) 2522 nuclei_dictionary["spot_size_area_convex"].append(spot_size_area_convex) 2523 nuclei_dictionary["spot_size_area_filled"].append(spot_size_area_filled) 2524 nuclei_dictionary["spot_axis_major_length"].append( 2525 spot_axis_major_length 2526 ) 2527 nuclei_dictionary["spot_axis_minor_length"].append( 2528 spot_axis_minor_length 2529 ) 2530 nuclei_dictionary["spot_eccentricity"].append(spot_eccentricity) 2531 nuclei_dictionary["spot_size_equivalent_diameter_area"].append( 2532 spot_size_equivalent_diameter_area 2533 ) 2534 nuclei_dictionary["spot_feret_diameter_max"].append( 2535 spot_feret_diameter_max 2536 ) 2537 nuclei_dictionary["spot_perimeter"].append(spot_perimeter) 2538 nuclei_dictionary["spot_perimeter_crofton"].append( 2539 spot_perimeter_crofton 2540 ) 2541 2542 self.nuclei_results["chromatinization"] = chromation_dic 2543 self.nuclei_results["nuclei_chromatinization"] = 
nuclei_dictionary 2544 2545 self.images["nuclei_chromatinization"] = self.create_mask( 2546 chromation_dic, image 2547 ) 2548 2549 img_chrom = adjust_img_16bit( 2550 cv2.cvtColor( 2551 self.create_mask(self.nuclei_results["chromatinization"], image), 2552 cv2.COLOR_BGR2GRAY, 2553 ), 2554 color="yellow", 2555 ) 2556 2557 nuclei_mask = adjust_img_16bit( 2558 cv2.cvtColor(self.create_mask(nuclei_data, image), cv2.COLOR_BGR2GRAY), 2559 color="blue", 2560 ) 2561 2562 nuclei_mask = merge_images([nuclei_mask, img_chrom], [1, 1]) 2563 2564 try: 2565 img = cv2.cvtColor(full_im, cv2.COLOR_BGR2GRAY) 2566 except: 2567 img = full_im 2568 2569 oryginal = adjust_img_16bit(img, color="gray") 2570 2571 concatenated_image = cv2.hconcat([oryginal, nuclei_mask]) 2572 2573 self.images["nuclei_chromatinization"] = concatenated_image 2574 2575 if cfg._DISPLAY_MODE: 2576 if self.show_plots: 2577 display_preview( 2578 self.resize_to_screen_img( 2579 self.images["nuclei_chromatinization"] 2580 ) 2581 ) 2582 2583 else: 2584 print("Lack of nuclei data to select!") 2585 2586 def browser_test(self): 2587 """ 2588 Displays test results generated by the ``nuclei_finder_test()`` method 2589 in the default web browser. 
2590 """ 2591 2592 html_content = "" 2593 2594 for fig in self.test_results: 2595 buf = BytesIO() 2596 fig.savefig(buf, format="png", bbox_inches="tight") 2597 buf.seek(0) 2598 2599 img_base64 = base64.b64encode(buf.read()).decode("utf-8") 2600 2601 html_content += f'<img src="data:image/png;base64,{img_base64}" style="margin:10px;"/>\n' 2602 2603 with tempfile.NamedTemporaryFile( 2604 mode="w", delete=False, suffix=".html" 2605 ) as tmp_file: 2606 tmp_file.write(html_content) 2607 tmp_filename = tmp_file.name 2608 2609 webbrowser.open_new_tab(tmp_filename) 2610 2611 def series_analysis_chromatinization( 2612 self, 2613 path_to_images: str, 2614 file_extension: str = "tiff", 2615 selected_id: list = [], 2616 fille_name_part: str = "", 2617 selection_opt: bool = True, 2618 include_img: bool = True, 2619 test_series: int = 0, 2620 ): 2621 """ 2622 Performs full analysis on images provided via the ``input_image()`` method 2623 using default or user-defined parameters. 2624 2625 This method runs nuclei detection, nuclei selection, and chromatinization 2626 analysis in a single pipeline. Users can adjust parameters for each step 2627 before running the analysis. 
2628 2629 To show current parameters, use: 2630 - ``current_parameters_nuclei`` 2631 - ``current_parameters_img_adj`` 2632 - ``current_parameters_chromatinization`` 2633 - ``current_parameters_img_adj_chro`` 2634 2635 To set new parameters, use: 2636 - ``set_nms()`` 2637 - ``set_prob()`` 2638 - ``set_adj_image_gamma()`` 2639 - ``set_adj_image_contrast()`` 2640 - ``set_adj_image_brightness()`` 2641 - ``set_nuclei_circularity()`` 2642 - ``set_nuclei_size()`` 2643 - ``set_nuclei_min_mean_intensity()`` 2644 - ``set_chromatinization_size()`` 2645 - ``set_chromatinization_ratio()`` 2646 - ``set_chromatinization_cut_point()`` 2647 - ``set_adj_chrom_gamma()`` 2648 - ``set_adj_chrom_contrast()`` 2649 - ``set_adj_chrom_brightness()`` 2650 2651 Parameters 2652 ---------- 2653 path_to_images : str 2654 Path to the directory containing images for analysis. 2655 2656 file_extension : str, optional 2657 Extension of the image files. Default is 'tiff'. 2658 2659 selected_id : list, optional 2660 List of IDs that must be part of the image name to distinguish them 2661 from others. Default is an empty list, which means all images in 2662 the directory will be processed. 2663 2664 fille_name_part : str, optional 2665 Part of the file name to filter images. Default is an empty string. 2666 2667 selection_opt : bool, optional 2668 Whether to run ``select_nuclei()`` with the defined parameters. Default is True. 2669 2670 include_img : bool, optional 2671 Whether to include the images in the result dictionary. Default is True. 2672 2673 test_series : int, optional 2674 Number of images to test the parameters and return results. Default is 0, 2675 which means all images in the directory will be processed. 2676 2677 Returns 2678 ------- 2679 results_dict : dict 2680 Dictionary containing results for each image in the directory. 2681 Keys correspond to image file names. 2682 2683 Notes 2684 ----- 2685 This method runs the complete nuclei and chromatinization analysis pipeline. 
2686 2687 Parameters must be set appropriately before calling to ensure correct results. 2688 """ 2689 2690 results_dict = {} 2691 results_img = {} 2692 results_img_raw = {} 2693 2694 files = glob.glob(os.path.join(path_to_images, "*." + file_extension)) 2695 2696 if len(fille_name_part) > 0: 2697 files = [x for x in files if fille_name_part.lower() in x.lower()] 2698 2699 if len(selected_id) > 0: 2700 selected_id = [str(x) for x in selected_id] 2701 files = [ 2702 x 2703 for x in files 2704 if re.sub("_.*", "", os.path.basename(x)) in selected_id 2705 ] 2706 2707 if test_series > 0: 2708 2709 files = random.sample(files, test_series) 2710 2711 self.show_plots = False 2712 self.series_im = True 2713 2714 print("\nFile analysis:\n\n") 2715 2716 for file in tqdm(files): 2717 2718 print(file) 2719 2720 self.show_plots = False 2721 2722 image = self.load_image(file) 2723 2724 self.input_image(image) 2725 2726 self.find_nuclei() 2727 2728 tmp = None 2729 2730 if selection_opt is True: 2731 self.select_nuclei() 2732 tmp = self.get_results_nuclei_selected() 2733 2734 else: 2735 tmp = self.get_results_nuclei() 2736 2737 if tmp is not None: 2738 2739 if tmp[0] is not None: 2740 2741 results_dict[str(os.path.basename(file))] = tmp[0] 2742 results_img[str(os.path.basename(file))] = tmp[1] 2743 results_img_raw[str(os.path.basename(file))] = image 2744 del tmp 2745 del image 2746 2747 results_dict_tmp = self.repairing_nuclei(results_dict) 2748 2749 results_dict = {} 2750 2751 print("\nChromatization searching:\n\n") 2752 2753 for ke in tqdm(results_dict_tmp.keys()): 2754 2755 tmp = None 2756 2757 try: 2758 self._nuclei_chromatinization_series( 2759 results_img_raw[ke], results_dict_tmp[ke] 2760 ) 2761 tmp = self.get_results_nuclei_chromatinization() 2762 except: 2763 print(f"Sample {ke} could not be processed.") 2764 2765 if tmp is not None: 2766 2767 if tmp[0] is not None: 2768 2769 tmp[0].pop("coords") 2770 2771 if include_img: 2772 results_dict[str(os.path.basename(ke))] = { 
2773 "stats": tmp[0], 2774 "img": cv2.hconcat([results_img[ke], tmp[1]]), 2775 } 2776 del tmp 2777 else: 2778 results_dict[str(os.path.basename(ke))] = tmp[0] 2779 del tmp 2780 2781 else: 2782 print(f"Unable to obtain results for {print(ke)}") 2783 2784 self.show_plots = True 2785 self.series_im = False 2786 2787 return results_dict 2788 2789 def series_analysis_nuclei( 2790 self, 2791 path_to_images: str, 2792 file_extension: str = "tiff", 2793 selected_id: list = [], 2794 fille_name_part: str = "", 2795 selection_opt: bool = True, 2796 include_img: bool = True, 2797 test_series: int = 0, 2798 ): 2799 """ 2800 Performs analysis on the image provided by the ``input_image()`` method 2801 using default or user-defined parameters. 2802 2803 This method runs nuclei detection and selection using the currently set 2804 parameters. Users can adjust image preprocessing and nuclei detection 2805 parameters before running the analysis. 2806 2807 To show current parameters, use: 2808 - ``current_parameters_nuclei`` 2809 - ``current_parameters_img_adj`` 2810 2811 To set new parameters, use: 2812 - ``set_nms()`` 2813 - ``set_prob()`` 2814 - ``set_adj_image_gamma()`` 2815 - ``set_adj_image_contrast()`` 2816 - ``set_adj_image_brightness()`` 2817 - ``set_nuclei_circularity()`` 2818 - ``set_nuclei_size()`` 2819 - ``set_nuclei_min_mean_intensity()`` 2820 2821 Parameters 2822 ---------- 2823 path_to_images : str 2824 Path to the directory containing images for analysis. 2825 2826 file_extension : str, optional 2827 Extension of the image files. Default is 'tiff'. 2828 2829 selected_id : list, optional 2830 List of IDs that must be part of the image name to distinguish them 2831 from others. Default is an empty list, which means all images in 2832 the directory will be processed. 2833 2834 fille_name_part : str, optional 2835 Part of the file name to filter images. Default is an empty string. 
2836 2837 selection_opt : bool, optional 2838 Whether to run the ``select_nuclei()`` method with the defined parameters. 2839 Default is True. 2840 2841 include_img : bool, optional 2842 Whether to include the images in the result dictionary. Default is True. 2843 2844 test_series : int, optional 2845 Number of images to test the parameters and return results. Default is 0, 2846 which means all images in the directory will be processed. 2847 2848 Returns 2849 ------- 2850 results_dict : dict 2851 Dictionary containing results for each image in the directory. 2852 Keys correspond to image file names. 2853 """ 2854 2855 results_dict = {} 2856 results_img = {} 2857 2858 files = glob.glob(os.path.join(path_to_images, "*." + file_extension)) 2859 2860 if len(fille_name_part) > 0: 2861 files = [x for x in files if fille_name_part.lower() in x.lower()] 2862 2863 if len(selected_id) > 0: 2864 selected_id = [str(x) for x in selected_id] 2865 files = [ 2866 x 2867 for x in files 2868 if re.sub("_.*", "", os.path.basename(x)) in selected_id 2869 ] 2870 2871 if test_series > 0: 2872 2873 files = random.sample(files, test_series) 2874 2875 self.show_plots = False 2876 self.series_im = True 2877 2878 print("\nFile analysis:\n\n") 2879 2880 for file in tqdm(files): 2881 2882 print(file) 2883 2884 image = self.load_image(file) 2885 2886 self.input_image(image) 2887 2888 self.find_nuclei() 2889 2890 if self.nuclei_results["nuclei"] is not None: 2891 2892 tmp = [None] 2893 2894 if selection_opt is True: 2895 self.select_nuclei() 2896 tmp = self.get_results_nuclei_selected() 2897 2898 else: 2899 tmp = self.get_results_nuclei() 2900 2901 if tmp is not None: 2902 2903 if tmp[0] is not None: 2904 2905 if include_img: 2906 results_dict[str(os.path.basename(file))] = tmp[0] 2907 results_img[str(os.path.basename(file))] = tmp[1] 2908 2909 del tmp 2910 2911 else: 2912 results_dict[str(os.path.basename(file))] = tmp[0] 2913 del tmp 2914 2915 else: 2916 print(f"Unable to obtain results for 
{print(file)}") 2917 2918 else: 2919 2920 print(f"Unable to obtain results for {print(file)}") 2921 2922 self.show_plots = True 2923 self.series_im = False 2924 2925 results_dict_tmp = self.repairing_nuclei(results_dict) 2926 2927 if include_img is False: 2928 2929 return results_dict_tmp 2930 2931 else: 2932 2933 results_dict = {} 2934 2935 for ke in results_dict_tmp.keys(): 2936 2937 nuclei_mask = adjust_img_16bit( 2938 cv2.cvtColor( 2939 self.create_mask(results_dict_tmp[ke], results_img[ke]), 2940 cv2.COLOR_BGR2GRAY, 2941 ), 2942 color="blue", 2943 ) 2944 concatenated_image = cv2.hconcat([results_img[ke], nuclei_mask]) 2945 2946 cred = results_dict_tmp[ke] 2947 # cred.pop('coords') 2948 2949 results_dict[ke] = {"stats": cred, "img": concatenated_image} 2950 2951 return results_dict 2952 2953 2954class NucleiDataManagement: 2955 """ 2956 Manages nuclei analysis data obtained from the `NucleiFinder` class, 2957 including nuclei properties and optionally Image Stream (IS) data. 2958 2959 This class allows loading nuclei data from JSON files or directly from 2960 `NucleiFinder` analysis results, converting them to pandas DataFrames, 2961 adding IS data, concatenating results from multiple experiments, and 2962 saving results in JSON or CSV format. It also provides helper methods 2963 for merging, filtering, and retrieving data. 2964 2965 Attributes 2966 ---------- 2967 nuceli_data : dict 2968 Dictionary storing nuclei properties for each image or experiment. 2969 2970 experiment_name : str 2971 Name of the experiment. 2972 2973 nuceli_data_df : pd.DataFrame or None 2974 DataFrame representation of nuclei properties. 2975 2976 nuclei_IS_data : pd.DataFrame or None 2977 DataFrame of nuclei data merged with IS data. 2978 2979 concat_data : list or None 2980 List of other `NucleiDataManagement` objects added for combined analysis. 2981 2982 Methods 2983 ------- 2984 load_nuc_dict(path) 2985 Load nuclei data from a JSON dictionary file (*.nuc) and initialize the object. 
    _convert_to_df()
        Convert nuclei dictionary data to a pandas DataFrame.

    add_IS_data(IS_data, IS_features)
        Merge Image Stream (IS) data with nuclei data.

    get_data()
        Retrieve the nuclei data as a pandas DataFrame.

    get_data_with_IS()
        Retrieve the nuclei data merged with IS data.

    save_nuc_project(path)
        Save nuclei data as a JSON file with *.nuc extension.

    save_results_df(path)
        Save nuclei data as a CSV file.

    save_results_df_with_IS(path)
        Save nuclei data merged with IS data as a CSV file.

    add_experiment(data_list)
        Add other `NucleiDataManagement` objects for concatenated analysis.

    get_mutual_experiments_data(inc_is)
        Retrieve concatenated nuclei data from multiple experiments.

    save_mutual_experiments(path, inc_is)
        Save concatenated data from multiple experiments as a CSV file.
    """

    def __init__(self, nuclei_data: dict, experiment_name: str):
        """
        Initialize a NucleiDataManagement object with nuclei data and experiment name.

        Parameters
        ----------
        nuclei_data : dict
            Dictionary containing nuclei properties for each image or experiment.
            If the dictionary entries have keys 'stats' and 'img', only 'stats' are stored.

        experiment_name : str
            Name of the experiment.

        Attributes
        ----------
        nuceli_data : dict
            Dictionary storing nuclei properties for each image or experiment.

        experiment_name : str
            Name of the experiment.

        nuceli_data_df : pd.DataFrame or None
            DataFrame representation of nuclei properties (initialized as None).

        nuclei_IS_data : pd.DataFrame or None
            DataFrame of nuclei data merged with Image Stream (IS) data (initialized as None).

        concat_data : list or None
            List of other `NucleiDataManagement` objects added for combined analysis (initialized as None).
        """

        # Shape is detected from the first entry: {'stats': ..., 'img': ...}
        # entries come from series analysis with include_img=True.
        if set(nuclei_data[list(nuclei_data.keys())[0]].keys()) == set(
            ["stats", "img"]
        ):

            self.nuceli_data = {}

            for k in nuclei_data.keys():
                self.nuceli_data[k] = nuclei_data[k]["stats"]

            # Pixel coordinates are large and not needed for tabular analysis.
            for k in self.nuceli_data.keys():
                if "coords" in self.nuceli_data[k].keys():
                    self.nuceli_data[k].pop("coords")

        else:
            self.nuceli_data = nuclei_data

            for k in self.nuceli_data.keys():
                if "coords" in self.nuceli_data[k].keys():
                    self.nuceli_data[k].pop("coords")

        self.experiment_name = experiment_name
        """Name of the experiment."""

        self.nuceli_data_df = None
        """Stored DataFrame representation of nuclei features"""

        self.nuclei_IS_data = None
        """Stored DataFrame of data from Image Stream (IS)."""

        self.concat_data = None
        """Stored list of other `NucleiDataManagement` objects."""

    @classmethod
    def load_nuc_dict(cls, path: str):
        """
        Initialize a NucleiDataManagement object from a JSON dictionary file.

        The loaded data must be previously saved using the ``save_nuc_project()`` method.

        Parameters
        ----------
        path : str
            Path to the *.nuc JSON file containing nuclei data.
        """

        if ".nuc" in path:

            if os.path.exists(path):

                with open(path, "r") as json_file:
                    loaded_data = json.load(json_file)

                # Experiment name is the file name without its extension.
                return cls(loaded_data, os.path.splitext(os.path.basename(path))[0])

            else:
                raise ValueError("\nInvalid path!")

        else:
            raise ValueError(
                "\nInvalid dictionary to load. It must contain a .nuc extension!"
            )

    def _convert_to_df(self):
        """
        Helper method that converts the internal nuclei dictionary into a pandas DataFrame.

        This method iterates over the nuclei data stored in `self.nuceli_data`,
        flattens the information for each nucleus, computes aggregate statistics
        for associated spots if present, and stores the resulting DataFrame in
        `self.nuceli_data_df`.
        """

        nuclei_data = self.nuceli_data

        data = []

        # One output row per nucleus, across all images.
        for i in tqdm(nuclei_data.keys()):
            for n, _ in enumerate(nuclei_data[i]["area"]):
                row = {
                    # image ID: everything before the first '_' of the key
                    "id_name": re.sub("_.*", "", i),
                    "nuclei_area": nuclei_data[i]["area"][n],
                    "nuclei_area_bbox": nuclei_data[i]["area_bbox"][n],
                    "nuclei_equivalent_diameter_area": nuclei_data[i][
                        "equivalent_diameter_area"
                    ][n],
                    "nuclei_feret_diameter_max": nuclei_data[i]["feret_diameter_max"][
                        n
                    ],
                    "nuclei_axis_major_length": nuclei_data[i]["axis_major_length"][n],
                    "nuclei_axis_minor_length": nuclei_data[i]["axis_minor_length"][n],
                    "nuclei_circularity": nuclei_data[i]["circularity"][n],
                    "nuclei_eccentricity": nuclei_data[i]["eccentricity"][n],
                    "nuclei_perimeter": nuclei_data[i]["perimeter"][n],
                    "nuclei_ratio": nuclei_data[i]["ratio"][n],
                    "nuclei_solidity": nuclei_data[i]["solidity"][n],
                }

                # Spot (chromatinization) aggregates, when present.
                if "spot_size_area" in nuclei_data[i]:
                    if len(nuclei_data[i]["spot_size_area"][n]) > 0:
                        row.update(
                            {
                                "spot_n": len(nuclei_data[i]["spot_size_area"][n]),
                                "avg_spot_area": np.mean(
                                    nuclei_data[i]["spot_size_area"][n]
                                ),
                                "avg_spot_area_bbox": np.mean(
                                    nuclei_data[i]["spot_size_area_bbox"][n]
                                ),
                                "avg_spot_perimeter": np.mean(
                                    nuclei_data[i]["spot_perimeter"][n]
                                ),
                                "sum_spot_area": np.sum(
                                    nuclei_data[i]["spot_size_area"][n]
                                ),
                                "sum_spot_area_bbox": np.sum(
                                    nuclei_data[i]["spot_size_area_bbox"][n]
                                ),
                                "sum_spot_perimeter": np.sum(
                                    nuclei_data[i]["spot_perimeter"][n]
                                ),
                                "avg_spot_axis_major_length": np.mean(
                                    nuclei_data[i]["spot_axis_major_length"][n]
                                ),
                                "avg_spot_axis_minor_length": np.mean(
                                    nuclei_data[i]["spot_axis_minor_length"][n]
                                ),
                                "avg_spot_eccentricity": np.mean(
                                    nuclei_data[i]["spot_eccentricity"][n]
                                ),
                                "avg_spot_size_equivalent_diameter_area": np.mean(
                                    nuclei_data[i][
                                        "spot_size_equivalent_diameter_area"
                                    ][n]
                                ),
                                "sum_spot_size_equivalent_diameter_area": np.sum(
                                    nuclei_data[i][
                                        "spot_size_equivalent_diameter_area"
                                    ][n]
                                ),
                            }
                        )
                    else:
                        # No spots in this nucleus: zero-fill all spot columns
                        # so the DataFrame stays rectangular.
                        row.update(
                            {
                                k: 0
                                for k in [
                                    "spot_n",
                                    "avg_spot_area",
                                    "avg_spot_area_bbox",
                                    "avg_spot_perimeter",
                                    "sum_spot_area",
                                    "sum_spot_area_bbox",
                                    "sum_spot_perimeter",
                                    "avg_spot_axis_major_length",
                                    "avg_spot_axis_minor_length",
                                    "avg_spot_eccentricity",
                                    "avg_spot_size_equivalent_diameter_area",
                                    "sum_spot_size_equivalent_diameter_area",
                                ]
                            }
                        )

                data.append(row)

        nuclei_df = pd.DataFrame(data)

        # Per-image nucleus count, broadcast to every row of that image.
        nuclei_df["nuclei_per_img"] = nuclei_df.groupby("id_name")["id_name"].transform(
            "count"
        )
        nuclei_df["set"] = self.experiment_name

        self.nuceli_data_df = nuclei_df

    def add_IS_data(self, IS_data: pd.DataFrame, IS_features: list = []):
        """
        Merge Image Stream (IS) data with nuclei analysis data.

        This method concatenates IS (Image Stream, https://cytekbio.com/pages/imagestream)
        results with the nuclei data stored in the object. The merge is performed based
        on object IDs, allowing joint analysis of nuclei features and IS features.

        Parameters
        ----------
        IS_data : pd.DataFrame
            DataFrame containing IS data results.

        IS_features : list, optional
            List of features to extract from the IS data. Default is an empty list.

        Notes
        -----
        The merged data will be stored in the attribute `self.nuclei_IS_data`.
3240 """ 3241 3242 nuclei_data = self._get_df() 3243 3244 IS_data["set"] = self.experiment_name 3245 3246 if len(IS_features) > 0: 3247 IS_features = list(set(IS_features + ["Object Number", "set"])) 3248 IS_data = IS_data[IS_features] 3249 3250 nuclei_data["id"] = ( 3251 nuclei_data["id_name"].astype(str) + "_" + nuclei_data["set"] 3252 ) 3253 IS_data["id"] = IS_data["Object Number"].astype(str) + "_" + IS_data["set"] 3254 3255 merged_data = pd.merge(nuclei_data, IS_data, on="id", how="left") 3256 merged_data.pop("set_x") 3257 merged_data = merged_data.rename(columns={"set_y": "set"}) 3258 3259 self.nuclei_IS_data = merged_data 3260 3261 def _get_df(self): 3262 """ 3263 Helper method to retrieve the nuclei data as a pandas DataFrame. 3264 3265 If the internal DataFrame `self.nuceli_data_df` has not been created yet, 3266 this method calls `_convert_to_df()` to generate it from `self.nuceli_data`. 3267 """ 3268 3269 if self.nuceli_data_df is None: 3270 self._convert_to_df() 3271 3272 return self.nuceli_data_df 3273 3274 def get_data_with_IS(self): 3275 """ 3276 Retrieve nuclei results for a single project including IS data. 3277 3278 Returns 3279 ------- 3280 pd.DataFrame or None 3281 DataFrame containing nuclei results merged with IS (Image Stream) data 3282 added via `self.add_IS_data()`. Returns None if no IS data has been added. 3283 """ 3284 3285 if self.nuclei_IS_data is None: 3286 print("\nNothing to return!") 3287 return self.nuclei_IS_data 3288 3289 def get_data(self): 3290 """ 3291 Retrieve nuclei results for a single project as a pandas DataFrame. 3292 3293 Returns 3294 ------- 3295 pd.DataFrame 3296 DataFrame containing nuclei analysis results for the experiment. 3297 """ 3298 3299 return self._get_df() 3300 3301 def save_nuc_project(self, path: str = ""): 3302 """ 3303 Save nuclei results as a JSON file with a *.nuc extension. 3304 3305 The saved data can later be loaded using the `cls.load_nuc_dict()` method. 
3306 Results must be obtained from the `NucleiFinder` class using 3307 `series_analysis_nuclei()` or `series_analysis_chromatinization()` methods. 3308 3309 Parameters 3310 ---------- 3311 path : str, optional 3312 Directory where the results will be saved. Default is the current working directory. 3313 """ 3314 3315 data = self.nuceli_data 3316 3317 if len(data.keys()) > 0: 3318 full_path = os.path.join(path, self.experiment_name) 3319 3320 with open(full_path + ".nuc", "w") as json_file: 3321 json.dump(data, json_file, indent=4) 3322 else: 3323 print("\nData not provided!") 3324 3325 def save_results_df(self, path: str = ""): 3326 """ 3327 Save nuclei results for a single project as a CSV file. 3328 3329 Results must be obtained from the `NucleiFinder` class using 3330 `series_analysis_nuclei()` or `series_analysis_chromatinization()` methods. 3331 3332 Parameters 3333 ---------- 3334 path : str, optional 3335 Directory where the CSV file will be saved. Default is the current working directory. 3336 """ 3337 3338 data = self.get_data() 3339 3340 full_path = os.path.join(path, f"{self.experiment_name}.csv") 3341 3342 data.to_csv(full_path, index=False) 3343 3344 def save_results_df_with_IS(self, path: str = ""): 3345 """ 3346 Save nuclei results with IS data for a single project as a CSV file. 3347 3348 Results must be obtained from the `NucleiFinder` class using 3349 `series_analysis_nuclei()` or `series_analysis_chromatinization()` methods. 3350 IS data should have been added via `self.add_IS_data()`. 3351 3352 Parameters 3353 ---------- 3354 path : str, optional 3355 Directory where the CSV file will be saved. Default is the current working directory. 
3356 """ 3357 3358 data = self.get_data_with_IS() 3359 3360 if data is None: 3361 raise ValueError("There was nothing to save.") 3362 3363 full_path = os.path.join(path, f"{self.experiment_name}_IS.csv") 3364 data.to_csv(full_path, index=False) 3365 3366 def add_experiment(self, data_list: list): 3367 """ 3368 Add additional NucleiDataManagement objects from other experiments for concatenation. 3369 3370 Parameters 3371 ---------- 3372 data_list : list 3373 List of `NucleiDataManagement` objects from separate experiments to be added. 3374 """ 3375 3376 valid_class = [] 3377 for obj in data_list: 3378 if isinstance(obj, self.__class__): 3379 valid_class.append(obj) 3380 else: 3381 print(f"Object {obj} is invalid type.") 3382 3383 self.concat_data = valid_class 3384 3385 def get_mutual_experiments_data(self, inc_is: bool = False): 3386 """ 3387 Retrieve concatenated NucleiDataManagement data from other added experiments. 3388 3389 Parameters 3390 ---------- 3391 inc_is : bool, optional 3392 Whether to include IS (Image Stream) data, if it was added to each experiment. Default is False. 3393 3394 Returns 3395 ------- 3396 pd.DataFrame 3397 Concatenated nuclei data (with or without IS data) from all added experiments. 3398 """ 3399 3400 if self.concat_data is not None: 3401 if inc_is: 3402 3403 try: 3404 final_df = pd.concat( 3405 [x.get_data_with_IS() for x in self.concat_data] 3406 + [self.get_data_with_IS()] 3407 ) 3408 except: 3409 raise ValueError( 3410 "Lack of IS data in some object. Check if the IS data was added to each project." 3411 ) 3412 3413 else: 3414 final_df = pd.concat( 3415 [x.get_data() for x in self.concat_data] + [self.get_data()] 3416 ) 3417 3418 return final_df 3419 3420 raise ValueError("No object to concatenate. Nothing to return!") 3421 3422 def save_mutual_experiments(self, path: str = "", inc_is: bool = False): 3423 """ 3424 Save concatenated NucleiDataManagement data from added experiments as a CSV file. 

        Parameters
        ----------
        path : str, optional
            Directory where the CSV file will be saved. Default is the current working directory.

        inc_is : bool, optional
            Whether to include IS (Image Stream) data, if it was added to each experiment. Default is False.
        """

        dt = self.get_mutual_experiments_data(inc_is=inc_is)

        experimets = [self.experiment_name] + [
            n.experiment_name for n in self.concat_data
        ]

        experimets_names = "_".join(experimets)

        if inc_is:
            full_path = os.path.join(path, f"{experimets_names}_IS.csv")
        else:
            full_path = os.path.join(path, f"{experimets_names}.csv")

        dt.to_csv(full_path, index=False)


class GroupAnalysis:
    """
    A class for performing multivariate analysis, dimensionality reduction,
    clustering, and differential feature analysis (DFA) on biological or
    experimental datasets.

    This class provides tools for:
    - Scaling and PCA of input data
    - UMAP embedding and DBSCAN clustering
    - Differential Feature Analysis across groups
    - Proportion analysis and plotting
    - Data selection and merging with metadata

    Attributes
    ----------
    input_data : pd.DataFrame
        The primary dataset containing features for analysis.

    input_metadata : pd.DataFrame
        Metadata corresponding to the input data, including identifiers and group labels.

    tmp_data : pd.DataFrame
        Temporary copy of the input data, used for feature selection and filtering.

    tmp_metadata : pd.DataFrame
        Temporary copy of metadata, used for filtered or subsetted operations.

    scaled_data : np.ndarray or None
        Scaled version of the temporary dataset (`tmp_data`), updated after `data_scale()`.

    PCA_results : np.ndarray or None
        Results of PCA transformation applied on scaled data.

    var_data : np.ndarray or None
        Explained variance ratio from PCA.

    knee_plot : matplotlib.figure.Figure or None
        Figure of cumulative explained variance for PCA components.

    UMAP_data : np.ndarray or None
        Embedding results from UMAP dimensionality reduction.

    UMAP_plot : dict
        Dictionary containing UMAP plots. Keys: 'static' (matplotlib) and 'html' (plotly).

    dblabels : list or None
        Cluster labels assigned by DBSCAN after UMAP embedding.

    explained_variance_ratio : np.ndarray or None
        Explained variance ratio of PCA components.

    DFA_results : pd.DataFrame or None
        Results of Differential Feature Analysis (DFA).

    proportion_stats : pd.DataFrame or None
        Statistics from proportion analysis.

    proportion_plot : matplotlib.figure.Figure or None
        Figure of proportion analysis results.

    Methods
    -------
    resest_project():
        Reset all temporary and analysis results to initial state.

    load_data(data, ids_col='id_name', set_col='set'):
        Class method to load data and metadata and initialize the object.

    groups:
        Property returning available groups in the metadata.

    get_DFA(), get_PCA(), get_knee_plot(), get_var_data(), get_scaled_data():
        Methods to retrieve previously computed results.

    UMAP(), db_scan(), UMAP_on_clusters():
        Methods for dimensionality reduction and clustering visualization.

    DFA(meta_group_by='sets', sets={}, n_proc=5):
        Perform Differential Feature Analysis.

    proportion_analysis(grouping_col='sets', val_col='nuclei_per_img', ...):
        Perform and plot proportion analysis across groups.
    """

    def __init__(
        self,
        input_data,
        input_metadata,
    ):
        """
        Initialize a GroupAnalysis instance with data and metadata.

        Parameters
        ----------
        input_data : pd.DataFrame
            Dataset containing features for analysis. Rows represent samples and columns represent features.
3544 3545 input_metadata : pd.DataFrame 3546 Metadata corresponding to `input_data`, including sample identifiers and group labels. 3547 """ 3548 3549 self.input_data = input_data 3550 """Stored input dataset for analysis.""" 3551 3552 self.input_metadata = input_metadata 3553 """Stored metadata associated with `input_data`.""" 3554 3555 self.tmp_metadata = input_metadata 3556 """Temporary copy of `input_data` used for filtering, selection, or scaling.""" 3557 3558 self.tmp_data = input_data 3559 """Temporary copy of `input_metadata` used for filtered operations.""" 3560 3561 self.scaled_data = None 3562 """Stored scaled version of `tmp_data` after normalization or standardization.""" 3563 3564 self.PCA_results = None 3565 """ Stored results of PCA transformation applied on `scaled_data`.""" 3566 3567 self.var_data = None 3568 """Sotred explained variance ratio for PCA components.""" 3569 3570 self.knee_plot = None 3571 """Figure showing cumulative explained variance for PCA.""" 3572 3573 self.UMAP_data = None 3574 """Stored embedding coordinates from UMAP dimensionality reduction.""" 3575 3576 self.UMAP_plot = {"static": {}, "html": {}} 3577 """Stored dictionary containing UMAP plots: 'static' (matplotlib) and 'html' (plotly).""" 3578 3579 self.dblabels = None 3580 """Stored cluster labels assigned by DBSCAN after UMAP embedding.""" 3581 3582 self.explained_variance_ratio = None 3583 """Stored explained variance ratio of PCA components.""" 3584 3585 self.DFA_results = None 3586 """Stored Differential Feature Analysis (DFA) results.""" 3587 3588 self.proportion_stats = None 3589 """Stored statistics from proportion analysis of groups.""" 3590 3591 self.proportion_plot = None 3592 """Figure visualizing proportion analysis results.""" 3593 3594 def resest_project(self): 3595 """ 3596 Resets the project state by clearing or reinitializing various attributes. 
3597 3598 This method resets the following attributes to initial values: 3599 - `tmp_metadata` 3600 - `tmp_data` 3601 - `scaled_data` 3602 - `PCA_results` 3603 - `var_data` 3604 - `knee_plot` 3605 - `UMAP_data` 3606 - `UMAP_plot` 3607 - `dblabels` 3608 - `explained_variance_ratio` 3609 - `DFA_results` 3610 3611 This method is typically called to reinitialize the project data and results, preparing the system for new computations or project resets. 3612 """ 3613 3614 self.tmp_metadata = self.input_metadata 3615 self.tmp_data = self.input_data 3616 self.scaled_data = None 3617 self.PCA_results = None 3618 self.var_data = None 3619 self.knee_plot = None 3620 self.UMAP_data = None 3621 self.UMAP_plot = {"static": {}, "html": {}} 3622 self.dblabels = None 3623 self.explained_variance_ratio = None 3624 self.DFA_results = None 3625 self.proportion_stats = None 3626 self.proportion_plot = None 3627 3628 @classmethod 3629 def load_data(cls, data, ids_col: str = "id_name", set_col: str = "set"): 3630 """ 3631 Load data and initialize the class by storing both the feature data and metadata. 3632 3633 Parameters 3634 ---------- 3635 data : pd.DataFrame 3636 Input dataset used for group analysis. Must contain both feature columns and 3637 metadata columns specified by `ids_col` and `set_col`. 3638 3639 ids_col : str, optional 3640 Name of the column containing unique object identifiers. 3641 Default is ``'id_name'``. 3642 3643 set_col : str, optional 3644 Name of the column specifying group or set assignment for each object. 3645 Default is ``'set'``. 3646 3647 Notes 3648 ----- 3649 This method performs in-place initialization of the class and does not return 3650 a separate object. All loaded data and metadata become available through the 3651 class attributes for downstream analysis. 3652 3653 This method updates internal class attributes: 3654 3655 - **input_data** : pd.DataFrame 3656 Cleaned feature table with index set to object IDs. 
3657 3658 - **tmp_data** : pd.DataFrame 3659 Copy of `input_data` used for temporary operations. 3660 3661 - **input_metadata** : pd.DataFrame 3662 Metadata containing object IDs and group assignments. 3663 3664 - **tmp_metadata** : pd.DataFrame 3665 Copy of `input_metadata` for temporary operations. 3666 """ 3667 3668 data = data.dropna() 3669 3670 metadata = pd.DataFrame() 3671 metadata["id"] = data[ids_col] 3672 metadata["sets"] = data[set_col] 3673 3674 data.index = data[ids_col] 3675 3676 try: 3677 data.pop("id_name") 3678 except: 3679 None 3680 3681 try: 3682 data.pop("Object Number") 3683 except: 3684 None 3685 3686 return cls(data, metadata) 3687 3688 @property 3689 def groups(self): 3690 """ 3691 Return information about available groups in the metadata for ``self.DFA``. 3692 3693 Returns 3694 ------- 3695 dict 3696 Dictionary mapping each metadata column name to a list of unique groups 3697 available in that column. 3698 """ 3699 3700 try: 3701 return { 3702 "sets": set(self.tmp_metadata["sets"]), 3703 "full_name": set(self.tmp_metadata["full_name"]), 3704 } 3705 except: 3706 return {"sets": set(self.tmp_metadata["sets"])} 3707 3708 def get_DFA(self): 3709 """ 3710 Retrieve the DFA results produced by the ``DFA()`` method. 3711 3712 Returns 3713 ------- 3714 pd.DataFrame 3715 The DFA results stored in ``self.DFA_results``. 3716 """ 3717 3718 if None in self.DFA_results: 3719 print("\nNo results to return! Please run the DFA() method first.") 3720 else: 3721 return self.DFA_results 3722 3723 def get_PCA(self): 3724 """ 3725 Retrieve the PCA results produced by the ``PCA()`` method. 3726 3727 Returns 3728 ------- 3729 np.ndarray 3730 The PCA results stored in ``self.PCA_results``. 3731 """ 3732 3733 if None in self.PCA_results: 3734 print("\nNo results to return! 
Please run the PCA() method first.") 3735 else: 3736 return self.PCA_results 3737 3738 def get_knee_plot(self, show: bool = True): 3739 """ 3740 Retrieve the knee plot of cumulative explained variance generated by the ``var_plot()`` method. 3741 3742 Parameters 3743 ---------- 3744 show : bool, optional 3745 If ``True`` (default), the knee plot is displayed. 3746 3747 Returns 3748 ------- 3749 matplotlib.figure.Figure 3750 The figure object containing the knee plot. 3751 """ 3752 3753 if self.knee_plot is None: 3754 print("\nNo results to return! Please run the var_plot() method first.") 3755 else: 3756 if cfg._DISPLAY_MODE: 3757 if show is True: 3758 self.knee_plot 3759 try: 3760 display(self.knee_plot) 3761 except: 3762 None 3763 3764 return self.knee_plot 3765 3766 def get_var_data(self): 3767 """ 3768 Retrieve the explained variance data from the ``var_plot()`` method. 3769 3770 Returns 3771 ------- 3772 np.ndarray 3773 Array containing the explained variance values stored in ``self.var_data``. 3774 """ 3775 3776 if None in self.var_data: 3777 print("\nNo results to return! Please run the var_plot() method first.") 3778 else: 3779 return self.var_data 3780 3781 def get_scaled_data(self): 3782 """ 3783 Retrieve the scaled data produced by the ``data_scale()`` method. 3784 3785 Returns 3786 ------- 3787 np.ndarray 3788 Scaled data stored in ``self.scaled_data``. 3789 """ 3790 3791 if None in self.scaled_data: 3792 print("\nNo results to return! Please run the data_scale() method first.") 3793 else: 3794 return self.scaled_data 3795 3796 def get_UMAP_data(self): 3797 """ 3798 Retrieve the UMAP-transformed data generated by the ``UMAP()`` method. 3799 3800 Returns 3801 ------- 3802 np.ndarray 3803 UMAP-embedded data stored in ``self.UMAP_data``. 3804 """ 3805 3806 if None in self.UMAP_data: 3807 print("\nNo results to return! 
Please run the UMAP() method first.") 3808 else: 3809 return self.UMAP_data 3810 3811 def get_UMAP_plots(self, plot_type: str = "static", show: bool = True): 3812 """ 3813 Retrieve UMAP plots generated by the ``UMAP()`` and/or ``UMAP_on_clusters()`` methods. 3814 3815 Parameters 3816 ---------- 3817 show : bool, optional 3818 Whether to display the UMAP plots. Default is True. 3819 3820 Returns 3821 ------- 3822 dict of matplotlib.figure.Figure 3823 A dictionary containing the UMAP plots. Keys correspond to plot names, and values are the figure objects. 3824 """ 3825 3826 if plot_type == "html": 3827 3828 if len(self.UMAP_plot["html"].keys()) == 0: 3829 print( 3830 "\nNo results to return! Please run the UMAP() and / or UMAP_on_clusters() methods first." 3831 ) 3832 else: 3833 if cfg._DISPLAY_MODE: 3834 if show: 3835 for k in self.UMAP_plot["html"].keys(): 3836 self.UMAP_plot["html"][k] 3837 try: 3838 display(self.UMAP_plot["html"][k]) 3839 except: 3840 None 3841 3842 return self.UMAP_plot["html"] 3843 3844 else: 3845 3846 if len(self.UMAP_plot["static"].keys()) == 0: 3847 print( 3848 "\nNo results to return! Please run the UMAP() and / or UMAP_on_clusters() methods first." 3849 ) 3850 else: 3851 if cfg._DISPLAY_MODE: 3852 if show: 3853 for k in self.UMAP_plot["static"].keys(): 3854 self.UMAP_plot["static"][k] 3855 try: 3856 display(self.UMAP_plot["static"][k]) 3857 except: 3858 None 3859 3860 return self.UMAP_plot["static"] 3861 3862 def select_data(self, features_list: list = []): 3863 """ 3864 Select specific features (columns) from the dataset for further analysis. 3865 3866 Parameters 3867 ---------- 3868 features_list : list of str, optional 3869 List of feature names (column names) to select from the dataset. Default is an empty list, which selects no features. 3870 3871 Notes 3872 ----- 3873 Modifies the `self.tmp_data` attribute to contain only the selected features from `self.input_data`. 
3874 """ 3875 3876 dat = self.input_data.copy() 3877 3878 not_in_columns = [name for name in features_list if name not in dat.columns] 3879 3880 if not_in_columns: 3881 print("These names are not in data", not_in_columns) 3882 else: 3883 print("All names are present in data.") 3884 3885 in_columns = [name for name in features_list if name in dat.columns] 3886 3887 dat = dat[in_columns] 3888 3889 self.tmp_data = dat 3890 3891 def data_scale(self): 3892 """ 3893 Scale the data using standardization (z-score normalization). 3894 3895 This method applies `StandardScaler` from scikit-learn to the temporary dataset (`self.tmp_data`) and stores the scaled data. 3896 3897 Notes 3898 ----- 3899 Modifies the `self.scaled_data` attribute to contain the standardized version of `self.tmp_data`. 3900 """ 3901 3902 if None not in self.tmp_data: 3903 3904 def is_id_column(name: str): 3905 name_lower = name.lower() 3906 return name_lower == "id" or "id_" in name_lower or "_id" in name_lower 3907 3908 tmp = self.tmp_data 3909 3910 cols_with_strings = [ 3911 c 3912 for c in tmp.columns 3913 if tmp[c].apply(lambda x: isinstance(x, str)).any() 3914 ] 3915 3916 cols_id_pattern = [c for c in tmp.columns if is_id_column(c)] 3917 3918 cols_to_drop = list(set(cols_id_pattern + cols_with_strings)) 3919 3920 tmp = tmp.drop(columns=cols_to_drop) 3921 3922 scaler = StandardScaler() 3923 3924 self.scaled_data = scaler.fit_transform(tmp) 3925 3926 else: 3927 print( 3928 "\nNo data to scale. Please use the load_data() method first, and optionally the select_data() method." 3929 ) 3930 3931 def PCA(self): 3932 """ 3933 Perform Principal Component Analysis (PCA) on the scaled data. 3934 3935 This method reduces the dimensionality of `self.scaled_data` while retaining the maximum variance. 3936 3937 Notes 3938 ----- 3939 Modifies the `self.PCA_results` attribute with the PCA-transformed data. 
3940 """ 3941 3942 if None not in self.scaled_data: 3943 pca = PCA(n_components=self.scaled_data.shape[1]) 3944 self.PCA_results = pca.fit_transform(self.scaled_data) 3945 self.explained_variance_ratio = pca.explained_variance_ratio_ 3946 else: 3947 print("\nNo data for PCA. Please use the data_scale() method first.") 3948 3949 def var_plot(self): 3950 """ 3951 Plot the cumulative explained variance of the principal components from PCA. 3952 3953 This method visualizes the cumulative explained variance to help determine how many components capture most of the variance. 3954 3955 Notes 3956 ----- 3957 Stores results in the following attributes: 3958 - `self.var_data` (np.ndarray): Explained variance ratio for each principal component. 3959 - `self.knee_plot` (matplotlib.figure.Figure): Figure of the cumulative explained variance plot. 3960 """ 3961 3962 if None not in self.PCA_results: 3963 3964 fig, _ = plt.subplots(figsize=(15, 7)) 3965 explained_var = self.explained_variance_ratio 3966 3967 cumulative_var = np.cumsum(explained_var) 3968 3969 # Plot the cumulative explained variance as a function of the number of components 3970 plt.plot(cumulative_var) 3971 plt.xlabel("Number of Components") 3972 plt.ylabel("Cumulative Explained Variance") 3973 plt.title("Explained variance of PCs") 3974 plt.xticks(np.arange(0, len(cumulative_var) + 1, step=1)) 3975 3976 self.var_data = explained_var 3977 self.knee_plot = fig 3978 3979 else: 3980 3981 print( 3982 "\nNo data for variance explanation analysis. Please use the PCA() method first." 3983 ) 3984 3985 def UMAP( 3986 self, 3987 PC_num: int = 5, 3988 factorize_with_metadata: bool = False, 3989 harmonize_sets: bool = True, 3990 n_neighbors: int = 25, 3991 min_dist: float = 0.01, 3992 n_components: int = 2, 3993 width: int = 8, 3994 height: int = 6, 3995 ): 3996 """ 3997 Perform UMAP (Uniform Manifold Approximation and Projection) dimensionality reduction on PCA results. 
3998 3999 UMAP is applied to the top principal components, optionally using metadata labels to influence the embedding. Generates both 2D/3D embeddings and visualizations. 4000 4001 Parameters 4002 ---------- 4003 PC_num : int, optional 4004 Number of top principal components to use for UMAP embedding. Default is 5. 4005 4006 factorize_with_metadata : bool, optional 4007 Whether to use metadata (e.g., 'sets') to factorize UMAP embedding. Default is False. 4008 4009 harmonize_sets : bool, optional 4010 If True, applies harmonization across data sets before computing the UMAP embedding. 4011 Default is True. 4012 4013 n_neighbors : int, optional 4014 Number of neighbors for UMAP to compute local structure. Default is 25. 4015 4016 min_dist : float, optional 4017 Minimum distance between points in the low-dimensional embedding. Default is 0.01. 4018 4019 n_components : int, optional 4020 Number of dimensions for the UMAP embedding. Default is 2. 4021 4022 width : int, optional 4023 Width of the generated matplotlib figures (in inches). Default is 8. 4024 4025 height : int, optional 4026 Height of the generated matplotlib figures (in inches). Default is 6. 4027 4028 Notes 4029 ----- 4030 Stores results in the following attributes: 4031 - `self.UMAP_data` (np.ndarray): UMAP-transformed data. 4032 - `self.UMAP_plot['static']['PrimaryUMAP']` (matplotlib.figure.Figure): Static visualization of UMAP embedding. 4033 - `self.UMAP_plot['html']['PrimaryUMAP']` (plotly.graph_objs.Figure): Interactive Plotly visualization of UMAP embedding. 
4034 """ 4035 4036 if None not in self.PCA_results: 4037 4038 reducer = umap.UMAP( 4039 n_neighbors=n_neighbors, 4040 min_dist=min_dist, 4041 n_components=n_components, 4042 random_state=42, 4043 ) 4044 4045 pca_res = self.PCA_results 4046 4047 if harmonize_sets: 4048 4049 pca_res = np.array(pca_res) 4050 4051 pca_res = np.array( 4052 harmonize.run_harmony( 4053 pca_res, self.input_metadata, vars_use="sets" 4054 ).Z_corr 4055 ).T 4056 4057 if factorize_with_metadata: 4058 numeric_labels = pd.Categorical(self.tmp_metadata["sets"]).codes 4059 4060 umap_result = reducer.fit_transform( 4061 pca_res[:, : PC_num + 1], y=numeric_labels 4062 ) 4063 4064 else: 4065 umap_result = reducer.fit_transform(pca_res[:, : PC_num + 1]) 4066 4067 umap_result_plot = pd.DataFrame(umap_result.copy()) 4068 4069 umap_result_plot["clusters"] = list(self.tmp_metadata["sets"]) 4070 4071 static_fig = umap_static(umap_result_plot, width=width, height=height) 4072 4073 html_fig = umap_html( 4074 umap_result_plot, width=width * 100, height=height * 100 4075 ) 4076 4077 self.UMAP_data = umap_result 4078 4079 self.UMAP_plot["static"]["PrimaryUMAP"] = static_fig 4080 self.UMAP_plot["html"]["PrimaryUMAP"] = html_fig 4081 4082 else: 4083 4084 print("\nNo data for UMAP. Please use the PCA() method first.") 4085 4086 def db_scan(self, eps=0.5, min_samples: int = 10): 4087 """ 4088 Perform DBSCAN clustering on UMAP-transformed data. 4089 4090 DBSCAN identifies clusters based on density, labeling points in dense regions as clusters and others as noise. 4091 4092 Parameters 4093 ---------- 4094 eps : float, optional 4095 Maximum distance between two points to be considered neighbors. Default is 0.5. 4096 4097 min_samples : int, optional 4098 Minimum number of points required to form a dense region (cluster). Default is 10. 
4099 4100 Notes 4101 ----- 4102 Stores the results in the following attribute: 4103 - `self.dblabels` (list of str): Cluster labels assigned by DBSCAN for each point in the UMAP embedding. 4104 """ 4105 4106 if None not in self.UMAP_data: 4107 4108 dbscan = DBSCAN(eps=eps, min_samples=min_samples) 4109 dbscan_labels = dbscan.fit_predict(self.UMAP_data) 4110 self.dblabels = [str(x) for x in dbscan_labels] 4111 4112 else: 4113 4114 print("\nNo data for DBSCAN. Please use the UMAP() method first.") 4115 4116 def UMAP_on_clusters( 4117 self, 4118 min_entities: int = 50, 4119 width: int = 8, 4120 height: int = 6, 4121 n_per_col: int = 20, 4122 ): 4123 """ 4124 Generate UMAP visualizations for clusters filtered by a minimum entity threshold. 4125 4126 This method removes clusters containing fewer than `min_entities` observations 4127 and produces two UMAP visualizations: 4128 4129 1. **Cluster UMAP** – points colored by cluster assignment only. 4130 2. **Cluster × Set UMAP** – points colored by the combination of cluster and set identifier. 4131 4132 Parameters 4133 ---------- 4134 min_entities : int, optional 4135 Minimum number of entities required for a cluster to be included 4136 in the visualization. Default is 50. 4137 4138 width : int, optional 4139 Width of the generated matplotlib figures (in inches). Default is 8. 4140 4141 height : int, optional 4142 Height of the generated matplotlib figures (in inches). Default is 6. 4143 4144 n_per_col : int, optional 4145 Maximum number of legend entries per column. Default is 20. 4146 4147 Notes 4148 ----- 4149 This method updates the following attributes: 4150 4151 - `self.UMAP_plot['static']['ClusterUMAP']` 4152 Static matplotlib figure of the filtered cluster-only UMAP. 4153 4154 - `self.UMAP_plot['html']['ClusterUMAP']` 4155 Interactive HTML version of the cluster-only UMAP. 4156 4157 - `self.UMAP_plot['static']['ClusterXSetsUMAP']` 4158 Static matplotlib figure showing clusters combined with set identifiers. 
4159 4160 - `self.UMAP_plot['html']['ClusterXSetsUMAP']` 4161 Interactive HTML version of the cluster × set visualization. 4162 4163 - `self.tmp_data` 4164 Dataset filtered to include only clusters meeting the `min_entities` threshold. 4165 4166 - `self.tmp_metadata` 4167 Metadata corresponding to the filtered dataset. 4168 """ 4169 4170 if None not in self.UMAP_data: 4171 4172 if hasattr(self, "_tmp_data_old"): 4173 self.tmp_data = self._tmp_data_old 4174 4175 if hasattr(self, "_tmp_metadata_old"): 4176 self.tmp_metadata = self._tmp_metadata_old 4177 4178 umap_result = pd.DataFrame(self.UMAP_data.copy()) 4179 umap_result["id"] = self.tmp_metadata.index 4180 umap_result["clusters"] = self.dblabels 4181 umap_result = umap_result[umap_result["clusters"] != "-1"] 4182 tmp_metadata = self.tmp_metadata.copy() 4183 tmp_metadata["clusters"] = self.dblabels 4184 tmp_metadata = tmp_metadata[tmp_metadata["clusters"] != "-1"] 4185 tmp_data = self.tmp_data.copy() 4186 tmp_data.index = self.dblabels 4187 tmp_data = tmp_data[tmp_data.index != "-1"] 4188 4189 label_counts_dict = Counter(self.dblabels) 4190 4191 label_counts = pd.DataFrame.from_dict( 4192 label_counts_dict, orient="index", columns=["count"] 4193 ) 4194 4195 filtered_counts = label_counts[label_counts["count"] > min_entities] 4196 4197 tmp_metadata["full_id"] = list( 4198 tmp_metadata["id"].astype(str) + " # " + tmp_metadata["sets"] 4199 ) 4200 4201 tmp_data.index = tmp_metadata["full_id"] 4202 umap_result["full_id"] = list(tmp_metadata["full_id"]) 4203 4204 umap_result = umap_result[ 4205 umap_result["clusters"].isin(np.array(filtered_counts.index)) 4206 ] 4207 tmp_metadata = tmp_metadata[ 4208 tmp_metadata["clusters"].isin(np.array(filtered_counts.index)) 4209 ] 4210 4211 umap_result = umap_result.sort_values( 4212 by="clusters", key=lambda x: x.astype(int) 4213 ) 4214 4215 tmp_data = tmp_data[tmp_data.index.isin(np.array(tmp_metadata["full_id"]))] 4216 4217 static_fig = umap_static( 4218 umap_result, 
width=width, height=height, n_per_col=n_per_col 4219 ) 4220 4221 html_fig = umap_html(umap_result, width=width * 100, height=height * 100) 4222 4223 self.UMAP_plot["static"]["ClusterUMAP"] = static_fig 4224 self.UMAP_plot["html"]["ClusterUMAP"] = html_fig 4225 4226 tmp_metadata["full_name"] = list( 4227 tmp_metadata["clusters"] + " # " + tmp_metadata["sets"] 4228 ) 4229 4230 label_counts_dict = Counter(list(tmp_metadata["full_name"])) 4231 4232 label_counts = pd.DataFrame.from_dict( 4233 label_counts_dict, orient="index", columns=["count"] 4234 ) 4235 4236 filtered_counts = label_counts[label_counts["count"] > min_entities] 4237 4238 tmp_data.index = tmp_metadata["full_name"] 4239 umap_result["clusters"] = list(tmp_metadata["full_name"]) 4240 4241 umap_result = umap_result[ 4242 umap_result["clusters"].isin(np.array(filtered_counts.index)) 4243 ] 4244 4245 tmp_metadata = tmp_metadata[ 4246 tmp_metadata["full_name"].isin(np.array(filtered_counts.index)) 4247 ] 4248 4249 tmp_data = tmp_data[tmp_data.index.isin(np.array(filtered_counts.index))] 4250 4251 static_fig = umap_static( 4252 umap_result, width=width, height=height, n_per_col=n_per_col 4253 ) 4254 4255 html_fig = umap_html(umap_result, width=width * 100, height=height * 100) 4256 4257 self.UMAP_plot["static"]["ClusterXSetsUMAP"] = static_fig 4258 4259 self.UMAP_plot["html"]["ClusterXSetsUMAP"] = html_fig 4260 4261 self._tmp_data_old = self.tmp_data 4262 self._tmp_metadata_old = self.tmp_metadata 4263 4264 self.tmp_data = tmp_data 4265 self.tmp_metadata = tmp_metadata 4266 4267 else: 4268 print( 4269 "\nNo data for visualization. Please use the UMAP() and db_scan() methods first." 4270 ) 4271 4272 ## save data 4273 def full_info(self): 4274 """ 4275 Merge data with metadata based on the 'full_id' column. 4276 4277 This method combines `self.tmp_data` and `self.tmp_metadata` into a single DataFrame if the metadata contains a 'full_id' column. 
If 'full_id' is not present, the method prints a warning to complete the preprocessing pipeline. 4278 4279 Returns 4280 ------- 4281 pd.DataFrame or None 4282 Merged DataFrame containing both data and metadata if 'full_id' exists; otherwise, None. 4283 """ 4284 4285 tmp_data = self.tmp_data.copy() 4286 tmp_metadata = self.tmp_metadata.copy() 4287 4288 if "full_id" in tmp_metadata.columns: 4289 tmp_data.index = tmp_metadata["full_id"] 4290 4291 merged_df = tmp_data.merge( 4292 tmp_metadata, left_index=True, right_on="full_id", how="left" 4293 ) 4294 4295 return merged_df 4296 4297 else: 4298 4299 print("\nMetadata is not completed!") 4300 4301 ################################################################################# 4302 4303 def DFA(self, meta_group_by: str = "sets", sets: dict = {}, n_proc=5): 4304 """ 4305 Perform Differential Feature Analysis (DFA) on specified data groups. 4306 4307 This method conducts DFA using a grouping factor from metadata and a dictionary of sets for comparison. It allows for the identification of significant differences across defined sets. 4308 4309 The analysis includes: 4310 - Mann–Whitney U test 4311 - Percentage of non-zero values 4312 - Means and standard deviations 4313 - Effect size metric (ESM) 4314 - Benjamini–Hochberg FDR correction 4315 - Fold-change and log2 fold-change 4316 4317 Parameters 4318 ---------- 4319 meta_group_by : str, optional 4320 Metadata column used for grouping during the analysis. 4321 Default is ``'sets'``. 4322 To view available grouping categories, use ``self.groups``. 4323 4324 sets : dict, optional 4325 Dictionary defining groups for pairwise comparison. 4326 Keys correspond to group names, and values are lists of labels 4327 belonging to each group. 4328 4329 Example 4330 ------- 4331 >>> sets = { 4332 ... 'healthy': ['21q'], 4333 ... 'disease': ['71q', '77q', '109q'] 4334 ... } 4335 In this configuration, the *healthy* group is compared against the 4336 aggregated *disease* groups. 
4337 4338 n_proc : int, optional 4339 Number of CPU cores used for parallel processing. 4340 Default is ``5``. 4341 4342 Returns 4343 ------- 4344 pandas.DataFrame or None 4345 A DataFrame containing statistical results for each feature, including: 4346 4347 - ``feature`` : str 4348 - ``p_val`` : float 4349 - ``adj_pval`` : float 4350 - ``pct_valid`` : float 4351 - ``pct_ctrl`` : float 4352 - ``avg_valid`` : float 4353 - ``avg_ctrl`` : float 4354 - ``sd_valid`` : float 4355 - ``sd_ctrl`` : float 4356 - ``esm`` : float 4357 - ``FC`` : float 4358 - ``log(FC)`` : float 4359 - ``norm_diff`` : float 4360 - ``valid_group`` : str 4361 - ``-log(p_val)`` : float 4362 4363 If ``sets`` is ``None``, results for each group are concatenated. 4364 4365 Returns ``None`` in case of errors or invalid parameters. 4366 4367 Notes 4368 ----- 4369 - Columns containing only zeros are automatically removed. 4370 - p-values equal for both groups produce ``p_val = 1``. 4371 - Benjamini–Hochberg correction is applied separately within each group comparison. 4372 - Fold-change is stabilized using a small, data-derived ``low_factor``. 4373 - Uses ``Mann–Whitney U`` test with ``alternative='two-sided'``. 4374 4375 """ 4376 4377 tmp_data = self.tmp_data.copy() 4378 4379 tmp_data = tmp_data.select_dtypes(include="number") 4380 4381 tmp_metadata = self.tmp_metadata.copy() 4382 4383 if len(sets.keys()) >= 2: 4384 print("\nAnalysis strated on provided sets dictionary and meta_group_by...") 4385 tmp_data.index = list(tmp_metadata[meta_group_by]) 4386 tmp_metadata["sets"] = tmp_metadata[meta_group_by] 4387 results = statistic( 4388 tmp_data.transpose(), sets=sets, metadata=tmp_metadata, n_proc=n_proc 4389 ) 4390 4391 else: 4392 print( 4393 "\nAnalysis strated on for all groups to each other in meta_group_by..." 
4394 ) 4395 tmp_data.index = list(tmp_metadata[meta_group_by]) 4396 tmp_metadata["sets"] = tmp_metadata[meta_group_by] 4397 results = statistic( 4398 tmp_data.transpose(), sets=None, metadata=tmp_metadata, n_proc=n_proc 4399 ) 4400 4401 self.DFA_results = results 4402 4403 def heatmap_DFA( 4404 self, 4405 p_value: float | int = 0.05, 4406 top_n: int = 5, 4407 scale: bool = False, 4408 clustering: str | None = "ward", 4409 figsize=(10, 5), 4410 ): 4411 """ 4412 Generate a heatmap of the top DFA features filtered by p-value and log fold change. 4413 4414 Parameters 4415 ---------- 4416 p_value : float or int, optional 4417 Significance threshold used to filter features by their p-value. 4418 Only features with p_val < p_value are included. Default is 0.05. 4419 4420 top_n : int, optional 4421 Number of top features selected per group based on the 'esm' score. 4422 Default is 5. 4423 4424 scale : bool, optional 4425 Whether to apply Min–Max scaling to heatmap values across features. 4426 Default is False. 4427 4428 clustering : str or None, optional 4429 Hierarchical clustering method applied to rows/columns of the heatmap. 4430 If None, clustering is disabled. Default is 'ward'. 4431 4432 figsize : tuple, optional 4433 Size of the resulting matplotlib figure. Default is (10, 5). 4434 4435 Notes 4436 ----- 4437 - Only features with a positive log fold change ('log(FC)' > 0) are considered. 4438 - Heatmap values represent -log10(p_value) for visualization. 4439 - If `scale=True`, values are normalized using Min–Max scaling. 4440 - The generated figure is displayed and stored in `self.DFA_plot`. 
4441 """ 4442 4443 df_reduced = self.DFA_results.copy() 4444 4445 df_reduced = df_reduced[df_reduced["log(FC)"] > 0] 4446 4447 df_reduced = df_reduced[df_reduced["p_val"] < p_value] 4448 4449 df_reduced = ( 4450 df_reduced.sort_values(["valid_group", "esm"], ascending=[True, False]) 4451 .groupby("valid_group", as_index=False) 4452 .head(top_n) 4453 ) 4454 4455 heatmap_data = df_reduced.pivot( 4456 index="feature", columns="valid_group", values="-log(p_val)" 4457 ).fillna(0) 4458 4459 label = "-log10(p_value)" 4460 4461 if scale: 4462 label = f"scaled({label})" 4463 scaler = MinMaxScaler() 4464 heatmap_data = pd.DataFrame( 4465 scaler.fit_transform(heatmap_data), 4466 index=heatmap_data.index, 4467 columns=heatmap_data.columns, 4468 ) 4469 4470 if clustering is not None: 4471 Z_rows = linkage(heatmap_data.values, method=clustering) 4472 row_order = leaves_list(Z_rows) 4473 4474 Z_cols = linkage(heatmap_data.values.T, method=clustering) 4475 col_order = leaves_list(Z_cols) 4476 4477 heatmap_data = heatmap_data.iloc[row_order, col_order] 4478 4479 figure = plt.figure(figsize=figsize) 4480 sns.heatmap( 4481 heatmap_data, 4482 cmap="viridis", 4483 linewidths=0.5, 4484 linecolor="gray", 4485 cbar_kws={"label": label}, 4486 fmt=".2f", 4487 ) 4488 plt.ylabel("Feature") 4489 plt.xlabel("Cluster") 4490 plt.xticks(rotation=30, ha="right") 4491 4492 plt.tight_layout() 4493 4494 if cfg._DISPLAY_MODE: 4495 plt.show() 4496 4497 self.DFA_plot = figure 4498 4499 def get_DFA_plot(self, show: bool = True): 4500 """ 4501 Retrieve the heatmap figure generated by `heatmap_DFA()`. 4502 4503 Parameters 4504 ---------- 4505 show : bool, optional 4506 Whether to display the stored heatmap figure. Default is True. 4507 4508 Returns 4509 ------- 4510 matplotlib.figure.Figure 4511 The figure object containing the DFA heatmap. 4512 """ 4513 4514 if self.DFA_plot is None: 4515 print("\nNo results to return! 
Please run the heatmap_DFA() method first.") 4516 else: 4517 if cfg._DISPLAY_MODE: 4518 if show is True: 4519 self.DFA_plot 4520 try: 4521 display(self.DFA_plot) 4522 except: 4523 None 4524 4525 return self.DFA_plot 4526 4527 def print_avaiable_features(self): 4528 """ 4529 Print the available features (columns) in the current dataset. 4530 4531 This method lists all column names in `self.tmp_data` to help identify which features are available for analysis. 4532 4533 Example 4534 ------- 4535 >>> group_analysis.print_avaiable_features() 4536 """ 4537 4538 print("Avaiable features:") 4539 for cl in self.tmp_data.columns: 4540 print(cl) 4541 4542 def proportion_analysis( 4543 self, 4544 grouping_col: str = "sets", 4545 val_col: str = "nuclei_per_img", 4546 grouping_dict=None, 4547 omit=None, 4548 ): 4549 """ 4550 Perform proportion analysis by comparing the distribution of values across groups. 4551 4552 This method analyzes the distribution of values (e.g., nuclei counts) across different groups defined in the dataset. It can optionally group categories, omit specific values, and produces both statistical results and a visualization. 4553 4554 Parameters 4555 ---------- 4556 grouping_col : str, optional 4557 Column to group by. Default is 'sets'. 4558 4559 val_col : str, optional 4560 Column containing the values to analyze. Default is 'nuclei_per_img'. 4561 4562 grouping_dict : dict or None, optional 4563 Dictionary mapping new group names to categories in `grouping_col`. If None, analysis is based on the original groups. 4564 4565 omit : str, list, or None, optional 4566 Values to exclude from the analysis. Default is None. 4567 4568 Attributes 4569 ---------- 4570 proportion_stats : pd.DataFrame 4571 DataFrame containing chi-square test results for pairwise group comparisons. 4572 4573 proportion_plot : matplotlib.figure.Figure 4574 Plot visualizing the proportions across groups. 4575 4576 Example 4577 ------- 4578 >>> group_analysis.proportion_analysis( 4579 ... 
grouping_col='sets', 4580 ... val_col='nuclei_per_img', 4581 ... grouping_dict={'Group A': [1, 2], 'Group B': [3, 4]}, 4582 ... omit=5 4583 ... ) 4584 """ 4585 4586 andata = self.tmp_data.copy() 4587 4588 andata[grouping_col] = list(self.tmp_metadata[grouping_col]) 4589 4590 andata = andata[[grouping_col, val_col]] 4591 4592 if omit is not None: 4593 if isinstance(omit, list): 4594 andata = andata[~andata[val_col].isin(omit)] 4595 else: 4596 andata = andata[andata[val_col] != omit] 4597 4598 andata = andata.reset_index(drop=True) 4599 andata["index_col"] = andata.index 4600 4601 if isinstance(grouping_dict, dict): 4602 for k in grouping_dict.keys(): 4603 andata.loc[ 4604 andata[grouping_col].isin(grouping_dict[k]), grouping_col 4605 ] = k 4606 4607 df_pivot = andata.pivot_table( 4608 index=val_col, 4609 columns=grouping_col, 4610 values="index_col", 4611 aggfunc="count", 4612 fill_value=0, 4613 ) 4614 4615 Z_rows = linkage(df_pivot.values, method="ward") 4616 row_order = leaves_list(Z_rows) 4617 4618 Z_cols = linkage(df_pivot.values.T, method="ward") 4619 col_order = leaves_list(Z_cols) 4620 4621 df_pivot = df_pivot.iloc[row_order, col_order] 4622 4623 chi_df = chi_pairs(df_pivot) 4624 4625 self.proportion_stats = chi_pairs(df_pivot) 4626 4627 chi_df["Significance_Label"] = chi_df["p-value"].apply(get_significance_label) 4628 4629 self.proportion_plot = prop_plot(df_pivot, chi_df) 4630 4631 def get_proportion_plot(self, show: bool = True): 4632 """ 4633 Retrieve the proportion bar plot generated by the `proportion_analysis()` method. 4634 4635 Parameters 4636 ---------- 4637 show : bool, optional 4638 Whether to display the proportion bar plot. Default is True. 4639 4640 Returns 4641 ------- 4642 matplotlib.figure.Figure 4643 The figure object containing the proportion bar plot. 4644 """ 4645 4646 if self.proportion_plot is None: 4647 print( 4648 "\nNo results to return! Please run the proportion_analysis() method first." 
4649 ) 4650 else: 4651 if cfg._DISPLAY_MODE: 4652 if show: 4653 self.proportion_plot 4654 try: 4655 display(self.proportion_plot) 4656 except: 4657 None 4658 4659 return self.proportion_plot 4660 4661 def get_proportion_stats(self): 4662 """ 4663 Retrieve the proportion statistics computed by the `proportion_analysis()` method. 4664 4665 Returns 4666 ------- 4667 pd.DataFrame 4668 The proportion statistics stored in `self.proportion_stats`. 4669 """ 4670 4671 if None in self.proportion_stats: 4672 print( 4673 "\nNo results to return! Please run the proportion_analysis() method first." 4674 ) 4675 else: 4676 return self.proportion_stats
class RepTools:
    """
    A utility class for processing and repairing nuclei data.
    Provides methods for extracting subsets, removing outliers, computing geometrical features,
    and merging/splitting nuclei based on spatial and intensity criteria.

    Notes
    -----
    `repairing_nuclei()` reads thresholds from `self.hyperparameter_nuclei`
    (keys: 'FC_diff_global', 'FC_diff_local_intensity', 'circularity',
    'nn_min') — presumably set by a subclass or earlier configuration step;
    it is not initialized here.
    """

    def extract_dict_by_indices(self, d, indices):
        """
        Extracts elements from all dictionary lists using provided indices.

        Indices that fall outside a given list are silently skipped, so lists
        of different lengths are handled gracefully.

        Parameters
        ----------
        d : dict
            Dictionary with list values.

        indices : list
            List of indices to extract from each dictionary entry.

        Returns
        -------
        dict
            Dictionary containing only the selected elements.
        """

        return {
            key: [values[i] for i in indices if i < len(values)]
            for key, values in d.items()
        }

    def drop_outlires(self, row, diff_FC_intensity=0.95, circ=0.6):
        """
        Identify indices of nuclei that are considered outliers based on circularity and intensity.

        Nuclei are first filtered to those with circularity above `circ`; the
        mean intensity of that subset is then used as the reference, and any
        member whose intensity falls below `diff_FC_intensity` times that mean
        is reported as an outlier.

        Parameters
        ----------
        row : dict
            Dictionary containing nuclei properties, including 'circularity' and 'intensity_mean'.

        diff_FC_intensity : float
            Fraction of mean intensity below which a nucleus is considered an outlier.

        circ : float
            Minimum circularity threshold for nuclei to be considered.

        Returns
        -------
        list
            List of indices (into the ORIGINAL lists in `row`) to drop as outliers.
        """

        # Indices of nuclei passing the circularity filter, kept so that
        # outlier positions can be mapped back to the original ordering.
        cd = []
        for n, _ in enumerate(row["circularity"]):
            if row["circularity"][n] > circ:
                cd.append(n)

        filtered = self.extract_dict_by_indices(row, cd)

        drop = []
        is_mean = np.mean(filtered["intensity_mean"])

        for n, _ in enumerate(filtered["intensity_mean"]):
            FC_mean = filtered["intensity_mean"][n] / is_mean
            if FC_mean < diff_FC_intensity:
                # BUGFIX: report the index into the ORIGINAL lists.
                # Callers (repairing_nuclei) apply these indices to the
                # unfiltered dictionary, so the position within the
                # circularity-filtered subset must be mapped back via `cd`.
                drop.append(cd[n])

        return drop

    def nn(self, coords):
        """
        Compute close neighbors between nuclei coordinates using a threshold distance.

        For every ordered pair (i, j), counts how many points of nucleus j lie
        within 2 pixels of any point of nucleus i.

        Parameters
        ----------
        coords : list
            List of numpy arrays, each containing coordinates for a nucleus.

        Returns
        -------
        dict
            Dictionary mapping pairs of nuclei indices ("i --> j") to the
            number of close neighbors.
        """

        # Build each KD-tree once instead of once per ordered pair
        # (identical results, avoids redundant tree construction).
        trees = [cKDTree(c) for c in coords]

        threshold = 2

        full_list = {}
        for i in range(len(coords)):
            for j in range(len(coords)):
                if i != j:
                    distances, indices = trees[i].query(coords[j])
                    close_neighbors = np.sum(distances < threshold)
                    full_list[f"{i} --> {j}"] = close_neighbors

        return full_list

    def compute_axes_length(self, contour):
        """
        Compute major and minor axis lengths of a nucleus from its contour.

        The axes are derived from the eigenvalues of the covariance matrix of
        the contour points (2 * sqrt of the largest/smallest eigenvalue).

        Parameters
        ----------
        contour : np.ndarray
            Coordinates of nucleus contour points.

        Returns
        -------
        tuple
            Major and minor axis lengths.
        """

        cov = np.cov(contour.T)

        eigvals, _ = np.linalg.eigh(cov)

        axis_major_length = 2 * np.sqrt(eigvals.max())
        axis_minor_length = 2 * np.sqrt(eigvals.min())

        return axis_major_length, axis_minor_length

    def compute_eccentricity(self, contour):
        """
        Compute eccentricity of a nucleus from its contour.

        Uses the ratio of the covariance eigenvalues: 0 for a perfectly
        isotropic (circular) point cloud, approaching 1 for elongated shapes.

        Parameters
        ----------
        contour : np.ndarray
            Coordinates of nucleus contour points.

        Returns
        -------
        float
            Eccentricity of the nucleus.
        """

        cov = np.cov(contour.T)
        eigvals, _ = np.linalg.eigh(cov)

        eccentricity = np.sqrt(1 - (eigvals.min() / eigvals.max()))
        return eccentricity

    def compute_feret_diameter(self, contour):
        """
        Compute the Feret diameter of a given contour.

        Approximated here as the longer side of the minimum-area bounding
        rectangle returned by OpenCV.

        Parameters
        ----------
        contour : np.ndarray
            Array of shape (N, 2) representing the contour coordinates.

        Returns
        -------
        float
            The maximum distance between any two points in the contour.
        """

        rect = cv2.minAreaRect(contour)
        (w, h) = rect[1]
        return max(w, h)

    def compute_perimeter(self, contour):
        """
        Compute the perimeter of a contour.

        The perimeter is calculated as the sum of Euclidean distances between
        consecutive points in the contour (the contour is not implicitly closed).

        Parameters
        ----------
        contour : np.ndarray
            Array of shape (N, 2) representing the contour coordinates.

        Returns
        -------
        float
            Perimeter length of the contour.
        """

        return np.sum(np.linalg.norm(np.diff(contour, axis=0), axis=1))

    def compute_circularity(self, contour):
        """
        Compute the circularity of a contour.

        Circularity is a measure of how close the shape is to a perfect circle.
        It is calculated as 4 * pi * (area / perimeter^2), with the area taken
        from the convex hull of the points (ConvexHull.volume is the area in 2-D).

        Parameters
        ----------
        contour : np.ndarray
            Array of shape (N, 2) representing the contour coordinates.

        Returns
        -------
        float
            Circularity of the contour; ~1 indicates a perfect circle.
        """
        perimeter = self.compute_perimeter(contour)
        hull = ConvexHull(contour)
        area = hull.volume

        return (4 * np.pi * area) / (perimeter**2)

    def repairing_nuclei(self, results):
        """
        Repair nuclei segmentation results by merging or removing outlier nuclei.

        This method adjusts nuclei detection results based on global and local thresholds, circularity, nearest neighbor relationships,
        and merges small or fragmented nuclei when appropriate. It also recalculates key morphological properties for merged nuclei.

        Parameters
        ----------
        results : dict
            Dictionary where keys are image identifiers and values are dictionaries containing detected nuclei properties
            (e.g., 'area', 'coords', 'label', 'circularity', etc.).

        Returns
        -------
        dict
            A dictionary in the same structure as `results`, but with repaired nuclei information after merging or removing outliers.
            NOTE: images that match neither repair branch (e.g. a single
            low-circularity nucleus) are omitted from the output.
        """

        # Per-image totals used to detect images whose overall nuclear area
        # deviates from the dataset-wide mean.
        mean_sum_area = []
        im = []
        n = []
        for r in tqdm(results.keys()):
            mean_sum_area.append(np.sum(results[r]["area"]))
            n.append(len(results[r]["area"]))
            im.append(r)

        mean_sum_area_sum = np.mean(mean_sum_area)

        results_dict = {}

        print("\nImage repairing:\n\n")

        for i, m in tqdm(zip(im, n), total=len(im)):

            if (
                m > 1
                and np.sum(results[i]["area"]) / mean_sum_area_sum
                < self.hyperparameter_nuclei["FC_diff_global"]
            ):
                # adjustment to global changes

                temporary_dict = results[i]

                # drop intensity outliers among sufficiently circular nuclei
                check_drop = self.drop_outlires(
                    temporary_dict,
                    diff_FC_intensity=self.hyperparameter_nuclei[
                        "FC_diff_local_intensity"
                    ],
                    circ=self.hyperparameter_nuclei["circularity"],
                )

                to_final = [
                    x
                    for x in list(range(len(temporary_dict["area"])))
                    if int(x) not in check_drop
                ]

                tmp = self.extract_dict_by_indices(temporary_dict, to_final)

                to_concat = []

                if len(tmp["coords"]) > 1:

                    # nuclei whose pixel sets nearly touch are merge candidates
                    results_nn = self.nn(tmp["coords"])

                    for kn in results_nn.keys():
                        if results_nn[kn] > self.hyperparameter_nuclei["nn_min"]:
                            to_concat.append(int(re.sub(" --> .*", "", kn)))
                            to_concat.append(int(re.sub(".* --> ", "", kn)))

                to_concat = list(set(to_concat))

                to_rest = [
                    x for x in list(range(len(tmp["area"]))) if x not in to_concat
                ]

                if len(to_concat) > 1:
                    # merge the candidate fragments into a single nucleus and
                    # recompute its morphological descriptors
                    to_concat_dict = self.extract_dict_by_indices(tmp, to_concat)
                    to_concat_dict["coords"] = [np.vstack(to_concat_dict["coords"])]
                    to_concat_dict["label"] = [min(to_concat_dict["label"])]
                    to_concat_dict["area"] = [np.sum(to_concat_dict["area"])]
                    to_concat_dict["area_bbox"] = [np.sum(to_concat_dict["area_bbox"])]
                    to_concat_dict["area_convex"] = [
                        np.sum(to_concat_dict["area_convex"])
                    ]
                    to_concat_dict["area_filled"] = [
                        np.sum(to_concat_dict["area_filled"])
                    ]
                    to_concat_dict["intensity_max"] = [
                        np.max(to_concat_dict["intensity_max"])
                    ]
                    to_concat_dict["intensity_mean"] = [
                        np.mean(to_concat_dict["intensity_mean"])
                    ]
                    to_concat_dict["intensity_min"] = [
                        np.min(to_concat_dict["intensity_min"])
                    ]
                    major, minor = self.compute_axes_length(to_concat_dict["coords"][0])
                    to_concat_dict["axis_major_length"] = [major]
                    to_concat_dict["axis_minor_length"] = [minor]
                    to_concat_dict["ratio"] = [minor / major]
                    ecc = self.compute_eccentricity(to_concat_dict["coords"][0])
                    to_concat_dict["eccentricity"] = [ecc]
                    to_concat_dict["equivalent_diameter_area"] = [
                        np.sum(to_concat_dict["equivalent_diameter_area"])
                    ]
                    feret_diameter = self.compute_feret_diameter(
                        to_concat_dict["coords"][0]
                    )
                    to_concat_dict["feret_diameter_max"] = [feret_diameter]
                    to_concat_dict["solidity"] = [np.mean(to_concat_dict["solidity"])]
                    to_concat_dict["perimeter"] = [np.sum(to_concat_dict["perimeter"])]
                    to_concat_dict["perimeter_crofton"] = [
                        np.sum(to_concat_dict["perimeter_crofton"])
                    ]
                    to_concat_dict["circularity"] = [
                        np.mean(to_concat_dict["circularity"])
                    ]

                    to_rest_dict = self.extract_dict_by_indices(tmp, to_rest)

                    # append the merged nucleus after the untouched ones
                    for ik in to_rest_dict.keys():
                        to_rest_dict[ik] = to_rest_dict[ik] + to_concat_dict[ik]

                    results_dict[i] = to_rest_dict

                else:
                    results_dict[i] = tmp

            elif (
                m == 1
                and results[i]["circularity"][0]
                > self.hyperparameter_nuclei["circularity"]
            ):
                # a lone, sufficiently circular nucleus is kept as-is

                results_dict[i] = results[i]

        return results_dict
A utility class for processing and repairing nuclei data. Provides methods for extracting subsets, removing outliers, computing geometrical features, and merging/splitting nuclei based on spatial and intensity criteria.
53 def extract_dict_by_indices(self, d, indices): 54 """ 55 Extracts elements from all dictionary lists using provided indices. 56 57 Parameters 58 ---------- 59 d : dict 60 Dictionary with list values. 61 62 indices : list 63 List of indices to extract from each dictionary entry. 64 65 Returns 66 ------- 67 dict 68 Dictionary containing only the selected elements. 69 """ 70 71 return { 72 key: [values[i] for i in indices if i < len(values)] 73 for key, values in d.items() 74 }
Extracts elements from all dictionary lists using provided indices.
Parameters
d : dict Dictionary with list values.
indices : list List of indices to extract from each dictionary entry.
Returns
dict Dictionary containing only the selected elements.
76 def drop_outlires(self, row, diff_FC_intensity=0.95, circ=0.6): 77 """ 78 Identify indices of nuclei that are considered outliers based on circularity and intensity. 79 80 Parameters 81 ---------- 82 row : dict 83 Dictionary containing nuclei properties, including 'circularity' and 'intensity_mean'. 84 85 diff_FC_intensity : float 86 Fraction of mean intensity below which a nucleus is considered an outlier. 87 88 circ : float 89 Minimum circularity threshold for nuclei to be considered. 90 91 Returns 92 ------- 93 list 94 List of indices to drop as outliers. 95 """ 96 97 cd = [] 98 for n, _ in enumerate(row["circularity"]): 99 if row["circularity"][n] > circ: 100 cd.append(n) 101 102 row = self.extract_dict_by_indices(row, cd) 103 104 drop = [] 105 is_mean = np.mean(row["intensity_mean"]) 106 107 for n, _ in enumerate(row["intensity_mean"]): 108 FC_mean = row["intensity_mean"][n] / is_mean 109 if FC_mean < diff_FC_intensity: 110 drop.append(n) 111 112 return drop
Identify indices of nuclei that are considered outliers based on circularity and intensity.
Parameters
row : dict Dictionary containing nuclei properties, including 'circularity' and 'intensity_mean'.
diff_FC_intensity : float Fraction of mean intensity below which a nucleus is considered an outlier.
circ : float Minimum circularity threshold for nuclei to be considered.
Returns
list List of indices to drop as outliers.
114 def nn(self, coords): 115 """ 116 Compute close neighbors between nuclei coordinates using a threshold distance. 117 118 Parameters 119 ---------- 120 coords : list 121 List of numpy arrays, each containing coordinates for a nucleus. 122 123 Returns 124 ------- 125 dict 126 Dictionary mapping pairs of nuclei indices to the number of close neighbors. 127 """ 128 129 full_list = {} 130 for i in range(len(coords)): 131 for j in range(len(coords)): 132 if i != j: 133 134 tree1 = cKDTree(coords[i]) 135 136 distances, indices = tree1.query(coords[j]) 137 138 threshold = 2 139 close_neighbors = np.sum(distances < threshold) 140 141 full_list[f"{i} --> {j}"] = close_neighbors 142 143 return full_list
Compute close neighbors between nuclei coordinates using a threshold distance.
Parameters
coords : list List of numpy arrays, each containing coordinates for a nucleus.
Returns
dict Dictionary mapping pairs of nuclei indices to the number of close neighbors.
145 def compute_axes_length(self, contour): 146 """ 147 Compute major and minor axis lengths of a nucleus from its contour. 148 149 Parameters 150 ---------- 151 contour : np.ndarray 152 Coordinates of nucleus contour points. 153 154 Returns 155 ------- 156 tuple 157 Major and minor axis lengths. 158 """ 159 160 cov = np.cov(contour.T) 161 162 eigvals, _ = np.linalg.eigh(cov) 163 164 axis_major_length = 2 * np.sqrt(eigvals.max()) 165 axis_minor_length = 2 * np.sqrt(eigvals.min()) 166 167 return axis_major_length, axis_minor_length
Compute major and minor axis lengths of a nucleus from its contour.
Parameters
contour : np.ndarray Coordinates of nucleus contour points.
Returns
tuple Major and minor axis lengths.
169 def compute_eccentricity(self, contour): 170 """ 171 Compute eccentricity of a nucleus from its contour. 172 173 Parameters 174 ---------- 175 contour : np.ndarray 176 Coordinates of nucleus contour points. 177 178 Returns 179 ------- 180 float 181 Eccentricity of the nucleus. 182 """ 183 184 cov = np.cov(contour.T) 185 eigvals, _ = np.linalg.eigh(cov) 186 187 eccentricity = np.sqrt(1 - (eigvals.min() / eigvals.max())) 188 return eccentricity
Compute eccentricity of a nucleus from its contour.
Parameters
contour : np.ndarray Coordinates of nucleus contour points.
Returns
float Eccentricity of the nucleus.
190 def compute_feret_diameter(self, contour): 191 """ 192 Compute the Feret diameter of a given contour. 193 194 The Feret diameter is defined as the maximum pairwise Euclidean distance between points in the contour. 195 196 Parameters 197 ---------- 198 contour : np.ndarray 199 Array of shape (N, 2) representing the contour coordinates. 200 201 Returns 202 ------- 203 float 204 The maximum distance between any two points in the contour. 205 """ 206 207 rect = cv2.minAreaRect(contour) 208 (w, h) = rect[1] 209 return max(w, h)
Compute the Feret diameter of a given contour.
The Feret diameter is defined as the maximum pairwise Euclidean distance between points in the contour.
Parameters
contour : np.ndarray Array of shape (N, 2) representing the contour coordinates.
Returns
float The maximum distance between any two points in the contour.
211 def compute_perimeter(self, contour): 212 """ 213 Compute the perimeter of a contour. 214 215 The perimeter is calculated as the sum of Euclidean distances between consecutive points in the contour. 216 217 Parameters 218 ---------- 219 contour : np.ndarray 220 Array of shape (N, 2) representing the contour coordinates. 221 222 Returns 223 ------- 224 float 225 Perimeter length of the contour. 226 """ 227 228 return np.sum(np.linalg.norm(np.diff(contour, axis=0), axis=1))
Compute the perimeter of a contour.
The perimeter is calculated as the sum of Euclidean distances between consecutive points in the contour.
Parameters
contour : np.ndarray Array of shape (N, 2) representing the contour coordinates.
Returns
float Perimeter length of the contour.
230 def compute_circularity(self, contour): 231 """ 232 Compute the circularity of a contour. 233 234 Circularity is a measure of how close the shape is to a perfect circle. 235 It is calculated as 4 * pi * (area / perimeter^2). 236 237 Parameters 238 ---------- 239 contour : np.ndarray 240 Array of shape (N, 2) representing the contour coordinates. 241 242 Returns 243 ------- 244 float 245 Circularity of the contour. Value ranges from 0 to 1, where 1 indicates a perfect circle. 246 """ 247 perimeter = self.compute_perimeter(contour) 248 hull = ConvexHull(contour) 249 area = hull.volume 250 251 return (4 * np.pi * area) / (perimeter**2)
Compute the circularity of a contour.
Circularity is a measure of how close the shape is to a perfect circle. It is calculated as 4 * pi * (area / perimeter^2).
Parameters
contour : np.ndarray Array of shape (N, 2) representing the contour coordinates.
Returns
float Circularity of the contour. Value ranges from 0 to 1, where 1 indicates a perfect circle.
def repairing_nuclei(self, results):
    """
    Repair nuclei segmentation results by merging or removing outlier nuclei.

    This method adjusts nuclei detection results based on global and local thresholds,
    circularity, nearest neighbor relationships, and merges small or fragmented nuclei
    when appropriate. It also recalculates key morphological properties for merged nuclei.

    Parameters
    ----------
    results : dict
        Dictionary where keys are image identifiers and values are dictionaries containing
        detected nuclei properties (e.g., 'area', 'coords', 'label', 'circularity', etc.).

    Returns
    -------
    dict
        A dictionary in the same structure as `results`, but with repaired nuclei
        information after merging or removing outliers.

    Notes
    -----
    NOTE(review): images that satisfy neither branch below (e.g. m > 1 with a
    large area ratio, or a single low-circularity nucleus, or m > 1 images whose
    filtered set has <= 1 coordinate entries) are silently omitted from the
    returned dict — confirm this is intended.

    NOTE(review): `drop_outlires` computes its indices on a circularity-filtered
    copy of the row, but `check_drop` is applied here against the unfiltered
    `temporary_dict` index space — verify the index spaces actually match.
    """

    # repairing nuclei
    mean_sum_area = []
    im = []
    n = []
    # First pass: per-image total nucleus area and nucleus count.
    for r in tqdm(results.keys()):
        mean_sum_area.append(np.sum(results[r]["area"]))
        n.append(len(results[r]["area"]))
        im.append(r)

    # Cohort-wide mean of the per-image total areas (global reference).
    mean_sum_area_sum = np.mean(mean_sum_area)

    results_dict = {}

    print("\nImage repairing:\n\n")

    # Second pass: i = image id, m = nucleus count for that image.
    for i, m in tqdm(zip(im, n), total=len(im)):

        if (
            m > 1
            and np.sum(results[i]["area"]) / mean_sum_area_sum
            < self.hyperparameter_nuclei["FC_diff_global"]
        ):
            # adjustment to global changes

            temporary_dict = results[i]

            # Indices of nuclei flagged as outliers (low relative intensity).
            check_drop = self.drop_outlires(
                temporary_dict,
                diff_FC_intensity=self.hyperparameter_nuclei[
                    "FC_diff_local_intensity"
                ],
                circ=self.hyperparameter_nuclei["circularity"],
            )

            # Keep every nucleus that was not flagged.
            to_final = [
                x
                for x in list(range(len(temporary_dict["area"])))
                if int(x) not in check_drop
            ]

            tmp = self.extract_dict_by_indices(temporary_dict, to_final)

            to_concat = []

            if len(tmp["coords"]) > 1:

                # Pairwise close-neighbor counts between nuclei outlines.
                results_nn = self.nn(tmp["coords"])

                # Collect both endpoints of every "i --> j" pair whose
                # close-neighbor count exceeds the threshold.
                for kn in results_nn.keys():
                    if results_nn[kn] > self.hyperparameter_nuclei["nn_min"]:

                        to_concat.append(int(re.sub(" --> .*", "", kn)))
                        to_concat.append(int(re.sub(".* --> ", "", kn)))

                to_concat = list(set(to_concat))

                # Nuclei not involved in any merge.
                to_rest = [
                    x for x in list(range(len(tmp["area"]))) if x not in to_concat
                ]

                #
                if len(to_concat) > 1:
                    # Merge the touching nuclei into a single entry and
                    # recompute its morphology from the pooled coordinates.
                    to_concat_dict = self.extract_dict_by_indices(tmp, to_concat)
                    to_concat_dict["coords"] = [np.vstack(to_concat_dict["coords"])]
                    # The merged nucleus inherits the smallest label.
                    to_concat_dict["label"] = [min(to_concat_dict["label"])]
                    # Areas are additive across the merged fragments.
                    to_concat_dict["area"] = [np.sum(to_concat_dict["area"])]
                    to_concat_dict["area_bbox"] = [np.sum(to_concat_dict["area_bbox"])]
                    to_concat_dict["area_convex"] = [
                        np.sum(to_concat_dict["area_convex"])
                    ]
                    to_concat_dict["area_filled"] = [
                        np.sum(to_concat_dict["area_filled"])
                    ]
                    # Intensity statistics: extrema of extrema, mean of means.
                    to_concat_dict["intensity_max"] = [
                        np.max(to_concat_dict["intensity_max"])
                    ]
                    to_concat_dict["intensity_mean"] = [
                        np.mean(to_concat_dict["intensity_mean"])
                    ]
                    to_concat_dict["intensity_min"] = [
                        np.min(to_concat_dict["intensity_min"])
                    ]
                    # Shape descriptors recomputed on the pooled coordinates.
                    major, minor = self.compute_axes_length(to_concat_dict["coords"][0])
                    to_concat_dict["axis_major_length"] = [major]
                    to_concat_dict["axis_minor_length"] = [minor]
                    to_concat_dict["ratio"] = [minor / major]
                    ecc = self.compute_eccentricity(to_concat_dict["coords"][0])
                    to_concat_dict["eccentricity"] = [ecc]
                    to_concat_dict["equivalent_diameter_area"] = [
                        np.sum(to_concat_dict["equivalent_diameter_area"])
                    ]
                    feret_diameter = self.compute_feret_diameter(
                        to_concat_dict["coords"][0]
                    )
                    to_concat_dict["feret_diameter_max"] = [feret_diameter]
                    to_concat_dict["solidity"] = [np.mean(to_concat_dict["solidity"])]
                    to_concat_dict["perimeter"] = [np.sum(to_concat_dict["perimeter"])]
                    to_concat_dict["perimeter_crofton"] = [
                        np.sum(to_concat_dict["perimeter_crofton"])
                    ]
                    to_concat_dict["circularity"] = [
                        np.mean(to_concat_dict["circularity"])
                    ]

                    to_rest_dict = self.extract_dict_by_indices(tmp, to_rest)

                    # Append the single merged entry after the untouched nuclei.
                    for ik in to_rest_dict.keys():
                        to_rest_dict[ik] = to_rest_dict[ik] + to_concat_dict[ik]

                    results_dict[i] = to_rest_dict

                else:
                    # Nothing to merge: keep the outlier-filtered set.
                    results_dict[i] = tmp

        elif (
            m == 1
            and results[i]["circularity"][0]
            > self.hyperparameter_nuclei["circularity"]
        ):

            # Single sufficiently-circular nucleus: keep as-is.
            results_dict[i] = results[i]

    return results_dict
Repair nuclei segmentation results by merging or removing outlier nuclei.
This method adjusts nuclei detection results based on global and local thresholds, circularity, nearest neighbor relationships, and merges small or fragmented nuclei when appropriate. It also recalculates key morphological properties for merged nuclei.
Parameters
results : dict Dictionary where keys are image identifiers and values are dictionaries containing detected nuclei properties (e.g., 'area', 'coords', 'label', 'circularity', etc.).
Returns
dict
A dictionary in the same structure as results, but with repaired nuclei information after merging or removing outliers.
class ImagesManagement:
    """
    A class for managing, preprocessing, merging, stitching, saving, and loading
    microscopy or flow cytometry images used in NucleiFinder-based analyses.

    This class provides a unified interface for:

    - loading image data,
    - selecting images by IDs,
    - preprocessing images (equalization, CLAHE, gamma/contrast/brightness adjustment),
    - merging images with user-defined intensity ratios,
    - stitching images horizontally,
    - retrieving and saving processed image sets.

    The class stores original or loaded data in the ``results_images`` attribute,
    and all processed images in ``prepared_images`` under user-defined acronyms.
    These acronyms allow flexible retrieval with ``get_prepared_images()``
    and exporting via ``save_prepared_images()``.

    Parameters
    ----------
    images_ids : list[int]
        List of selected image identifiers.

    result_dict : dict or None
        Dictionary containing raw or preprocessed images.
        If ``None``, images may later be loaded or processed from file.

    experiment_name : str
        Name of the experiment. Used for saving and structuring output.

    Attributes
    ----------
    images_ids : list[int]
        IDs of images managed by the class.

    results_images : dict or None
        Dictionary containing raw or analysis-derived images.

    experiment_name : str
        Name of the experiment. Used in saved filenames.

    prepared_images : dict
        Container for processed/adjusted/merged/stitched images,
        indexed by user-defined acronyms.

    Notes
    -----
    Processed images are stored only in memory until saved explicitly with
    ``save_prepared_images()``.

    Raw images loaded from NucleiFinder analyses can be saved for later reuse
    in a serialized `.inuc` format using ``save_raw()``.

    Examples
    --------
    Load image results from an analysis:

    >>> manager = ImagesManagement.load_experimental_images(results, "experiment1")

    Adjust selected images:

    >>> manager.adjust_images(
    ...     acronyme="adj",
    ...     path_to_images="path/to/imgs",
    ...     eq=True,
    ...     clahe=True
    ... )

    Merge multiple prepared sets:

    >>> manager.image_merging(["adj", "other"], ratio_list=[0.7, 0.3])

    Retrieve processed images:

    >>> imgs = manager.get_prepared_images("adj")

    Save stitched images to disk:

    >>> manager.save_prepared_images("stitched_adj_other", "./output/")
    """

    def __init__(self, images_ids, result_dict, experiment_name):
        """
        Initialize the ImagesManagement object.

        Parameters
        ----------
        images_ids : list[int]
            List of image identifiers.

        result_dict : dict or None
            Dictionary containing processed images.

        experiment_name : str
            Name of the experiment.
        """

        # List of image IDs managed by this instance.
        self.images_ids = images_ids
        # Raw or analysis-derived images (None until loaded).
        self.results_images = result_dict
        # Experiment name used for file naming and organization.
        self.experiment_name = experiment_name
        # Processed image sets (adjusted/merged/stitched) keyed by acronym.
        self.prepared_images = {}

    @classmethod
    def load_from_dict(cls, path: str, experiment_name: str):
        """
        Load an ImagesManagement instance from a `.inuc` serialized dictionary.

        Parameters
        ----------
        path : str
            Path to the `.inuc` file exported with `save_raw()`.

        experiment_name : str
            Name of the experiment.

        Returns
        -------
        ImagesManagement
            A reconstructed ImagesManagement object.

        Raises
        ------
        ValueError
            If `path` lacks the `.inuc` extension or does not exist.
        """

        if ".inuc" not in path:
            raise ValueError(
                "\nInvalid dictionary to load. It must contain a .inuc extension!"
            )

        if not os.path.exists(path):
            raise ValueError("\nInvalid path!")

        loaded_data = np.load(path)
        data_dict = {key: loaded_data[key] for key in loaded_data}

        # Image IDs are the leading token before the first '_' of each key.
        id_list = [re.sub("_.*", "", k) for k in data_dict.keys()]

        return cls(id_list, data_dict, experiment_name)

    @classmethod
    def load_experimental_images(cls, results_dict: dict, experiment_name: str):
        """
        Load results exported from NucleiFinder series analysis.

        Initialize the object with results from series_analysis_nuclei()
        or series_analysis_chromatinization() of the NucleiFinder class.

        Parameters
        ----------
        results_dict : dict
            Dictionary returned by `series_analysis_nuclei()` or
            `series_analysis_chromatinization()`.

        experiment_name : str
            Name of the experiment.

        Returns
        -------
        ImagesManagement

        Raises
        ------
        ValueError
            If the entries do not expose exactly the 'stats' and 'img' keys.
        """

        # Validate structure on the first entry only.
        first_key = list(results_dict.keys())[0]
        if set(results_dict[first_key].keys()) != set(["stats", "img"]):
            raise ValueError(
                "Incorrect data provided. The data must come from series_analysis_nuclei() "
                "or series_analysis_chromatinization() of the NucleiFinder class."
            )

        res_dict = {k: results_dict[k]["img"] for k in results_dict.keys()}
        id_list = [re.sub("_.*", "", k) for k in results_dict.keys()]

        return cls(id_list, res_dict, experiment_name)

    @classmethod
    def load_images_ids(cls, images_ids: list, experiment_name: str):
        """
        Initialize the object with a list of image IDs for processing.

        Parameters
        ----------
        images_ids : list[int]
            List of selected image IDs.

        experiment_name : str
            Name of the experiment.

        Returns
        -------
        ImagesManagement

        Raises
        ------
        ValueError
            If `images_ids` is empty.
        """

        if len(images_ids) == 0:
            raise ValueError(
                "Incorrect data provided. There must be a list of image IDs."
            )

        return cls(images_ids, None, experiment_name)

    def get_included_acronyms(self):
        """
        Print the data acronyms for adjusted images, processed using the
        self.adjust_images(), self.image_merging(), and self.image_stitching() methods.

        Acronym information is essential for retrieving and saving data using
        the self.get_prepared_images() and self.save_prepared_images() methods.

        Notes
        -----
        This method prints the list of available acronyms but does not return it.
        """

        if len(self.prepared_images.keys()) > 0:
            print("\nAvaiable stored images:\n")
            for kd in self.prepared_images.keys():
                print(kd)

        else:
            print("Nothing to return!")

    def get_prepared_images(self, acronyme=None):
        """
        Retrieves the prepared images (returned from adjust_images()) stored in the object.

        Parameters
        ----------
        acronyme : str or None
            Acronym identifying a processed image set. If None, prints available keys.

        Returns
        -------
        dict
            Dictionary of prepared images.

        Raises
        ------
        ValueError
            If `acronyme` is not a stored acronym.
        """

        if acronyme is None:
            self.get_included_acronyms()
            return None

        if acronyme in self.prepared_images:
            return self.prepared_images[acronyme]

        raise ValueError("Incorrect acronyme!")

    def save_prepared_images(self, acronyme: str, path_to_save: str = ""):
        """
        Saves prepared images (returned from adjust_images() method) to the specified directory.

        Parameters
        ----------
        acronyme : str
            Acronym identifying the processed image set to save.
            If None, the available acronyms are printed instead.

        path_to_save : str
            Directory path where the images will be saved. Default is the current working directory.

        Raises
        ------
        ValueError
            If `acronyme` is not a stored acronym.
        """

        if acronyme is None:
            self.get_included_acronyms()
            return

        if acronyme not in self.prepared_images:
            raise ValueError("Incorrect acronyme!")

        # Images go into "<experiment>_<acronyme>" inside path_to_save.
        path_to_save = os.path.join(
            path_to_save, f"{self.experiment_name}_{acronyme}"
        )

        os.makedirs(path_to_save, exist_ok=True)

        for i in tqdm(self.prepared_images[acronyme].keys()):
            cv2.imwrite(
                os.path.join(path_to_save, i + ".png"),
                self.prepared_images[acronyme][i],
            )

    def adjust_images(
        self,
        acronyme: str,
        path_to_images: str,
        file_extension: str = "tif",
        eq: bool = True,
        clahe: bool = True,
        kernal: tuple = (50, 50),
        fille_name_part: str = "",
        color: str = "gray",
        max_intensity: int = 65535,
        min_intenisty: int = 0,
        brightness: int = 1000,
        contrast: float = 1.0,
        gamma: float = 1.0,
        img_n: int = 0,
    ):
        """
        Prepares selected images for processing, applying histogram equalization and CLAHE, if required.

        Parameters
        ----------
        acronyme : str
            Name of images being adjusted in this run.

        path_to_images : str
            Path to the directory containing images.

        file_extension : str
            Image file extension. Default is 'tif'.

        eq : bool
            Whether to apply histogram equalization. Default is True.

        clahe : bool
            Whether to apply CLAHE. Default is True.

        kernal : tuple
            Kernel size for CLAHE. Default is (50, 50).

        fille_name_part : str
            Part of the file name to filter images. Default is an empty string.

        color : str
            Color space to use. Default is 'gray'.

        max_intensity : int
            Maximum intensity for image adjustment. Default is 65535.

        min_intenisty : int
            Minimum intensity for image adjustment. Default is 0.

        brightness : int
            Brightness adjustment value. Default is 1000.

        contrast : float
            Contrast adjustment factor. Default is 1.0.

        gamma : float
            Gamma correction factor. Default is 1.0.

        img_n : int
            Number of images to randomly sample. Default is 0, which means all images.

        Notes
        -----
        The processed images are stored under ``self.prepared_images[acronyme]``.
        To access them, use the ``get_prepared_images()`` method; to save them
        to disk, use the ``save_prepared_images()`` method.
        """

        results_dict = {}

        files = glob.glob(os.path.join(path_to_images, "*." + file_extension))

        # Optional case-insensitive file-name filter.
        if len(fille_name_part) > 0:
            files = [x for x in files if fille_name_part.lower() in x.lower()]

        # Keep only files whose leading token (before '_') is a selected ID.
        selected_id = self.images_ids
        if len(selected_id) > 0:
            selected_id = [str(x) for x in selected_id]
            files = [
                x
                for x in files
                if re.sub("_.*", "", os.path.basename(x)) in selected_id
            ]

        if img_n > 0:
            files = random.sample(files, img_n)

        for file in tqdm(files):

            image = load_image(file)

            # FIX: narrowed from a bare `except:`. Conversion fails when
            # the image is already single-channel; keep it as-is then.
            try:
                image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            except Exception:
                pass

            if eq is True:
                image = equalizeHist_16bit(image)

            if clahe is True:
                image = clahe_16bit(image, kernal=kernal)

            image = adjust_img_16bit(
                img=image,
                color=color,
                max_intensity=max_intensity,
                min_intenisty=min_intenisty,
                brightness=brightness,
                contrast=contrast,
                gamma=gamma,
            )

            results_dict[os.path.basename(file)] = image

        self.prepared_images[acronyme] = results_dict

    def image_merging(self, acronyms: list, ratio_list: list):
        """
        Merge previously prepared images stored in `self.prepared_images`,
        adjusted based on the image ratios. The used ratios adjust relative image intensity.

        Parameters
        ----------
        acronyms : list
            List of image names to be merged.

        ratio_list : list[float]
            List of ratio intensity values (0.0-1.0) for the merged image.
            The `acronyms` list and `ratio_list` must be of the same length.

        Raises
        ------
        ValueError
            If any acronym is not a stored acronym.

        Notes
        -----
        The merged images are stored under
        ``self.prepared_images["merged_<acronyms>"]``.
        To access them, use the ``get_prepared_images()`` method; to save them
        to disk, use the ``save_prepared_images()`` method.
        """

        for a in acronyms:
            if a not in list(self.prepared_images.keys()):
                raise ValueError(f"Incorrect {a} acronyme!")

        results_img = {}
        for k in self.images_ids:
            img_list = []
            for a in acronyms:
                # File names start with "<id>_"; match on the leading token.
                nam = [
                    x
                    for x in self.prepared_images[a].keys()
                    if str(k) == re.sub("_.*", "", x)
                ]
                if len(nam) == 0:
                    print(f"There were not images for {k} ids")
                    break

                img_list.append(self.prepared_images[a][nam[0]])

            # Merge only when every requested set contributed an image.
            if len(img_list) == len(acronyms):
                results_img[f'{k}_{"_".join(acronyms)}'] = merge_images(
                    img_list, ratio_list
                )

        self.prepared_images[f'merged_{"_".join(acronyms)}'] = results_img

        print(f'Images stored in self.prepared_images["merged_{"_".join(acronyms)}"]')

    def image_stitching(self, acronyms: list, to_results_image: bool = False):
        """
        Stitch (horizontally) previously prepared images stored in `self.prepared_images`.

        Parameters
        ----------
        acronyms : list
            List of image names to be stitched.

        to_results_image : bool
            Boolean value indicating whether images obtained from the
            `series_analysis_nuclei()` or `series_analysis_chromatinization()`
            methods of the `NucleiFinder` class should be stitched to the right
            side of the images in the `acronyms` list.

        Raises
        ------
        ValueError
            If any acronym is not a stored acronym.

        Notes
        -----
        The stitched images are stored under
        ``self.prepared_images["stitched_<acronyms>"]``.
        To access them, use the ``get_prepared_images()`` method; to save them
        to disk, use the ``save_prepared_images()`` method.
        """

        for a in acronyms:
            if a not in list(self.prepared_images.keys()):
                raise ValueError(f"Incorrect {a} acronyme!")

        results_img = {}
        for k in tqdm(self.images_ids):
            img_list = []
            for a in acronyms:
                nam = [
                    x
                    for x in self.prepared_images[a].keys()
                    if str(k) == re.sub("_.*", "", x)
                ]
                if len(nam) == 0:
                    print(f"There were not images for {k} ids")
                    break

                img_list.append(self.prepared_images[a][nam[0]])

            if to_results_image:
                # Append the analysis result image, when present, at the right.
                nam = [
                    x
                    for x in self.results_images.keys()
                    if str(k) == re.sub("_.*", "", x)
                ]
                if len(nam) != 0:
                    img_list.append(self.results_images[nam[0]])

                if len(img_list) == len(acronyms) + 1:
                    results_img[f'{k}_{"_".join(acronyms)}_res'] = cv2.hconcat(img_list)

            # BUG FIX: this branch was previously guarded by
            # `elif to_results_image is not False:`, which is unreachable when
            # `to_results_image` is False — so with the default argument no
            # image was ever stitched.
            elif len(img_list) == len(acronyms):
                results_img[f'{k}_{"_".join(acronyms)}'] = cv2.hconcat(img_list)

        self.prepared_images[f'stitched_{"_".join(acronyms)}'] = results_img

        print(f'Images stored in self.prepared_images["stitched_{"_".join(acronyms)}"]')

    def save_raw(self, path_to_save: str = ""):
        """
        Save `self.results_images` loaded by the `self.load_experimental_images()` method,
        obtained from the `series_analysis_nuclei()` or `series_analysis_chromatinization()`
        methods of the `NucleiFinder` class for later usage with cls.load_from_dict() method.
        The data will be saved with a `.inuc` extension.

        Parameters
        ----------
        path_to_save : str
            The directory path where the images will be saved.
            Default is the current working directory.
        """

        full_path = os.path.join(path_to_save, f"{self.experiment_name}.inuc")

        # Serialize the whole image dictionary into one compressed-style archive.
        np.savez(full_path, **self.results_images)
A class for managing, preprocessing, merging, stitching, saving, and loading microscopy or flow cytometry images used in NucleiFinder-based analyses.
This class provides a unified interface for:
- loading image data,
- selecting images by IDs,
- preprocessing images (equalization, CLAHE, gamma/contrast/brightness adjustment),
- merging images with user-defined intensity ratios,
- stitching images horizontally,
- retrieving and saving processed image sets.
The class stores original or loaded data in the results_images attribute,
and all processed images in prepared_images under user-defined acronyms.
These acronyms allow flexible retrieval with get_prepared_images()
and exporting via save_prepared_images().
Parameters
images_ids : list[int] List of selected image identifiers.
result_dict : dict or None
Dictionary containing raw or preprocessed images.
If None, images may later be loaded or processed from file.
experiment_name : str Name of the experiment. Used for saving and structuring output.
Attributes
images_ids : list[int] IDs of images managed by the class.
results_images : dict or None Dictionary containing raw or analysis-derived images.
experiment_name : str Name of the experiment. Used in saved filenames.
prepared_images : dict Container for processed/adjusted/merged/stitched images, indexed by user-defined acronyms.
Notes
Processed images are stored only in memory until saved explicitly with
save_prepared_images().
Raw images loaded from NucleiFinder analyses can be saved for later reuse
in a serialized .inuc format using save_raw().
Examples
Load image results from an analysis:
>>> manager = ImagesManagement.load_experimental_images(results, "experiment1")
Adjust selected images:
>>> manager.adjust_images(
... acronyme="adj",
... path_to_images="path/to/imgs",
... eq=True,
... clahe=True
... )
Merge multiple prepared sets:
>>> manager.image_merging(["adj", "other"], ratio_list=[0.7, 0.3])
Retrieve processed images:
>>> imgs = manager.get_prepared_images("adj")
Save stitched images to disk:
>>> manager.save_prepared_images("stitched_adj_other", "./output/")
def __init__(self, images_ids, result_dict, experiment_name):
    """
    Initialize the ImagesManagement object.

    Parameters
    ----------
    images_ids : list[int]
        List of image identifiers.

    result_dict : dict or None
        Dictionary containing processed images.

    experiment_name : str
        Name of the experiment.
    """

    self.images_ids = images_ids
    """Stores the list of image IDs managed by this instance."""
    self.results_images = result_dict
    """Stores dictionary containing processed images."""
    self.experiment_name = experiment_name
    """Stores the experiment name for file naming and organizational purposes."""
    # Starts empty; populated by adjust_images()/image_merging()/image_stitching().
    self.prepared_images = {}
    """Dictionary for storing processed images (adjusted, merged, stitched),
    indexed by user-defined acronyms for flexible retrieval."""
Initialize the ImagesManagement object.
Parameters
images_ids : list[int] List of image identifiers.
result_dict : dict or None Dictionary containing processed images.
experiment_name : str Name of the experiment.
Dictionary for storing processed images (adjusted, merged, stitched), indexed by user-defined acronyms for flexible retrieval.
@classmethod
def load_from_dict(cls, path: str, experiment_name: str):
    """
    Load an ImagesManagement instance from a `.inuc` serialized dictionary.

    Parameters
    ----------
    path : str
        Path to the `.inuc` file exported with `save_raw()`.

    experiment_name : str
        Name of the experiment.

    Returns
    -------
    ImagesManagement
        A reconstructed ImagesManagement object.

    Raises
    ------
    ValueError
        If `path` lacks the `.inuc` extension or does not exist.
    """

    # Guard clauses preserve the original validation order:
    # extension first, then existence.
    if ".inuc" not in path:
        raise ValueError(
            "\nInvalid dictionary to load. It must contain a .inuc extension!"
        )

    if not os.path.exists(path):
        raise ValueError("\nInvalid path!")

    archive = np.load(path)
    image_dict = {key: archive[key] for key in archive}

    # Image IDs are the leading token before the first '_' of each key.
    id_list = [re.sub("_.*", "", key) for key in image_dict.keys()]

    return cls(id_list, image_dict, experiment_name)
Load an ImagesManagement instance from a .inuc serialized dictionary.
Parameters
path : str
Path to the .inuc file exported with save_raw().
experiment_name : str Name of the experiment.
Returns
ImagesManagement A reconstructed ImagesManagement object.
@classmethod
def load_experimental_images(cls, results_dict: dict, experiment_name: str):
    """
    Load results exported from NucleiFinder series analysis.

    Builds the object from the output of series_analysis_nuclei()
    or series_analysis_chromatinization() of the NucleiFinder class.

    Parameters
    ----------
    results_dict : dict
        Dictionary returned by `series_analysis_nuclei()` or
        `series_analysis_chromatinization()`.

    experiment_name : str
        Name of the experiment.

    Returns
    -------
    ImagesManagement

    Raises
    ------
    ValueError
        If the entries do not expose exactly the 'stats' and 'img' keys.
    """

    # Validate the structure on the first entry only.
    first_key = list(results_dict.keys())[0]
    if set(results_dict[first_key].keys()) != {"stats", "img"}:
        raise ValueError(
            "Incorrect data provided. The data must come from series_analysis_nuclei() "
            "or series_analysis_chromatinization() of the NucleiFinder class."
        )

    # Keep only the rendered image per entry; IDs are the leading
    # token before the first '_' of each key.
    res_dict = {key: results_dict[key]["img"] for key in results_dict.keys()}
    id_list = [re.sub("_.*", "", key) for key in results_dict.keys()]

    return cls(id_list, res_dict, experiment_name)
Load results exported from NucleiFinder series analysis.
Initialize the object with results from series_analysis_nuclei() or series_analysis_chromatinization() of the NucleiFinder class.
Parameters
results_dict : dict
Dictionary returned by series_analysis_nuclei() or
series_analysis_chromatinization().
experiment_name : str Name of the experiment.
Returns
ImagesManagement
@classmethod
def load_images_ids(cls, images_ids: list, experiment_name: str):
    """
    Initialize the object with a list of image IDs for processing.

    Parameters
    ----------
    images_ids : list[int]
        List of selected image IDs.

    experiment_name : str
        Name of the experiment.

    Returns
    -------
    ImagesManagement

    Raises
    ------
    ValueError
        If `images_ids` is empty.
    """

    # An empty selection cannot drive any later processing step.
    if not images_ids:
        raise ValueError(
            "Incorrect data provided. There must be a list of image IDs."
        )

    return cls(images_ids, None, experiment_name)
Initialize the object with a list of image IDs for processing.
Parameters
images_ids : list[int] List of selected image IDs.
experiment_name : str Name of the experiment.
Returns
ImagesManagement
def get_included_acronyms(self):
    """
    Print the data acronyms for adjusted images, processed using the
    self.adjust_images(), self.image_merging(), and self.image_stitching() methods.

    Acronym information is essential for retrieving and saving data using
    the self.get_prepared_images() and self.save_prepared_images() methods.

    Notes
    -----
    This method prints the list of available acronyms but does not return it.
    """

    if not self.prepared_images:
        print("Nothing to return!")
        return

    print("\nAvaiable stored images:\n")
    for acronym_key in self.prepared_images:
        print(acronym_key)
Print the data acronyms for adjusted images, processed using the self.adjust_images(), self.image_merging(), and self.image_stitching() methods.
Acronym information is essential for retrieving and saving data using the self.get_prepared_images() and self.save_prepared_images() methods.
Notes
This method prints the list of available acronyms but does not return it.
def get_prepared_images(self, acronyme=None):
    """
    Retrieves the prepared images (returned from adjust_images()) stored in the object.

    Parameters
    ----------
    acronyme : str or None
        Acronym identifying a processed image set. If None, prints available keys.

    Returns
    -------
    dict
        Dictionary of prepared images.

    Raises
    ------
    ValueError
        If `acronyme` is not a stored acronym.
    """

    if acronyme is None:
        # No acronym requested: list what is available instead.
        self.get_included_acronyms()
        return None

    try:
        return self.prepared_images[acronyme]
    except KeyError:
        raise ValueError("Incorrect acronyme!") from None
Retrieves the prepared images (returned from adjust_images()) stored in the object.
Parameters
acronyme : str or None Acronym identifying a processed image set. If None, prints available keys.
Returns
dict Dictionary of prepared images.
def save_prepared_images(self, acronyme: str, path_to_save: str = ""):
    """
    Saves prepared images (returned from adjust_images() method) to the specified directory.

    Parameters
    ----------
    acronyme : str
        Acronym identifying the processed image set to save.
        If None, the available acronyms are printed instead.

    path_to_save : str
        Directory path where the images will be saved. Default is the current working directory.

    Raises
    ------
    ValueError
        If `acronyme` is not a stored acronym.
    """

    if acronyme is None:
        # No acronym requested: list what is available instead.
        self.get_included_acronyms()
        return

    if acronyme not in self.prepared_images:
        raise ValueError("Incorrect acronyme!")

    # Images go into "<experiment>_<acronyme>" inside path_to_save.
    out_dir = os.path.join(path_to_save, f"{self.experiment_name}_{acronyme}")
    os.makedirs(out_dir, exist_ok=True)

    image_set = self.prepared_images[acronyme]
    for image_name in tqdm(image_set.keys()):
        cv2.imwrite(os.path.join(out_dir, image_name + ".png"), image_set[image_name])
Saves prepared images (returned from adjust_images() method) to the specified directory.
Parameters
path_to_save : str Directory path where the images will be saved. Default is the current working directory.
def adjust_images(
    self,
    acronyme: str,
    path_to_images: str,
    file_extension: str = "tif",
    eq: bool = True,
    clahe: bool = True,
    kernal: tuple = (50, 50),
    fille_name_part: str = "",
    color: str = "gray",
    max_intensity: int = 65535,
    min_intenisty: int = 0,
    brightness: int = 1000,
    contrast: float = 1.0,
    gamma: float = 1.0,
    img_n: int = 0,
):
    """
    Prepares selected images for processing, applying histogram equalization and CLAHE, if required.

    Parameters
    ----------
    acronyme : str
        Name of images being adjusted in this run.

    path_to_images : str
        Path to the directory containing images.

    file_extension : str
        Image file extension. Default is 'tif'.

    eq : bool
        Whether to apply histogram equalization. Default is True.

    clahe : bool
        Whether to apply CLAHE. Default is True.

    kernal : tuple
        Kernel size for CLAHE. Default is (50, 50).

    fille_name_part : str
        Part of the file name to filter images. Default is an empty string.

    color : str
        Color space to use. Default is 'gray'.

    max_intensity : int
        Maximum intensity for image adjustment. Default is 65535.

    min_intenisty : int
        Minimum intensity for image adjustment. Default is 0.

    brightness : int
        Brightness adjustment value. Default is 1000.

    contrast : float
        Contrast adjustment factor. Default is 1.0.

    gamma : float
        Gamma correction factor. Default is 1.0.

    img_n : int
        Number of images to randomly sample. Default is 0, which means all images.

    Notes
    -----
    The processed images are stored under ``self.prepared_images[acronyme]``;
    nothing is returned.

    To access the processed images, use the ``get_prepared_images()`` method.

    To save the processed images to disk, use the ``save_prepared_images()`` method.
    """

    results_dict = {}

    files = glob.glob(os.path.join(path_to_images, "*." + file_extension))

    # Optional case-insensitive file-name filter.
    if len(fille_name_part) > 0:
        files = [x for x in files if fille_name_part.lower() in x.lower()]

    # Keep only files whose leading token (before the first '_') is a selected ID.
    selected_id = self.images_ids
    if len(selected_id) > 0:
        selected_id = [str(x) for x in selected_id]
        files = [
            x
            for x in files
            if re.sub("_.*", "", os.path.basename(x)) in selected_id
        ]

    if img_n > 0:
        files = random.sample(files, img_n)

    for file in tqdm(files):

        image = load_image(file)

        # FIX: narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit). Conversion fails when the image
        # is already single-channel, in which case it is kept as-is.
        try:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        except Exception:
            pass

        if eq is True:
            image = equalizeHist_16bit(image)

        if clahe is True:
            image = clahe_16bit(image, kernal=kernal)

        image = adjust_img_16bit(
            img=image,
            color=color,
            max_intensity=max_intensity,
            min_intenisty=min_intenisty,
            brightness=brightness,
            contrast=contrast,
            gamma=gamma,
        )

        results_dict[os.path.basename(file)] = image

    self.prepared_images[acronyme] = results_dict
Prepares selected images for processing, applying histogram equalization and CLAHE, if required.
Parameters
acronyme : str Name of images being adjusted in this run.
path_to_images : str Path to the directory containing images.
file_extension : str Image file extension. Default is 'tif'.
eq : bool Whether to apply histogram equalization. Default is True.
clahe : bool Whether to apply CLAHE. Default is True.
kernal : tuple Kernel size for CLAHE. Default is (50, 50).
fille_name_part : str Part of the file name to filter images. Default is an empty string.
color : str Color space to use. Default is 'gray'.
max_intensity : int Maximum intensity for image adjustment. Default is 65535.
min_intenisty : int Minimum intensity for image adjustment. Default is 0.
brightness : int Brightness adjustment value. Default is 1000.
contrast : float Contrast adjustment factor. Default is 1.0.
gamma : float Gamma correction factor. Default is 1.0.
img_n : int Number of images to process. Default is 0, which means all images.
Returns
dict Dictionary containing the processed images.
Notes
To access the processed images, use the get_prepared_images() method.
To save the processed images to disk, use the save_prepared_images() method.
823 def image_merging(self, acronyms: list, ratio_list: list): 824 """ 825 Merge previously prepared images stored in `self.prepared_images`, 826 adjusted based on the image ratios. The used ratios adjust relative image intensity. 827 828 Parameters 829 ---------- 830 acronyme : list 831 List of image names to be merged. 832 833 ratio_list : list[float] 834 List of ratio intensity values (0.0–1.0) for the merged image. 835 The `acronyme` list and `ratio_list` must be of the same length. 836 837 Returns 838 ------- 839 dict 840 Dictionary of processed images. 841 842 Notes 843 ----- 844 To access the processed images, use the ``get_prepared_images()`` method. 845 846 To save the processed images to disk, use the ``save_prepared_images()`` method. 847 """ 848 849 for a in acronyms: 850 if a not in list(self.prepared_images.keys()): 851 raise ValueError(f"Incorrect {a} acronyme!") 852 853 results_img = {} 854 for k in self.images_ids: 855 img_list = [] 856 for a in acronyms: 857 nam = [ 858 x 859 for x in self.prepared_images[a].keys() 860 if str(k) == re.sub("_.*", "", x) 861 ] 862 if len(nam) == 0: 863 print(f"There were not images for {k} ids") 864 break 865 866 img_list.append(self.prepared_images[a][nam[0]]) 867 868 if len(img_list) == len(acronyms): 869 results_img[f'{k}_{"_".join(acronyms)}'] = merge_images( 870 img_list, ratio_list 871 ) 872 873 self.prepared_images[f'merged_{"_".join(acronyms)}'] = results_img 874 875 print(f'Images stored in self.prepared_images["merged_{"_".join(acronyms)}"]')
Merge previously prepared images stored in self.prepared_images,
adjusted based on the image ratios. The used ratios adjust relative image intensity.
Parameters
acronyms : list List of image names to be merged.
ratio_list : list[float]
List of ratio intensity values (0.0–1.0) for the merged image.
The acronyme list and ratio_list must be of the same length.
Returns
dict Dictionary of processed images.
Notes
To access the processed images, use the get_prepared_images() method.
To save the processed images to disk, use the save_prepared_images() method.
877 def image_stitching(self, acronyms: list, to_results_image: bool = False): 878 """ 879 Stitch (horizontally) previously prepared images stored in `self.prepared_images`. 880 881 Parameters 882 ---------- 883 acronyme : list 884 List of image names to be stitched. 885 886 to_results_image : bool 887 Boolean value indicating whether images obtained from the 888 `series_analysis_nuclei()` or `series_analysis_chromatinization()` 889 methods of the `NucleiFinder` class should be stitched to the right 890 side of the images in the `acronyme` list. 891 892 Returns 893 ------- 894 dict 895 Dictionary of processed images. 896 897 Notes 898 ----- 899 To access the processed images, use the ``get_prepared_images()`` method. 900 901 To save the processed images to disk, use the ``save_prepared_images()`` method. 902 """ 903 904 for a in acronyms: 905 if a not in list(self.prepared_images.keys()): 906 raise ValueError(f"Incorrect {a} acronyme!") 907 908 results_img = {} 909 for k in tqdm(self.images_ids): 910 img_list = [] 911 for a in acronyms: 912 nam = [ 913 x 914 for x in self.prepared_images[a].keys() 915 if str(k) == re.sub("_.*", "", x) 916 ] 917 if len(nam) == 0: 918 print(f"There were not images for {k} ids") 919 break 920 921 img_list.append(self.prepared_images[a][nam[0]]) 922 923 if to_results_image: 924 nam = [ 925 x 926 for x in self.results_images.keys() 927 if str(k) == re.sub("_.*", "", x) 928 ] 929 if len(nam) != 0: 930 img_list.append(self.results_images[nam[0]]) 931 932 if len(img_list) == len(acronyms) + 1: 933 results_img[f'{k}_{"_".join(acronyms)}_res'] = cv2.hconcat(img_list) 934 935 elif to_results_image is not False: 936 if len(img_list) == len(acronyms): 937 results_img[f'{k}_{"_".join(acronyms)}'] = cv2.hconcat(img_list) 938 939 self.prepared_images[f'stitched_{"_".join(acronyms)}'] = results_img 940 941 print(f'Images stored in self.prepared_images["stitched_{"_".join(acronyms)}"]')
Stitch (horizontally) previously prepared images stored in self.prepared_images.
Parameters
acronyms : list List of image names to be stitched.
to_results_image : bool
Boolean value indicating whether images obtained from the
series_analysis_nuclei() or series_analysis_chromatinization()
methods of the NucleiFinder class should be stitched to the right
side of the images in the acronyme list.
Returns
dict Dictionary of processed images.
Notes
To access the processed images, use the get_prepared_images() method.
To save the processed images to disk, use the save_prepared_images() method.
943 def save_raw(self, path_to_save: str = ""): 944 """ 945 Save `self.results_images` loaded by the `self.load_experimental_images()` method, 946 obtained from the `series_analysis_nuclei()` or `series_analysis_chromatinization()` 947 methods of the `NucleiFinder` class for later usage with cls.load_from_dict() method. 948 The data will be saved with a `.inuc` extension. 949 950 Parameters 951 ---------- 952 path_to_save : str 953 The directory path where the images will be saved. 954 Default is the current working directory. 955 """ 956 957 full_path = os.path.join(path_to_save, f"{self.experiment_name}.inuc") 958 959 np.savez(full_path, **self.results_images)
Save self.results_images loaded by the self.load_experimental_images() method,
obtained from the series_analysis_nuclei() or series_analysis_chromatinization()
methods of the NucleiFinder class for later usage with cls.load_from_dict() method.
The data will be saved with a .inuc extension.
Parameters
path_to_save : str The directory path where the images will be saved. Default is the current working directory.
class NucleiFinder(ImageTools, RepTools):
    """
    Implements a comprehensive pipeline for automated segmentation,
    selection, and analysis of cell nuclei and their internal chromatin structure
    in microscopy images.

    It utilizes a pre-trained deep learning model (StarDist2D) for initial
    nuclear identification, followed by the application of advanced morphological
    and intensity filters, and a dedicated algorithm for quantifying chromatinization.
    The class provides detailed control over the hyperparameters for both the
    segmentation process and image preprocessing stages.

    Parameters
    ----------
    image : np.ndarray, optional
        The input image (typically 16-bit) for analysis.

    test_results : list, optional
        Plots resulting from parameter testing (e.g., NMS/Prob combinations).

    hyperparameter_nuclei : dict, optional
        Parameters for nuclei segmentation and filtering (e.g., 'nms', 'prob', 'min_size', 'circularity').

    hyperparameter_chromatinization : dict, optional
        Parameters for segmenting and filtering chromatin spots (e.g., 'cut_point', 'ratio').

    img_adj_par_chrom : dict, optional
        Image adjustment parameters (gamma, contrast) specifically for chromatin analysis.

    img_adj_par : dict, optional
        Image adjustment parameters for nuclei segmentation.

    show_plots : bool, optional
        Flag controlling the automatic display of visual results.

    nuclei_results : dict, optional
        A dictionary storing numerical data (features) extracted from the nuclei.

    images : dict, optional
        A dictionary storing output images and masks.

    Attributes
    ----------
    image : np.ndarray
        The currently loaded image for analysis.

    test_results : list
        The visual outcomes of NMS/Prob parameter tests.

    hyperparameter_nuclei : dict
        A dictionary of active parameters used by the `find_nuclei()` and `select_nuclei()` methods.

    hyperparameter_chromatinization : dict
        A dictionary of active parameters used by the `nuclei_chromatinization()` method.

    img_adj_par : dict
        Image correction parameters for nuclei segmentation.

    img_adj_par_chrom : dict
        Image correction parameters for chromatin analysis.

    show_plots : bool
        The state of the plot display flag.

    nuclei_results : dict
        Stores feature dictionaries for: all detected ('nuclei'), selected ('nuclei_reduced'),
        and chromatinization data ('nuclei_chromatinization').

    images : dict
        Stores masks and images visualizing the results.

    series_im : bool
        Flag indicating if the class is operating in a batch or series processing mode.

    Methods
    -------
    set_nms(nms)
        Sets the Non-Maximum Suppression (NMS) threshold.

    set_prob(prob)
        Sets the segmentation probability threshold.

    set_nuclei_circularity(circ)
        Sets the minimum required circularity for a nucleus.

    set_nuclei_local_intenisty_FC(local_FC)
        Sets the factor used for removing false positives based on local intensity differences.

    set_nuclei_global_area_FC(global_FC)
        Sets the factor used for removing size-based outlier false positives.

    set_nuclei_size(size)
        Sets the minimum and maximum area (in pixels) for nuclei selection.

    set_nuclei_min_mean_intensity(intensity)
        Sets the minimum required mean intensity value for a nucleus.

    set_chromatinization_size(size)
        Sets the minimum and maximum area (in pixels) for chromatin spot selection.

    set_chromatinization_cut_point(cut_point)
        Sets the factor used to adjust the chromatin segmentation threshold (Otsu's method).

    set_adj_image_gamma(gamma)
        Sets the gamma correction for the nuclei image.

    set_adj_chrom_contrast(contrast)
        Sets the contrast adjustment for the chromatinization image.

    current_parameters_nuclei (property)
        Returns the active nuclei segmentation and filtering parameters.

    find_nuclei()
        Performs nuclei segmentation using StarDist and extracts initial features.

    select_nuclei()
        Filters the detected nuclei based on set morphological and intensity criteria.

    nuclei_chromatinization()
        Performs quantitative and morphological analysis of chromatin spots in selected nuclei.

    get_features(model_out, image)
        Calculates geometric and intensity features from a segmented mask (label image).

    Notes
    -----
    The typical analysis workflow follows this order:
    1. `input_image()`
    2. `find_nuclei()`
    3. `select_nuclei()` (Optional)
    4. `nuclei_chromatinization()` (Optional)
    """

    def __init__(
        self,
        image=None,
        test_results=None,
        hyperparameter_nuclei=None,
        hyperparameter_chromatinization=None,
        img_adj_par_chrom=None,
        img_adj_par=None,
        show_plots=None,
        nuclei_results=None,
        images=None,
    ):
        """
        Initialize a NucleiFinder. Any argument left as None falls back to the
        documented default.

        Parameters
        ----------
        image : np.ndarray, optional
            The input image for analysis.
            Default: None.

        test_results : list, optional
            A list of plots or images resulting from parameter testing.
            Default: None.

        hyperparameter_nuclei : dict, optional
            The segmentation parameters for nuclei detection.
            Default:
            {'nms': 0.8, 'prob': 0.4, 'max_size': 1000, 'min_size': 20,
            'circularity': 0.6, 'intensity_mean': 6553.5, 'nn_min': 10,
            'FC_diff_global': 1.5, 'FC_diff_local_intensity': 0.6}

        hyperparameter_chromatinization : dict, optional
            The analysis parameters for chromatin spots within the nuclei.
            Default:
            {'max_size': 800, 'min_size': 2, 'ratio': 0.1, 'cut_point': 0.95}

        img_adj_par_chrom : dict, optional
            Image adjustment parameters (gamma, contrast, brightness) for chromatin analysis.
            Default: {'gamma': 0.25, 'contrast': 5, 'brightness': 950}

        img_adj_par : dict, optional
            Image adjustment parameters (gamma, contrast, brightness) for nuclei segmentation.
            Default: {'gamma': 0.9, 'contrast': 2, 'brightness': 1000}

        show_plots : bool, optional
            Flag to determine whether results and plots should be displayed automatically.
            Default: True.

        nuclei_results : dict, optional
            A dictionary storing the numerical results of the analysis.
            Default: {'nuclei': None, 'nuclei_reduced': None, 'nuclei_chromatinization': None}

        images : dict, optional
            A dictionary storing the output images (e.g., masks).
            Default: {'nuclei': None, 'nuclei_reduced': None, 'nuclei_chromatinization': None}

        Notes
        -----
        The default value for 'intensity_mean' in hyperparameter_nuclei is calculated
        as (2**16 - 1) / 10, which represents 10% of the maximum 16-bit value (6553.5).

        The image adjustment parameters are crucial for optimizing contrast and brightness
        to improve the performance of both the StarDist model and the subsequent
        chromatin thresholding.
        """

        # BUGFIX: the previous `image or None` evaluated bool(ndarray), which
        # raises "truth value of an array ... is ambiguous" for any image with
        # more than one pixel; assign directly instead.
        self.image = image
        """Loaded input image."""

        self.test_results = test_results
        """Results of parameter tests.

        This attribute or method stores the outcomes of parameter testing procedures.
        For interactive browsing and inspection of the results, use the
        `browser_test(self)` method."""

        # Explicit `is None` checks so that deliberately passed (even empty)
        # dicts are honored rather than silently replaced by the defaults.
        if hyperparameter_nuclei is None:
            hyperparameter_nuclei = {
                "nms": 0.8,
                "prob": 0.4,
                "max_size": 1000,
                "min_size": 20,
                "circularity": 0.6,
                "intensity_mean": (2**16 - 1) / 10,
                "nn_min": 10,
                "FC_diff_global": 1.5,
                "FC_diff_local_intensity": 0.6,
            }
        self.hyperparameter_nuclei = hyperparameter_nuclei
        """Active nuclei segmentation/filter parameters."""

        if hyperparameter_chromatinization is None:
            hyperparameter_chromatinization = {
                "max_size": 800,
                "min_size": 2,
                "ratio": 0.1,
                "cut_point": 0.95,
            }
        self.hyperparameter_chromatinization = hyperparameter_chromatinization
        """Active chromatin analysis parameters."""

        if img_adj_par_chrom is None:
            img_adj_par_chrom = {
                "gamma": 0.25,
                "contrast": 5,
                "brightness": 950,
            }
        self.img_adj_par_chrom = img_adj_par_chrom
        """Image adjustment for chromatin analysis."""

        if img_adj_par is None:
            img_adj_par = {
                "gamma": 0.9,
                "contrast": 2,
                "brightness": 1000,
            }
        self.img_adj_par = img_adj_par
        """Image adjustment for nuclei segmentation."""

        # BUGFIX: `show_plots or True` turned an explicit show_plots=False
        # into True; only substitute the default when the argument is None.
        self.show_plots = True if show_plots is None else show_plots
        """Flag controlling plot display."""

        if nuclei_results is None:
            nuclei_results = {
                "nuclei": None,
                "nuclei_reduced": None,
                "nuclei_chromatinization": None,
            }
        self.nuclei_results = nuclei_results
        """Stored dictionary of nuclei analysis results."""

        if images is None:
            images = {
                "nuclei": None,
                "nuclei_reduced": None,
                "nuclei_chromatinization": None,
            }
        self.images = images
        """Stored dictionary of images from nuclei analysis."""

        # series images
        self.series_im = False
        """Flag for batch/series image processing."""

    def set_nms(self, nms: float):
        """
        Set the Non-Maximum Suppression (NMS) threshold.

        The NMS threshold controls how aggressively overlapping detections are suppressed.
        A lower value reduces the probability of overlapping nuclei being kept.

        Parameters
        ----------
        nms : float
            The NMS IoU threshold value.
        """

        self.hyperparameter_nuclei["nms"] = nms

    def set_prob(self, prob: float):
        """
        Set the probability threshold used in segmentation.

        The probability threshold determines the minimum confidence required for an object
        (e.g., a nucleus) to be classified as a segmented entity. Higher values result in
        fewer segmented objects, as only detections with strong confidence scores are kept.
        This may lead to omission of weaker or less distinct structures.

        Because optimal values depend on image characteristics, it is important to visually
        inspect segmentation results produced with different thresholds to determine the
        most suitable setting.

        Parameters
        ----------
        prob : float
            The probability threshold value.
        """

        self.hyperparameter_nuclei["prob"] = prob

    def set_nuclei_circularity(self, circ: float):
        """
        Set the 'circularity' parameter: the minimal circularity a nucleus
        must have to be kept.

        Parameters
        ----------
        circ : float
            Nuclei circularity value.
        """

        self.hyperparameter_nuclei["circularity"] = circ

    def set_nuclei_local_intenisty_FC(self, local_FC: float):
        """
        Set the 'FC_diff_local_intensity' parameter, used to remove false
        positive nuclei detected in a single image based on local intensity
        differences.

        Parameters
        ----------
        local_FC : float
            local_FC value.
        """

        self.hyperparameter_nuclei["FC_diff_local_intensity"] = local_FC

    def set_nuclei_global_area_FC(self, global_FC: float):
        """
        Set the 'FC_diff_global' parameter, used to remove false positive
        nuclei that are outliers from the global mean area size within a
        single image.

        Parameters
        ----------
        global_FC : float
            global_FC value.
        """

        self.hyperparameter_nuclei["FC_diff_global"] = global_FC

    def set_nuclei_size(self, size: tuple):
        """
        Set the minimal and maximal nucleus area (px) for selection.

        Parameters
        ----------
        size : tuple
            (min_value, max_value)
        """

        self.hyperparameter_nuclei["min_size"] = size[0]
        self.hyperparameter_nuclei["max_size"] = size[1]

    def set_nuclei_min_mean_intensity(self, intensity: int):
        """
        Set the 'intensity_mean' parameter: the minimum mean intensity of
        all pixel intensities within the nucleus.

        Parameters
        ----------
        intensity : int
            intensity value.
        """

        self.hyperparameter_nuclei["intensity_mean"] = intensity

    def set_chromatinization_size(self, size: tuple):
        """
        Set the minimal and maximal chromatinization spot area (px) within
        the nucleus.

        Parameters
        ----------
        size : tuple
            (min_value, max_value)
        """

        self.hyperparameter_chromatinization["min_size"] = size[0]
        self.hyperparameter_chromatinization["max_size"] = size[1]

    # BUGFIX: annotation corrected from `int` to `float` — the default value
    # (0.1) and the documented range are fractional.
    def set_chromatinization_ratio(self, ratio: float):
        """
        Set the 'ratio' parameter. The 'ratio' is similar to 'circularity' as it
        describes the ratio between the maximum lengths in the x and y dimensions
        of the nucleus chromatinization.

        Parameters
        ----------
        ratio : float
            ratio value.
        """

        self.hyperparameter_chromatinization["ratio"] = ratio

    # BUGFIX: annotation corrected from `int` to `float` — the default value
    # (0.95) is fractional.
    def set_chromatinization_cut_point(self, cut_point: float):
        """
        Set the 'cut_point' parameter: a factor used to adjust the threshold
        for separating the background from chromatin spots.

        Parameters
        ----------
        cut_point : float
            cut_point value.
        """

        self.hyperparameter_chromatinization["cut_point"] = cut_point

    def set_adj_image_gamma(self, gamma: float):
        """
        Set the gamma correction applied to the nucleus image.

        Parameters
        ----------
        gamma : float
            gamma value.
        """

        self.img_adj_par["gamma"] = gamma

    def set_adj_image_contrast(self, contrast: float):
        """
        Set the contrast adjustment applied to the nucleus image.

        Parameters
        ----------
        contrast : float
            contrast value.
        """

        self.img_adj_par["contrast"] = contrast

    def set_adj_image_brightness(self, brightness: float):
        """
        Set the brightness adjustment applied to the nucleus image.

        Parameters
        ----------
        brightness : float
            brightness value.
        """

        self.img_adj_par["brightness"] = brightness

    def set_adj_chrom_gamma(self, gamma: float):
        """
        Set the gamma correction applied to the nucleus chromatinization image.

        Parameters
        ----------
        gamma : float
            gamma value.
        """

        self.img_adj_par_chrom["gamma"] = gamma

    def set_adj_chrom_contrast(self, contrast: float):
        """
        Set the contrast adjustment applied to the nucleus chromatinization image.

        Parameters
        ----------
        contrast : float
            contrast value.
        """

        self.img_adj_par_chrom["contrast"] = contrast

    def set_adj_chrom_brightness(self, brightness: float):
        """
        Set the brightness adjustment applied to the nucleus chromatinization image.

        Parameters
        ----------
        brightness : float
            brightness value.
        """

        self.img_adj_par_chrom["brightness"] = brightness

    @property
    def current_parameters_nuclei(self):
        """
        Return (and print) the current nuclei analysis parameters.

        Returns
        -------
        dict
            Nuclei analysis parameters.
        """
        print(self.hyperparameter_nuclei)
        return self.hyperparameter_nuclei

    @property
    def current_parameters_chromatinization(self):
        """
        Return (and print) the current nuclei chromatinization analysis parameters.

        Returns
        -------
        dict
            Nuclei chromatinization analysis parameters.
        """

        print(self.hyperparameter_chromatinization)
        return self.hyperparameter_chromatinization

    @property
    def current_parameters_img_adj(self):
        """
        Return (and print) the current nuclei image setup.

        Returns
        -------
        dict
            Nuclei image setup.
        """

        print(self.img_adj_par)
        return self.img_adj_par

    @property
    def current_parameters_img_adj_chro(self):
        """
        Return (and print) the current nuclei chromatinization image setup.

        Returns
        -------
        dict
            Nuclei chromatinization image setup.
        """

        print(self.img_adj_par_chrom)
        return self.img_adj_par_chrom

    def get_results_nuclei(self):
        """
        Return the nuclei analysis results, optionally displaying a preview.

        Returns
        -------
        tuple(dict, np.ndarray) or None
            (nuclei results, result image), or None when nothing was analyzed.
        """

        if self.images["nuclei"] is None:
            print("No results to return!")
            return None
        else:
            if cfg._DISPLAY_MODE:
                if self.show_plots:
                    display_preview(self.resize_to_screen_img(self.images["nuclei"]))
            return self.nuclei_results["nuclei"], self.images["nuclei"]

    def get_results_nuclei_selected(self):
        """
        Return the nuclei analysis results after the data-selection thresholds
        have been applied, optionally displaying a preview.

        Returns
        -------
        tuple(dict, np.ndarray) or None
            (reduced nuclei results, result image), or None when nothing was selected.
        """

        if self.images["nuclei_reduced"] is None:
            print("No results to return!")
            return None
        else:
            if cfg._DISPLAY_MODE:
                if self.show_plots:
                    display_preview(
                        self.resize_to_screen_img(self.images["nuclei_reduced"])
                    )
            return self.nuclei_results["nuclei_reduced"], self.images["nuclei_reduced"]

    def get_results_nuclei_chromatinization(self):
        """
        Return the nuclei chromatinization analysis results, optionally
        displaying a preview.

        Returns
        -------
        tuple(dict, np.ndarray) or None
            (chromatinization results, result image), or None when not analyzed.
        """

        if self.images["nuclei_chromatinization"] is None:
            print("No results to return!")
            return None
        else:
            if cfg._DISPLAY_MODE:
                if self.show_plots:
                    # NOTE(review): unlike the other getters, the preview here is
                    # not resized to the screen — confirm whether this is intended.
                    display_preview(self.images["nuclei_chromatinization"])
            return (
                self.nuclei_results["nuclei_chromatinization"],
                self.images["nuclei_chromatinization"],
            )

    def add_test(self, plots):
        """
        Helper method: store parameter-test plots on the instance.
        """
        # BUGFIX: the original had this docstring as a stray no-op string
        # placed at class level *after* the method body.
        self.test_results = plots

    def input_image(self, img):
        """
        Add the image to the class for nuclei and/or chromatinization analysis,
        clearing any previous parameter-test results.

        Parameters
        ----------
        img : np.ndarray
            Input image.
        """

        self.image = img
        self.add_test(None)

    def get_features(self, model_out, image):
        """
        Calculate geometric and intensity features for every labeled region
        in a segmentation mask.

        Parameters
        ----------
        model_out : np.ndarray
            Label image (e.g., the StarDist prediction), where each nucleus
            carries a distinct integer label.

        image : np.ndarray
            Intensity image the labels refer to.

        Returns
        -------
        dict
            Dictionary of per-region feature lists (area, axis lengths,
            circularity, intensity statistics, coordinates, ...). All lists
            are index-aligned, one entry per region.
        """

        features = {
            "label": [],
            "area": [],
            "area_bbox": [],
            "area_convex": [],
            "area_filled": [],
            "axis_major_length": [],
            "axis_minor_length": [],
            "eccentricity": [],
            "equivalent_diameter_area": [],
            "feret_diameter_max": [],
            "solidity": [],
            "perimeter": [],
            "perimeter_crofton": [],
            "circularity": [],
            "intensity_max": [],
            "intensity_mean": [],
            "intensity_min": [],
            "ratio": [],
            "coords": [],
        }

        for region in skimage.measure.regionprops(model_out, intensity_image=image):

            # circularity = 4*pi*A / P^2 (1.0 for a perfect circle)
            if region.perimeter > 0:
                circularity = 4 * np.pi * region.area / (region.perimeter**2)
            else:
                circularity = 0

            features["area"].append(region.area)
            features["area_bbox"].append(region.area_bbox)
            features["area_convex"].append(region.area_convex)
            features["area_filled"].append(region.area_filled)
            features["axis_major_length"].append(region.axis_major_length)
            features["axis_minor_length"].append(region.axis_minor_length)
            features["eccentricity"].append(region.eccentricity)
            features["equivalent_diameter_area"].append(region.equivalent_diameter_area)
            features["feret_diameter_max"].append(region.feret_diameter_max)
            features["solidity"].append(region.solidity)
            features["perimeter"].append(region.perimeter)
            features["perimeter_crofton"].append(region.perimeter_crofton)
            features["label"].append(region.label)
            features["coords"].append(region.coords)
            features["circularity"].append(circularity)
            features["intensity_max"].append(np.max(region.intensity_max))
            # BUGFIX: np.max was previously applied to intensity_min and
            # intensity_mean as well; use the matching reductions. For
            # single-channel images the values are scalars, so the result is
            # unchanged; for multichannel inputs the summary is now correct.
            features["intensity_min"].append(np.min(region.intensity_min))
            features["intensity_mean"].append(np.mean(region.intensity_mean))

        ratios = []

        # Ratio of minor to major axis for each region (0.0 when degenerate).
        for min_len, max_len in zip(
            features["axis_minor_length"], features["axis_major_length"]
        ):
            if max_len != 0:
                ratio = min_len / max_len
                ratios.append(ratio)
            else:
                ratios.append(float(0.0))

        features["ratio"] = ratios

        return features

    def nuclei_finder_test(self):
        """
        Run a testing analysis over a grid of 'nms' and 'prob' parameters
        for the image provided by the input_image() method.

        This method evaluates the performance of the internal NucleiFinder
        configuration using the currently loaded images, parameters, or model
        settings. It is typically used to check whether the detection, segmentation
        or preprocessing stages run correctly on sample data.

        Examples
        --------
        >>> nf.nuclei_finder_test()
        >>> nf.browser_test()
        """

        # The no-argument call lists/registers the available pretrained
        # models before the actual model is loaded.
        StarDist2D.from_pretrained()
        model = StarDist2D.from_pretrained("2D_versatile_fluo")

        nmst = [0.1, 0.2, 0.6]
        probt = [0.1, 0.5, 0.9]

        # Convert to grayscale; already-grayscale inputs raise cv2.error.
        try:
            img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
        except cv2.error:
            img = self.image

        plot = []

        # adjust image before segmentation
        img = adjust_img_16bit(
            img,
            brightness=self.img_adj_par["brightness"],
            contrast=self.img_adj_par["contrast"],
            gamma=self.img_adj_par["gamma"],
        )
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        fig = plt.figure(dpi=300)
        plt.imshow(img)
        plt.axis("off")
        plt.title("Original", fontsize=25)

        if cfg._DISPLAY_MODE:
            if self.show_plots:
                plt.show()

        plot.append(fig)

        for n in tqdm(nmst, desc="Loop 1: nmst"):
            print(f"\n➡️ Starting outer loop for n = {n}")

            for t in tqdm(probt, desc=f"  ↳ Loop 2 for n={n}", leave=False):
                print(f"   → Starting inner loop for t = {t}")

                labels, _ = model.predict_instances(
                    normalize(img.copy()), nms_thresh=n, prob_thresh=t
                )

                tmp = self.get_features(model_out=labels, image=img)

                fig = plt.figure(dpi=300)
                plt.imshow(render_label(labels, img=img))
                plt.axis("off")
                plt.title(
                    f"nms {n} & prob {t} \n detected nuc: {len(tmp['area'])}",
                    fontsize=25,
                )

                if cfg._DISPLAY_MODE:
                    if self.show_plots:
                        plt.show()

                plot.append(fig)

        self.add_test(plot)

    def find_nuclei(self):
        """
        Performs analysis on the image provided by the ``input_image()`` method
        using default or user-defined parameters.

        To show current parameters, use:
        - ``current_parameters_nuclei``
        - ``current_parameters_img_adj``

        To set new parameters, use:
        - ``set_nms()``
        - ``set_prob()``
        - ``set_adj_image_gamma()``
        - ``set_adj_image_contrast()``
        - ``set_adj_image_brightness()``

        To get analysis results, use:
        - ``get_results_nuclei()``
        """

        if isinstance(self.image, np.ndarray):

            model = StarDist2D.from_pretrained("2D_versatile_fluo")

            # Convert to grayscale; already-grayscale inputs raise cv2.error.
            try:
                img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
            except cv2.error:
                img = self.image

            img = adjust_img_16bit(
                img,
                brightness=self.img_adj_par["brightness"],
                contrast=self.img_adj_par["contrast"],
                gamma=self.img_adj_par["gamma"],
            )
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            labels, _ = model.predict_instances(
                normalize(img),
                nms_thresh=self.hyperparameter_nuclei["nms"],
                prob_thresh=self.hyperparameter_nuclei["prob"],
            )

            self.nuclei_results["nuclei"] = self.get_features(
                model_out=labels, image=img
            )

            if len(self.nuclei_results["nuclei"]["coords"]) > 0:

                original = adjust_img_16bit(img, color="gray")

                # series mode stores the plain image; otherwise the image is
                # shown side by side with the detection mask
                if self.series_im is True:
                    self.images["nuclei"] = original
                else:
                    nuclei_mask = adjust_img_16bit(
                        cv2.cvtColor(
                            self.create_mask(self.nuclei_results["nuclei"], original),
                            cv2.COLOR_BGR2GRAY,
                        ),
                        color="blue",
                    )
                    concatenated_image = cv2.hconcat([original, nuclei_mask])
                    self.images["nuclei"] = concatenated_image

                    if cfg._DISPLAY_MODE:
                        if self.show_plots:
                            display_preview(
                                self.resize_to_screen_img(self.images["nuclei"])
                            )

            else:

                self.nuclei_results["nuclei"] = None
                self.nuclei_results["nuclei_reduced"] = None
                self.nuclei_results["nuclei_chromatinization"] = None

                print("Nuclei not detected!")

        else:
            print("\nAdd image firstly!")

    def select_nuclei(self):
        """
        Selects data obtained from ``find_nuclei()`` based on the set threshold parameters.

        To show current parameters, use:
        - ``current_parameters_nuclei``

        To set new parameters, use:
        - ``set_nuclei_circularity()``
        - ``set_nuclei_size()``
        - ``set_nuclei_min_mean_intensity()``

        To get analysis results, use:
        - ``get_results_nuclei_selected()``
        """

        if self.nuclei_results["nuclei"] is not None:
            input_in = copy.deepcopy(self.nuclei_results["nuclei"])

            # keep nuclei inside [min_size, max_size] with sufficient mean intensity
            nuclei_dictionary = self.drop_dict(
                input_in,
                key="area",
                var=self.hyperparameter_nuclei["min_size"],
                action=">",
            )
            nuclei_dictionary = self.drop_dict(
                nuclei_dictionary,
                key="area",
                var=self.hyperparameter_nuclei["max_size"],
                action="<",
            )
            nuclei_dictionary = self.drop_dict(
                nuclei_dictionary,
                key="intensity_mean",
                var=self.hyperparameter_nuclei["intensity_mean"],
                action=">",
            )

            if len(nuclei_dictionary["coords"]) > 0:

                self.nuclei_results["nuclei_reduced"] = nuclei_dictionary

                # Convert to grayscale; already-grayscale inputs raise cv2.error.
                try:
                    img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
                except cv2.error:
                    img = self.image

                original = adjust_img_16bit(img, color="gray")

                # series mode stores the plain image; otherwise the image is
                # shown side by side with the selection mask
                if self.series_im is True:
                    self.images["nuclei_reduced"] = original
                else:

                    nuclei_mask = adjust_img_16bit(
                        cv2.cvtColor(
                            self.create_mask(
                                self.nuclei_results["nuclei_reduced"], original
                            ),
                            cv2.COLOR_BGR2GRAY,
                        ),
                        color="blue",
                    )
                    concatenated_image = cv2.hconcat([original, nuclei_mask])

                    self.images["nuclei_reduced"] = concatenated_image

                    if cfg._DISPLAY_MODE:
                        if self.show_plots:
                            display_preview(
                                self.resize_to_screen_img(self.images["nuclei_reduced"])
                            )

            else:
                self.nuclei_results["nuclei"] = None
                self.nuclei_results["nuclei_reduced"] = None
                self.nuclei_results["nuclei_chromatinization"] = None

                print("Selected zero nuclei! Analysis stop!")

        else:
            print("Lack of nuclei data to select!")

    def nuclei_chromatinization(self):
        """
        Performs chromatinization analysis of nuclei using data obtained from
        ``find_nuclei()`` and/or ``select_nuclei()``.

        To show current parameters, use:
        - ``current_parameters_chromatinization``
        - ``current_parameters_img_adj_chro``

        To set new parameters, use:
        - ``set_chromatinization_size()``
        - ``set_chromatinization_ratio()``
        - ``set_chromatinization_cut_point()``
        - ``set_adj_chrom_gamma()``
        - ``set_adj_chrom_contrast()``
        - ``set_adj_chrom_brightness()``

        To get analysis results, use:
        - ``get_results_nuclei_chromatinization()``
        """

        def add_lists(f, g):
            # element-wise concatenation of two string lists, padding the
            # shorter list with empty strings
            result = []
            max_length = max(len(f), len(g))

            for i in range(max_length):
                f_elem = f[i] if i < len(f) else ""
                g_elem = g[i] if i < len(g) else ""
                result.append(f_elem + g_elem)

            return result

        def reverse_coords(image, x, y):
            # return the coordinates of every pixel NOT in (x, y)
            zero = np.zeros(image.shape)

            zero[x, y] = 2**16

            zero_indices = np.where(zero == 0)

            return zero_indices[0], zero_indices[1]

        if isinstance(self.nuclei_results["nuclei_reduced"], dict):
            nuclei_dictionary
= self.nuclei_results["nuclei_reduced"]
        else:
            nuclei_dictionary = self.nuclei_results["nuclei"]

        if nuclei_dictionary is not None:
            arrays_list = copy.deepcopy(nuclei_dictionary["coords"])

            # Accumulates regionprops features of every candidate chromatin
            # spot found across all nuclei.
            chromatione_info = {
                "area": [],
                "area_bbox": [],
                "area_convex": [],
                "area_filled": [],
                "axis_major_length": [],
                "axis_minor_length": [],
                "eccentricity": [],
                "equivalent_diameter_area": [],
                "feret_diameter_max": [],
                "solidity": [],
                "perimeter": [],
                "perimeter_crofton": [],
                "coords": [],
            }

            # Running composite of all per-nucleus adjusted crops.
            full_im = np.zeros(self.image.shape[0:2], dtype=np.uint16)
            full_im = adjust_img_16bit(full_im)

            for arr in arrays_list:
                x = list(arr[:, 0])
                y = list(arr[:, 1])

                # Pixels outside the current nucleus.
                x1, y1 = reverse_coords(self.image, x, y)

                regions_chro2 = self.image.copy()

                # Black out everything except the current nucleus.
                regions_chro2[x1, y1] = 0

                regions_chro2 = regions_chro2.astype("uint16")

                # Already-grayscale images make cvtColor raise; skip then.
                try:
                    regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY)
                except:
                    pass

                regions_chro2 = adjust_img_16bit(
                    regions_chro2,
                    brightness=self.img_adj_par_chrom["brightness"],
                    contrast=self.img_adj_par_chrom["contrast"],
                    gamma=self.img_adj_par_chrom["gamma"],
                )

                full_im = merge_images(
                    image_list=[full_im, regions_chro2], intensity_factors=[1, 1]
                )

                # Otsu threshold computed on the nucleus pixels only.
                ret, thresh = cv2.threshold(
                    regions_chro2[x, y],
                    0,
                    2**16 - 1,
                    cv2.THRESH_BINARY + cv2.THRESH_OTSU,
                )

                # Suppress pixels at or below the scaled Otsu level; what
                # remains is treated as chromatin signal.
                regions_chro2[
                    regions_chro2
                    <= ret * self.hyperparameter_chromatinization["cut_point"]
                ] = 0

                regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY)

                chromatione = regions_chro2 > 0

                labeled_cells = measure.label(chromatione)
                # NOTE(review): the first regionprops() result is immediately
                # overwritten by the intensity-aware call below — redundant.
                regions = measure.regionprops(labeled_cells)
                regions = measure.regionprops(
                    labeled_cells, intensity_image=regions_chro2
                )

                for region in regions:

                    chromatione_info["area"].append(region.area)
                    chromatione_info["area_bbox"].append(region.area_bbox)
                    chromatione_info["area_convex"].append(region.area_convex)
                    chromatione_info["area_filled"].append(region.area_filled)
                    chromatione_info["axis_major_length"].append(
                        region.axis_major_length
                    )
                    chromatione_info["axis_minor_length"].append(
                        region.axis_minor_length
                    )
                    chromatione_info["eccentricity"].append(region.eccentricity)
                    chromatione_info["equivalent_diameter_area"].append(
                        region.equivalent_diameter_area
                    )
                    chromatione_info["feret_diameter_max"].append(
                        region.feret_diameter_max
                    )
                    chromatione_info["solidity"].append(region.solidity)
                    chromatione_info["perimeter"].append(region.perimeter)
                    chromatione_info["perimeter_crofton"].append(
                        region.perimeter_crofton
                    )
                    chromatione_info["coords"].append(region.coords)

            # Minor/major axis ratio per spot; 0.0 when the major axis is 0.
            ratios = []

            for min_len, max_len in zip(
                chromatione_info["axis_minor_length"],
                chromatione_info["axis_major_length"],
            ):
                if max_len != 0:
                    ratio = min_len / max_len
                    ratios.append(ratio)
                else:
                    ratios.append(float(0.0))

            chromatione_info["ratio"] = ratios

            # Filter spots by size bounds and axis ratio.
            chromation_dic = self.drop_dict(
                chromatione_info,
                key="area",
                var=self.hyperparameter_chromatinization["min_size"],
                action=">",
            )
            chromation_dic = self.drop_dict(
                chromation_dic,
                key="area",
                var=self.hyperparameter_chromatinization["max_size"],
                action="<",
            )
            chromation_dic = self.drop_dict(
                chromation_dic,
                key="ratio",
                var=self.hyperparameter_chromatinization["ratio"],
                action=">",
            )

            arrays_list2 = copy.deepcopy(chromation_dic["coords"])

            # Per-nucleus spot feature lists (one sub-list per nucleus).
            nuclei_dictionary["spot_size_area"] = []
            nuclei_dictionary["spot_size_area_bbox"] = []
            nuclei_dictionary["spot_size_area_convex"] = []
            nuclei_dictionary["spot_size_area_filled"] = []
            nuclei_dictionary["spot_axis_major_length"] = []
            nuclei_dictionary["spot_axis_minor_length"] = []
            nuclei_dictionary["spot_eccentricity"] = []
            nuclei_dictionary["spot_size_equivalent_diameter_area"] = []
            nuclei_dictionary["spot_feret_diameter_max"] = []
            nuclei_dictionary["spot_perimeter"] = []
            nuclei_dictionary["spot_perimeter_crofton"] = []

            for i, arr in enumerate(arrays_list):

                spot_size_area = []
                spot_size_area_bbox = []
                spot_size_area_convex = []
                # NOTE(review): duplicated initialization kept verbatim.
                spot_size_area_convex = []
                spot_size_area_filled = []
                spot_axis_major_length = []
                spot_axis_minor_length = []
                spot_eccentricity = []
                spot_size_equivalent_diameter_area = []
                spot_feret_diameter_max = []
                spot_perimeter = []
                spot_perimeter_crofton = []

                # Flatten the array,
                # Build "xy" string keys so pixel overlap can be tested with
                # Counter key intersection.
                df_tmp = pd.DataFrame(arr)
                df_tmp["duplicates"] = add_lists(
                    [str(x) for x in df_tmp[0]], [str(y) for y in df_tmp[1]]
                )

                counter_tmp = Counter(df_tmp["duplicates"])

                for j, arr2 in enumerate(arrays_list2):
                    df_tmp2 = pd.DataFrame(arr2)
                    df_tmp2["duplicates"] = add_lists(
                        [str(x) for x in df_tmp2[0]], [str(y) for y in df_tmp2[1]]
                    )

                    counter_tmp2 = Counter(df_tmp2["duplicates"])
                    intersection_length = len(counter_tmp.keys() & counter_tmp2.keys())
                    min_length = min(len(counter_tmp), len(counter_tmp2))

                    # A spot belongs to this nucleus when >=80% of the smaller
                    # pixel set overlaps and the spot covers 2.5%-50% of the
                    # nucleus pixel count.
                    if intersection_length >= 0.8 * min_length:

                        if (
                            len(list(df_tmp2["duplicates"]))
                            / len(list(df_tmp["duplicates"]))
                        ) >= 0.025 and (
                            len(list(df_tmp2["duplicates"]))
                            / len(list(df_tmp["duplicates"]))
                        ) <= 0.5:
                            spot_size_area.append(chromation_dic["area"][j])
                            spot_size_area_bbox.append(chromation_dic["area_bbox"][j])
                            spot_size_area_convex.append(
                                chromation_dic["area_convex"][j]
                            )
                            spot_size_area_filled.append(
                                chromation_dic["area_filled"][j]
                            )
                            spot_axis_major_length.append(
                                chromation_dic["axis_major_length"][j]
                            )
                            spot_axis_minor_length.append(
                                chromation_dic["axis_minor_length"][j]
                            )
                            spot_eccentricity.append(chromation_dic["eccentricity"][j])
                            spot_size_equivalent_diameter_area.append(
                                chromation_dic["equivalent_diameter_area"][j]
                            )
                            spot_feret_diameter_max.append(
                                chromation_dic["feret_diameter_max"][j]
                            )
                            spot_perimeter.append(chromation_dic["perimeter"][j])
                            spot_perimeter_crofton.append(
                                chromation_dic["perimeter_crofton"][j]
                            )

                nuclei_dictionary["spot_size_area"].append(spot_size_area)
                nuclei_dictionary["spot_size_area_bbox"].append(spot_size_area_bbox)
                nuclei_dictionary["spot_size_area_convex"].append(spot_size_area_convex)
                nuclei_dictionary["spot_size_area_filled"].append(spot_size_area_filled)
                nuclei_dictionary["spot_axis_major_length"].append(
                    spot_axis_major_length
                )
                nuclei_dictionary["spot_axis_minor_length"].append(
                    spot_axis_minor_length
                )
                nuclei_dictionary["spot_eccentricity"].append(spot_eccentricity)
                nuclei_dictionary["spot_size_equivalent_diameter_area"].append(
                    spot_size_equivalent_diameter_area
                )
                nuclei_dictionary["spot_feret_diameter_max"].append(
                    spot_feret_diameter_max
                )
                nuclei_dictionary["spot_perimeter"].append(spot_perimeter)
                nuclei_dictionary["spot_perimeter_crofton"].append(
                    spot_perimeter_crofton
                )

            self.nuclei_results["chromatinization"] = chromation_dic
            self.nuclei_results["nuclei_chromatinization"] = nuclei_dictionary

            self.images["nuclei_chromatinization"] = self.create_mask(
                chromation_dic, self.image
            )

            # Yellow overlay of detected chromatin spots.
            img_chrom = adjust_img_16bit(
                cv2.cvtColor(
                    self.create_mask(
                        self.nuclei_results["chromatinization"], self.image
                    ),
                    cv2.COLOR_BGR2GRAY,
                ),
                color="yellow",
            )

            if
isinstance(self.nuclei_results["nuclei_reduced"], dict): 2236 nuclei_mask = adjust_img_16bit( 2237 cv2.cvtColor( 2238 self.create_mask( 2239 self.nuclei_results["nuclei_reduced"], self.image 2240 ), 2241 cv2.COLOR_BGR2GRAY, 2242 ), 2243 color="blue", 2244 ) 2245 else: 2246 nuclei_mask = adjust_img_16bit( 2247 cv2.cvtColor( 2248 self.create_mask(self.nuclei_results["nuclei"], self.image), 2249 cv2.COLOR_BGR2GRAY, 2250 ), 2251 color="blue", 2252 ) 2253 2254 nuclei_mask = merge_images([nuclei_mask, img_chrom], [1, 1]) 2255 2256 try: 2257 img = cv2.cvtColor(full_im, cv2.COLOR_BGR2GRAY) 2258 except: 2259 img = full_im 2260 2261 oryginal = adjust_img_16bit(img, color="gray") 2262 2263 concatenated_image = cv2.hconcat([oryginal, nuclei_mask]) 2264 2265 self.images["nuclei_chromatinization"] = concatenated_image 2266 2267 if cfg._DISPLAY_MODE: 2268 if self.show_plots: 2269 display_preview( 2270 self.resize_to_screen_img( 2271 self.images["nuclei_chromatinization"] 2272 ) 2273 ) 2274 2275 else: 2276 print("Lack of nuclei data to select!") 2277 2278 # separate function for chromatinization 2279 2280 def _nuclei_chromatinization_series(self, image, nuclei_data): 2281 """ 2282 Helper method for performing chromatinization analysis on nuclei detected in the provided image. 
2283 """ 2284 2285 def add_lists(f, g): 2286 result = [] 2287 max_length = max(len(f), len(g)) 2288 2289 for i in range(max_length): 2290 f_elem = f[i] if i < len(f) else "" 2291 g_elem = g[i] if i < len(g) else "" 2292 result.append(f_elem + g_elem) 2293 2294 return result 2295 2296 def reverse_coords(image, x, y): 2297 2298 zero = np.zeros(image.shape) 2299 2300 zero[x, y] = 2**16 2301 2302 zero_indices = np.where(zero == 0) 2303 2304 return zero_indices[0], zero_indices[1] 2305 2306 nuclei_dictionary = nuclei_data.copy() 2307 2308 if nuclei_dictionary is not None: 2309 arrays_list = copy.deepcopy(nuclei_dictionary["coords"]) 2310 2311 chromatione_info = { 2312 "area": [], 2313 "area_bbox": [], 2314 "area_convex": [], 2315 "area_filled": [], 2316 "axis_major_length": [], 2317 "axis_minor_length": [], 2318 "eccentricity": [], 2319 "equivalent_diameter_area": [], 2320 "feret_diameter_max": [], 2321 "solidity": [], 2322 "perimeter": [], 2323 "perimeter_crofton": [], 2324 "coords": [], 2325 } 2326 2327 full_im = np.zeros(image.shape[0:2], dtype=np.uint16) 2328 full_im = adjust_img_16bit(full_im) 2329 2330 for arr in arrays_list: 2331 x = list(arr[:, 0]) 2332 y = list(arr[:, 1]) 2333 2334 x1, y1 = reverse_coords(image, x, y) 2335 2336 regions_chro2 = image.copy() 2337 2338 regions_chro2[x1, y1] = 0 2339 2340 regions_chro2 = regions_chro2.astype("uint16") 2341 2342 try: 2343 regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY) 2344 except: 2345 pass 2346 2347 regions_chro2 = adjust_img_16bit( 2348 regions_chro2, 2349 brightness=self.img_adj_par_chrom["brightness"], 2350 contrast=self.img_adj_par_chrom["contrast"], 2351 gamma=self.img_adj_par_chrom["gamma"], 2352 ) 2353 2354 full_im = merge_images( 2355 image_list=[full_im, regions_chro2], intensity_factors=[1, 1] 2356 ) 2357 2358 ret, _ = cv2.threshold( 2359 regions_chro2[x, y], 2360 0, 2361 2**16 - 1, 2362 cv2.THRESH_BINARY + cv2.THRESH_OTSU, 2363 ) 2364 2365 regions_chro2[ 2366 regions_chro2 2367 <= ret * 
self.hyperparameter_chromatinization["cut_point"] 2368 ] = 0 2369 2370 regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY) 2371 2372 chromatione = regions_chro2 > 0 2373 2374 labeled_cells = measure.label(chromatione) 2375 regions = measure.regionprops(labeled_cells) 2376 regions = measure.regionprops( 2377 labeled_cells, intensity_image=regions_chro2 2378 ) 2379 2380 for region in regions: 2381 2382 chromatione_info["area"].append(region.area) 2383 chromatione_info["area_bbox"].append(region.area_bbox) 2384 chromatione_info["area_convex"].append(region.area_convex) 2385 chromatione_info["area_filled"].append(region.area_filled) 2386 chromatione_info["axis_major_length"].append( 2387 region.axis_major_length 2388 ) 2389 chromatione_info["axis_minor_length"].append( 2390 region.axis_minor_length 2391 ) 2392 chromatione_info["eccentricity"].append(region.eccentricity) 2393 chromatione_info["equivalent_diameter_area"].append( 2394 region.equivalent_diameter_area 2395 ) 2396 chromatione_info["feret_diameter_max"].append( 2397 region.feret_diameter_max 2398 ) 2399 chromatione_info["solidity"].append(region.solidity) 2400 chromatione_info["perimeter"].append(region.perimeter) 2401 chromatione_info["perimeter_crofton"].append( 2402 region.perimeter_crofton 2403 ) 2404 chromatione_info["coords"].append(region.coords) 2405 2406 ratios = [] 2407 2408 for min_len, max_len in zip( 2409 chromatione_info["axis_minor_length"], 2410 chromatione_info["axis_major_length"], 2411 ): 2412 if max_len != 0: 2413 ratio = min_len / max_len 2414 ratios.append(ratio) 2415 else: 2416 ratios.append(float(0.0)) 2417 2418 chromatione_info["ratio"] = ratios 2419 2420 chromation_dic = self.drop_dict( 2421 chromatione_info, 2422 key="area", 2423 var=self.hyperparameter_chromatinization["min_size"], 2424 action=">", 2425 ) 2426 chromation_dic = self.drop_dict( 2427 chromation_dic, 2428 key="area", 2429 var=self.hyperparameter_chromatinization["max_size"], 2430 action="<", 2431 ) 2432 
chromation_dic = self.drop_dict( 2433 chromation_dic, 2434 key="ratio", 2435 var=self.hyperparameter_chromatinization["ratio"], 2436 action=">", 2437 ) 2438 2439 arrays_list2 = copy.deepcopy(chromation_dic["coords"]) 2440 2441 nuclei_dictionary["spot_size_area"] = [] 2442 nuclei_dictionary["spot_size_area_bbox"] = [] 2443 nuclei_dictionary["spot_size_area_convex"] = [] 2444 nuclei_dictionary["spot_size_area_filled"] = [] 2445 nuclei_dictionary["spot_axis_major_length"] = [] 2446 nuclei_dictionary["spot_axis_minor_length"] = [] 2447 nuclei_dictionary["spot_eccentricity"] = [] 2448 nuclei_dictionary["spot_size_equivalent_diameter_area"] = [] 2449 nuclei_dictionary["spot_feret_diameter_max"] = [] 2450 nuclei_dictionary["spot_perimeter"] = [] 2451 nuclei_dictionary["spot_perimeter_crofton"] = [] 2452 2453 for arr in arrays_list: 2454 2455 spot_size_area = [] 2456 spot_size_area_bbox = [] 2457 spot_size_area_convex = [] 2458 spot_size_area_convex = [] 2459 spot_size_area_filled = [] 2460 spot_axis_major_length = [] 2461 spot_axis_minor_length = [] 2462 spot_eccentricity = [] 2463 spot_size_equivalent_diameter_area = [] 2464 spot_feret_diameter_max = [] 2465 spot_perimeter = [] 2466 spot_perimeter_crofton = [] 2467 2468 # Flatten the array, 2469 df_tmp = pd.DataFrame(arr) 2470 df_tmp["duplicates"] = add_lists( 2471 [str(x) for x in df_tmp[0]], [str(y) for y in df_tmp[1]] 2472 ) 2473 2474 counter_tmp = Counter(df_tmp["duplicates"]) 2475 2476 for j, arr2 in enumerate(arrays_list2): 2477 df_tmp2 = pd.DataFrame(arr2) 2478 df_tmp2["duplicates"] = add_lists( 2479 [str(x) for x in df_tmp2[0]], [str(y) for y in df_tmp2[1]] 2480 ) 2481 2482 counter_tmp2 = Counter(df_tmp2["duplicates"]) 2483 intersection_length = len(counter_tmp.keys() & counter_tmp2.keys()) 2484 min_length = min(len(counter_tmp), len(counter_tmp2)) 2485 2486 if intersection_length >= 0.8 * min_length: 2487 2488 if ( 2489 len(list(df_tmp2["duplicates"])) 2490 / len(list(df_tmp["duplicates"])) 2491 ) >= 0.025 and ( 
2492 len(list(df_tmp2["duplicates"])) 2493 / len(list(df_tmp["duplicates"])) 2494 ) <= 0.5: 2495 spot_size_area.append(chromation_dic["area"][j]) 2496 spot_size_area_bbox.append(chromation_dic["area_bbox"][j]) 2497 spot_size_area_convex.append( 2498 chromation_dic["area_convex"][j] 2499 ) 2500 spot_size_area_filled.append( 2501 chromation_dic["area_filled"][j] 2502 ) 2503 spot_axis_major_length.append( 2504 chromation_dic["axis_major_length"][j] 2505 ) 2506 spot_axis_minor_length.append( 2507 chromation_dic["axis_minor_length"][j] 2508 ) 2509 spot_eccentricity.append(chromation_dic["eccentricity"][j]) 2510 spot_size_equivalent_diameter_area.append( 2511 chromation_dic["equivalent_diameter_area"][j] 2512 ) 2513 spot_feret_diameter_max.append( 2514 chromation_dic["feret_diameter_max"][j] 2515 ) 2516 spot_perimeter.append(chromation_dic["perimeter"][j]) 2517 spot_perimeter_crofton.append( 2518 chromation_dic["perimeter_crofton"][j] 2519 ) 2520 2521 nuclei_dictionary["spot_size_area"].append(spot_size_area) 2522 nuclei_dictionary["spot_size_area_bbox"].append(spot_size_area_bbox) 2523 nuclei_dictionary["spot_size_area_convex"].append(spot_size_area_convex) 2524 nuclei_dictionary["spot_size_area_filled"].append(spot_size_area_filled) 2525 nuclei_dictionary["spot_axis_major_length"].append( 2526 spot_axis_major_length 2527 ) 2528 nuclei_dictionary["spot_axis_minor_length"].append( 2529 spot_axis_minor_length 2530 ) 2531 nuclei_dictionary["spot_eccentricity"].append(spot_eccentricity) 2532 nuclei_dictionary["spot_size_equivalent_diameter_area"].append( 2533 spot_size_equivalent_diameter_area 2534 ) 2535 nuclei_dictionary["spot_feret_diameter_max"].append( 2536 spot_feret_diameter_max 2537 ) 2538 nuclei_dictionary["spot_perimeter"].append(spot_perimeter) 2539 nuclei_dictionary["spot_perimeter_crofton"].append( 2540 spot_perimeter_crofton 2541 ) 2542 2543 self.nuclei_results["chromatinization"] = chromation_dic 2544 self.nuclei_results["nuclei_chromatinization"] = 
nuclei_dictionary

            self.images["nuclei_chromatinization"] = self.create_mask(
                chromation_dic, image
            )

            # Yellow overlay of detected chromatin spots.
            img_chrom = adjust_img_16bit(
                cv2.cvtColor(
                    self.create_mask(self.nuclei_results["chromatinization"], image),
                    cv2.COLOR_BGR2GRAY,
                ),
                color="yellow",
            )

            # Blue overlay of the input nuclei.
            nuclei_mask = adjust_img_16bit(
                cv2.cvtColor(self.create_mask(nuclei_data, image), cv2.COLOR_BGR2GRAY),
                color="blue",
            )

            nuclei_mask = merge_images([nuclei_mask, img_chrom], [1, 1])

            # cvtColor fails on single-channel input; use composite as-is.
            try:
                img = cv2.cvtColor(full_im, cv2.COLOR_BGR2GRAY)
            except:
                img = full_im

            oryginal = adjust_img_16bit(img, color="gray")

            concatenated_image = cv2.hconcat([oryginal, nuclei_mask])

            self.images["nuclei_chromatinization"] = concatenated_image

            if cfg._DISPLAY_MODE:
                if self.show_plots:
                    display_preview(
                        self.resize_to_screen_img(
                            self.images["nuclei_chromatinization"]
                        )
                    )

        else:
            print("Lack of nuclei data to select!")

    def browser_test(self):
        """
        Displays test results generated by the ``nuclei_finder_test()`` method
        in the default web browser.
        """

        html_content = ""

        # Embed each matplotlib figure inline as a base64-encoded PNG so the
        # page is self-contained (no image files on disk).
        for fig in self.test_results:
            buf = BytesIO()
            fig.savefig(buf, format="png", bbox_inches="tight")
            buf.seek(0)

            img_base64 = base64.b64encode(buf.read()).decode("utf-8")

            html_content += f'<img src="data:image/png;base64,{img_base64}" style="margin:10px;"/>\n'

        # delete=False: the file must survive until the browser has opened it.
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".html"
        ) as tmp_file:
            tmp_file.write(html_content)
            tmp_filename = tmp_file.name

        webbrowser.open_new_tab(tmp_filename)

    def series_analysis_chromatinization(
        self,
        path_to_images: str,
        file_extension: str = "tiff",
        selected_id: list = [],
        fille_name_part: str = "",
        selection_opt: bool = True,
        include_img: bool = True,
        test_series: int = 0,
    ):
        """
        Performs full analysis on images provided via the ``input_image()`` method
        using default or user-defined parameters.

        This method runs nuclei detection, nuclei selection, and chromatinization
        analysis in a single pipeline. Users can adjust parameters for each step
        before running the analysis.
2629 2630 To show current parameters, use: 2631 - ``current_parameters_nuclei`` 2632 - ``current_parameters_img_adj`` 2633 - ``current_parameters_chromatinization`` 2634 - ``current_parameters_img_adj_chro`` 2635 2636 To set new parameters, use: 2637 - ``set_nms()`` 2638 - ``set_prob()`` 2639 - ``set_adj_image_gamma()`` 2640 - ``set_adj_image_contrast()`` 2641 - ``set_adj_image_brightness()`` 2642 - ``set_nuclei_circularity()`` 2643 - ``set_nuclei_size()`` 2644 - ``set_nuclei_min_mean_intensity()`` 2645 - ``set_chromatinization_size()`` 2646 - ``set_chromatinization_ratio()`` 2647 - ``set_chromatinization_cut_point()`` 2648 - ``set_adj_chrom_gamma()`` 2649 - ``set_adj_chrom_contrast()`` 2650 - ``set_adj_chrom_brightness()`` 2651 2652 Parameters 2653 ---------- 2654 path_to_images : str 2655 Path to the directory containing images for analysis. 2656 2657 file_extension : str, optional 2658 Extension of the image files. Default is 'tiff'. 2659 2660 selected_id : list, optional 2661 List of IDs that must be part of the image name to distinguish them 2662 from others. Default is an empty list, which means all images in 2663 the directory will be processed. 2664 2665 fille_name_part : str, optional 2666 Part of the file name to filter images. Default is an empty string. 2667 2668 selection_opt : bool, optional 2669 Whether to run ``select_nuclei()`` with the defined parameters. Default is True. 2670 2671 include_img : bool, optional 2672 Whether to include the images in the result dictionary. Default is True. 2673 2674 test_series : int, optional 2675 Number of images to test the parameters and return results. Default is 0, 2676 which means all images in the directory will be processed. 2677 2678 Returns 2679 ------- 2680 results_dict : dict 2681 Dictionary containing results for each image in the directory. 2682 Keys correspond to image file names. 2683 2684 Notes 2685 ----- 2686 This method runs the complete nuclei and chromatinization analysis pipeline. 
2687 2688 Parameters must be set appropriately before calling to ensure correct results. 2689 """ 2690 2691 results_dict = {} 2692 results_img = {} 2693 results_img_raw = {} 2694 2695 files = glob.glob(os.path.join(path_to_images, "*." + file_extension)) 2696 2697 if len(fille_name_part) > 0: 2698 files = [x for x in files if fille_name_part.lower() in x.lower()] 2699 2700 if len(selected_id) > 0: 2701 selected_id = [str(x) for x in selected_id] 2702 files = [ 2703 x 2704 for x in files 2705 if re.sub("_.*", "", os.path.basename(x)) in selected_id 2706 ] 2707 2708 if test_series > 0: 2709 2710 files = random.sample(files, test_series) 2711 2712 self.show_plots = False 2713 self.series_im = True 2714 2715 print("\nFile analysis:\n\n") 2716 2717 for file in tqdm(files): 2718 2719 print(file) 2720 2721 self.show_plots = False 2722 2723 image = self.load_image(file) 2724 2725 self.input_image(image) 2726 2727 self.find_nuclei() 2728 2729 tmp = None 2730 2731 if selection_opt is True: 2732 self.select_nuclei() 2733 tmp = self.get_results_nuclei_selected() 2734 2735 else: 2736 tmp = self.get_results_nuclei() 2737 2738 if tmp is not None: 2739 2740 if tmp[0] is not None: 2741 2742 results_dict[str(os.path.basename(file))] = tmp[0] 2743 results_img[str(os.path.basename(file))] = tmp[1] 2744 results_img_raw[str(os.path.basename(file))] = image 2745 del tmp 2746 del image 2747 2748 results_dict_tmp = self.repairing_nuclei(results_dict) 2749 2750 results_dict = {} 2751 2752 print("\nChromatization searching:\n\n") 2753 2754 for ke in tqdm(results_dict_tmp.keys()): 2755 2756 tmp = None 2757 2758 try: 2759 self._nuclei_chromatinization_series( 2760 results_img_raw[ke], results_dict_tmp[ke] 2761 ) 2762 tmp = self.get_results_nuclei_chromatinization() 2763 except: 2764 print(f"Sample {ke} could not be processed.") 2765 2766 if tmp is not None: 2767 2768 if tmp[0] is not None: 2769 2770 tmp[0].pop("coords") 2771 2772 if include_img: 2773 results_dict[str(os.path.basename(ke))] = { 
2774 "stats": tmp[0], 2775 "img": cv2.hconcat([results_img[ke], tmp[1]]), 2776 } 2777 del tmp 2778 else: 2779 results_dict[str(os.path.basename(ke))] = tmp[0] 2780 del tmp 2781 2782 else: 2783 print(f"Unable to obtain results for {print(ke)}") 2784 2785 self.show_plots = True 2786 self.series_im = False 2787 2788 return results_dict 2789 2790 def series_analysis_nuclei( 2791 self, 2792 path_to_images: str, 2793 file_extension: str = "tiff", 2794 selected_id: list = [], 2795 fille_name_part: str = "", 2796 selection_opt: bool = True, 2797 include_img: bool = True, 2798 test_series: int = 0, 2799 ): 2800 """ 2801 Performs analysis on the image provided by the ``input_image()`` method 2802 using default or user-defined parameters. 2803 2804 This method runs nuclei detection and selection using the currently set 2805 parameters. Users can adjust image preprocessing and nuclei detection 2806 parameters before running the analysis. 2807 2808 To show current parameters, use: 2809 - ``current_parameters_nuclei`` 2810 - ``current_parameters_img_adj`` 2811 2812 To set new parameters, use: 2813 - ``set_nms()`` 2814 - ``set_prob()`` 2815 - ``set_adj_image_gamma()`` 2816 - ``set_adj_image_contrast()`` 2817 - ``set_adj_image_brightness()`` 2818 - ``set_nuclei_circularity()`` 2819 - ``set_nuclei_size()`` 2820 - ``set_nuclei_min_mean_intensity()`` 2821 2822 Parameters 2823 ---------- 2824 path_to_images : str 2825 Path to the directory containing images for analysis. 2826 2827 file_extension : str, optional 2828 Extension of the image files. Default is 'tiff'. 2829 2830 selected_id : list, optional 2831 List of IDs that must be part of the image name to distinguish them 2832 from others. Default is an empty list, which means all images in 2833 the directory will be processed. 2834 2835 fille_name_part : str, optional 2836 Part of the file name to filter images. Default is an empty string. 
2837 2838 selection_opt : bool, optional 2839 Whether to run the ``select_nuclei()`` method with the defined parameters. 2840 Default is True. 2841 2842 include_img : bool, optional 2843 Whether to include the images in the result dictionary. Default is True. 2844 2845 test_series : int, optional 2846 Number of images to test the parameters and return results. Default is 0, 2847 which means all images in the directory will be processed. 2848 2849 Returns 2850 ------- 2851 results_dict : dict 2852 Dictionary containing results for each image in the directory. 2853 Keys correspond to image file names. 2854 """ 2855 2856 results_dict = {} 2857 results_img = {} 2858 2859 files = glob.glob(os.path.join(path_to_images, "*." + file_extension)) 2860 2861 if len(fille_name_part) > 0: 2862 files = [x for x in files if fille_name_part.lower() in x.lower()] 2863 2864 if len(selected_id) > 0: 2865 selected_id = [str(x) for x in selected_id] 2866 files = [ 2867 x 2868 for x in files 2869 if re.sub("_.*", "", os.path.basename(x)) in selected_id 2870 ] 2871 2872 if test_series > 0: 2873 2874 files = random.sample(files, test_series) 2875 2876 self.show_plots = False 2877 self.series_im = True 2878 2879 print("\nFile analysis:\n\n") 2880 2881 for file in tqdm(files): 2882 2883 print(file) 2884 2885 image = self.load_image(file) 2886 2887 self.input_image(image) 2888 2889 self.find_nuclei() 2890 2891 if self.nuclei_results["nuclei"] is not None: 2892 2893 tmp = [None] 2894 2895 if selection_opt is True: 2896 self.select_nuclei() 2897 tmp = self.get_results_nuclei_selected() 2898 2899 else: 2900 tmp = self.get_results_nuclei() 2901 2902 if tmp is not None: 2903 2904 if tmp[0] is not None: 2905 2906 if include_img: 2907 results_dict[str(os.path.basename(file))] = tmp[0] 2908 results_img[str(os.path.basename(file))] = tmp[1] 2909 2910 del tmp 2911 2912 else: 2913 results_dict[str(os.path.basename(file))] = tmp[0] 2914 del tmp 2915 2916 else: 2917 print(f"Unable to obtain results for 
{print(file)}") 2918 2919 else: 2920 2921 print(f"Unable to obtain results for {print(file)}") 2922 2923 self.show_plots = True 2924 self.series_im = False 2925 2926 results_dict_tmp = self.repairing_nuclei(results_dict) 2927 2928 if include_img is False: 2929 2930 return results_dict_tmp 2931 2932 else: 2933 2934 results_dict = {} 2935 2936 for ke in results_dict_tmp.keys(): 2937 2938 nuclei_mask = adjust_img_16bit( 2939 cv2.cvtColor( 2940 self.create_mask(results_dict_tmp[ke], results_img[ke]), 2941 cv2.COLOR_BGR2GRAY, 2942 ), 2943 color="blue", 2944 ) 2945 concatenated_image = cv2.hconcat([results_img[ke], nuclei_mask]) 2946 2947 cred = results_dict_tmp[ke] 2948 # cred.pop('coords') 2949 2950 results_dict[ke] = {"stats": cred, "img": concatenated_image} 2951 2952 return results_dict
Implements a comprehensive pipeline for automated segmentation, selection, and analysis of cell nuclei and their internal chromatin structure in microscopy images.
It utilizes a pre-trained deep learning model (StarDist2D) for initial nuclear identification, followed by the application of advanced morphological and intensity filters, and a dedicated algorithm for quantifying chromatinization. The class provides detailed control over the hyperparameters for both the segmentation process and image preprocessing stages.
Parameters
image : np.ndarray, optional The input image (typically 16-bit) for analysis.
test_results : list, optional Plots resulting from parameter testing (e.g., NMS/Prob combinations).
hyperparameter_nuclei : dict, optional Parameters for nuclei segmentation and filtering (e.g., 'nms', 'prob', 'min_size', 'circularity').
hyperparameter_chromatinization : dict, optional Parameters for segmenting and filtering chromatin spots (e.g., 'cut_point', 'ratio').
img_adj_par_chrom : dict, optional Image adjustment parameters (gamma, contrast) specifically for chromatin analysis.
img_adj_par : dict, optional Image adjustment parameters for nuclei segmentation.
show_plots : bool, optional Flag controlling the automatic display of visual results.
nuclei_results : dict, optional A dictionary storing numerical data (features) extracted from the nuclei.
images : dict, optional A dictionary storing output images and masks.
Attributes
image : np.ndarray The currently loaded image for analysis.
test_results : list The visual outcomes of NMS/Prob parameter tests.
hyperparameter_nuclei : dict
A dictionary of active parameters used by the find_nuclei() and select_nuclei() methods.
hyperparameter_chromatinization : dict
A dictionary of active parameters used by the nuclei_chromatinization() method.
img_adj_par : dict Image correction parameters for nuclei segmentation.
img_adj_par_chrom : dict Image correction parameters for chromatin analysis.
show_plots : bool The state of the plot display flag.
nuclei_results : dict Stores feature dictionaries for: all detected ('nuclei'), selected ('nuclei_reduced'), and chromatinization data ('nuclei_chromatinization').
images : dict Stores masks and images visualizing the results.
series_im : bool Flag indicating if the class is operating in a batch or series processing mode.
Methods
set_nms(nms) Sets the Non-Maximum Suppression (NMS) threshold.
set_prob(prob) Sets the segmentation probability threshold.
set_nuclei_circularity(circ) Sets the minimum required circularity for a nucleus.
set_nuclei_local_intenisty_FC(local_FC) Sets the factor used for removing false positives based on local intensity differences.
set_nuclei_global_area_FC(global_FC) Sets the factor used for removing size-based outlier false positives.
set_nuclei_size(size) Sets the minimum and maximum area (in pixels) for nuclei selection.
set_nuclei_min_mean_intensity(intensity) Sets the minimum required mean intensity value for a nucleus.
set_chromatinization_size(size) Sets the minimum and maximum area (in pixels) for chromatin spot selection.
set_chromatinization_cut_point(cut_point) Sets the factor used to adjust the chromatin segmentation threshold (Otsu's method).
set_adj_image_gamma(gamma) Sets the gamma correction for the nuclei image.
set_adj_chrom_contrast(contrast) Sets the contrast adjustment for the chromatinization image.
current_parameters_nuclei (property) Returns the active nuclei segmentation and filtering parameters.
find_nuclei() Performs nuclei segmentation using StarDist and extracts initial features.
select_nuclei() Filters the detected nuclei based on set morphological and intensity criteria.
nuclei_chromatinization() Performs quantitative and morphological analysis of chromatin spots in selected nuclei.
get_features(model_out, image) Calculates geometric and intensity features from a segmented mask (label image).
Notes
The typical analysis workflow follows this order:
input_image() → find_nuclei() → select_nuclei() (optional) → nuclei_chromatinization() (optional)
def __init__(
    self,
    image=None,
    test_results=None,
    hyperparameter_nuclei=None,
    hyperparameter_chromatinization=None,
    img_adj_par_chrom=None,
    img_adj_par=None,
    show_plots=None,
    nuclei_results=None,
    images=None,
):
    """
    The main class for the detection and analysis of cell nuclei and their
    chromatinization in microscopy or flow cytometry images, utilizing the
    StarDist segmentation model.

    This class inherits functionality for image processing (ImageTools) and
    results handling (RepTools).

    Parameters
    ----------
    image : np.ndarray, optional
        The input image for analysis. Default: None.

    test_results : list, optional
        A list of plots or images resulting from parameter testing.
        Default: None.

    hyperparameter_nuclei : dict, optional
        The segmentation parameters for nuclei detection.
        Default:
        {'nms': 0.8, 'prob': 0.4, 'max_size': 1000, 'min_size': 20,
        'circularity': 0.6, 'intensity_mean': 6553.5, 'nn_min': 10,
        'FC_diff_global': 1.5, 'FC_diff_local_intensity': 0.6}

    hyperparameter_chromatinization : dict, optional
        The analysis parameters for chromatin spots within the nuclei.
        Default:
        {'max_size': 800, 'min_size': 2, 'ratio': 0.1, 'cut_point': 0.95}

    img_adj_par_chrom : dict, optional
        Image adjustment parameters (gamma, contrast, brightness) for chromatin analysis.
        Default: {'gamma': 0.25, 'contrast': 5, 'brightness': 950}

    img_adj_par : dict, optional
        Image adjustment parameters (gamma, contrast, brightness) for nuclei segmentation.
        Default: {'gamma': 0.9, 'contrast': 2, 'brightness': 1000}

    show_plots : bool, optional
        Flag to determine whether results and plots should be displayed automatically.
        Default: True.

    nuclei_results : dict, optional
        A dictionary storing the numerical results of the analysis.
        Default: {'nuclei': None, 'nuclei_reduced': None, 'nuclei_chromatinization': None}

    images : dict, optional
        A dictionary storing the output images (e.g., masks).
        Default: {'nuclei': None, 'nuclei_reduced': None, 'nuclei_chromatinization': None}

    Notes
    -----
    The default value for 'intensity_mean' in hyperparameter_nuclei is calculated
    as $(2^{16} - 1) / 10$, which represents 10% of the maximum 16-bit value
    (65535 / 10 = 6553.5).

    The image adjustment parameters are crucial for optimizing contrast and
    brightness to improve the performance of both the StarDist model and the
    subsequent chromatin thresholding.
    """

    # BUG FIX: the previous defaults used `image or None`, `test_results or None`
    # and `show_plots or True`. For a multi-element numpy array, `image or None`
    # raises "truth value of an array is ambiguous"; `show_plots or True`
    # silently replaced an explicit False with True. Explicit `is None` checks
    # keep the documented defaults without these traps.
    self.image = image
    """Loaded input image."""

    self.test_results = test_results
    """Results of parameter tests.

    This attribute or method stores the outcomes of parameter testing procedures.
    For interactive browsing and inspection of the results, use the
    `browser_test(self)` method."""

    if hyperparameter_nuclei is None:
        hyperparameter_nuclei = {
            "nms": 0.8,
            "prob": 0.4,
            "max_size": 1000,
            "min_size": 20,
            "circularity": 0.6,
            "intensity_mean": (2**16 - 1) / 10,
            "nn_min": 10,
            "FC_diff_global": 1.5,
            "FC_diff_local_intensity": 0.6,
        }
    self.hyperparameter_nuclei = hyperparameter_nuclei
    """Active nuclei segmentation/filter parameters."""

    if hyperparameter_chromatinization is None:
        hyperparameter_chromatinization = {
            "max_size": 800,
            "min_size": 2,
            "ratio": 0.1,
            "cut_point": 0.95,
        }
    self.hyperparameter_chromatinization = hyperparameter_chromatinization
    """Active chromatin analysis parameters."""

    if img_adj_par_chrom is None:
        img_adj_par_chrom = {
            "gamma": 0.25,
            "contrast": 5,
            "brightness": 950,
        }
    self.img_adj_par_chrom = img_adj_par_chrom
    """Image adjustment for chromatin analysis."""

    if img_adj_par is None:
        img_adj_par = {
            "gamma": 0.9,
            "contrast": 2,
            "brightness": 1000,
        }
    self.img_adj_par = img_adj_par
    """Image adjustment for nuclei segmentation."""

    self.show_plots = True if show_plots is None else show_plots
    """Flag controlling plot display."""

    if nuclei_results is None:
        nuclei_results = {
            "nuclei": None,
            "nuclei_reduced": None,
            "nuclei_chromatinization": None,
        }
    self.nuclei_results = nuclei_results
    """Stored dictionary of nuclei analysis results."""

    if images is None:
        images = {
            "nuclei": None,
            "nuclei_reduced": None,
            "nuclei_chromatinization": None,
        }
    self.images = images
    """Stored dictionary of images from nuclei analysis."""

    # series images
    self.series_im = False
    """Flag for batch/series image processing."""
The main class for the detection and analysis of cell nuclei and their chromatinization in microscopy or flow cytometry images, utilizing the StarDist segmentation model.
This class inherits functionality for image processing (ImageTools) and results handling (RepTools).
Parameters
image : np.ndarray, optional The input image for analysis. Default: None.
test_results : list, optional A list of plots or images resulting from parameter testing. Default: None.
hyperparameter_nuclei : dict, optional The segmentation parameters for nuclei detection. Default: {'nms': 0.8, 'prob': 0.4, 'max_size': 1000, 'min_size': 20, 'circularity': 0.6, 'intensity_mean': 6553.5, 'nn_min': 10, 'FC_diff_global': 1.5, 'FC_diff_local_intensity': 0.6}
hyperparameter_chromatinization : dict, optional The analysis parameters for chromatin spots within the nuclei. Default: {'max_size': 800, 'min_size': 2, 'ratio': 0.1, 'cut_point': 0.95}
img_adj_par_chrom : dict, optional Image adjustment parameters (gamma, contrast, brightness) for chromatin analysis. Default: {'gamma': 0.25, 'contrast': 5, 'brightness': 950}
img_adj_par : dict, optional Image adjustment parameters (gamma, contrast, brightness) for nuclei segmentation. Default: {'gamma': 0.9, 'contrast': 2, 'brightness': 1000}
show_plots : bool, optional Flag to determine whether results and plots should be displayed automatically. Default: True.
nuclei_results : dict, optional A dictionary storing the numerical results of the analysis. Default: {'nuclei': None, 'nuclei_reduced': None, 'nuclei_chromatinization': None}
images : dict, optional A dictionary storing the output images (e.g., masks). Default: {'nuclei': None, 'nuclei_reduced': None, 'nuclei_chromatinization': None}
Attributes
image : np.ndarray The currently loaded image for analysis.
hyperparameter_nuclei : dict Active nuclei segmentation parameters.
hyperparameter_chromatinization : dict Active chromatinization analysis parameters.
img_adj_par : dict Active image correction parameters for nuclei segmentation.
img_adj_par_chrom : dict Active image correction parameters for chromatin analysis.
show_plots : bool The current state of the plot display flag.
series_im : bool Flag indicating if a series of images is being processed.
Notes
The default value for 'intensity_mean' in hyperparameter_nuclei is calculated as $(2^{16} - 1) / 10$, which represents 10% of the maximum 16-bit value (65535 / 10 = 6553.5).
The image adjustment parameters are crucial for optimizing contrast and brightness to improve the performance of both the StarDist model and the subsequent chromatin thresholding.
Results of parameter tests.
This attribute or method stores the outcomes of parameter testing procedures.
For interactive browsing and inspection of the results, use the
browser_test(self) method.
def set_nms(self, nms: float):
    """
    Update the Non-Maximum Suppression (NMS) IoU threshold.

    Lower values suppress overlapping detections more aggressively, so
    fewer overlapping nuclei are kept.

    Parameters
    ----------
    nms : float
        New NMS IoU threshold.
    """

    self.hyperparameter_nuclei.update(nms=nms)
Set the Non-Maximum Suppression (NMS) threshold.
The NMS threshold controls how aggressively overlapping detections are suppressed. A lower value reduces the probability of overlapping nuclei being kept.
Parameters
nms : float The NMS IoU threshold value.
def set_prob(self, prob: float):
    """
    Update the probability threshold used in segmentation.

    This value is the minimum confidence an object must reach to be kept
    as a segmented nucleus. Raising it keeps only strong detections and
    may drop weaker or less distinct structures, so results obtained with
    different thresholds should be inspected visually.

    Parameters
    ----------
    prob : float
        New probability threshold.
    """

    self.hyperparameter_nuclei.update(prob=prob)
Set the probability threshold used in segmentation.
The probability threshold determines the minimum confidence required for an object (e.g., a nucleus) to be classified as a segmented entity. Higher values result in fewer segmented objects, as only detections with strong confidence scores are kept. This may lead to omission of weaker or less distinct structures.
Because optimal values depend on image characteristics, it is important to visually inspect segmentation results produced with different thresholds to determine the most suitable setting.
Parameters
prob : float The probability threshold value.
def set_nuclei_circularity(self, circ: float):
    """
    Update the minimal circularity a nucleus must have to be kept.

    Parameters
    ----------
    circ : float
        New circularity threshold.
    """

    self.hyperparameter_nuclei.update(circularity=circ)
This method sets the 'circ' parameter, which defines the minimal circularity a nucleus must have to be kept.
Parameters
circ : float Nuclei circularity value.
def set_nuclei_local_intenisty_FC(self, local_FC: float):
    """
    Update the 'FC_diff_local_intensity' parameter.

    This factor is used to remove false-positive nuclei detected in a
    single image based on local intensity differences.

    Parameters
    ----------
    local_FC : float
        New local intensity fold-change factor.
    """

    self.hyperparameter_nuclei.update(FC_diff_local_intensity=local_FC)
This method sets the 'FC_diff_local_intensity' parameter. The 'local_FC' is used to remove false-positive multiple nuclei detected in a single image.
Parameters
local_FC : float local_FC value.
def set_nuclei_global_area_FC(self, global_FC: float):
    """
    Update the 'FC_diff_global' parameter.

    This factor removes false-positive nuclei whose area is an outlier
    from the global mean area of the image.

    Parameters
    ----------
    global_FC : float
        New global area fold-change factor.
    """

    self.hyperparameter_nuclei.update(FC_diff_global=global_FC)
This method sets the 'FC_diff_global' parameter. The 'global_FC' is used to remove false positive multiple nuclei that were detected in a single image and are outliers from the global mean area size.
Parameters
FC_diff_global : float global_FC value.
def set_nuclei_size(self, size: tuple):
    """
    Update the allowed nucleus area range (in pixels).

    Parameters
    ----------
    size : tuple
        (min_value, max_value) area bounds.
    """

    self.hyperparameter_nuclei.update(min_size=size[0], max_size=size[1])
This method sets the 'size' parameter, which defines the minimal and maximal nucleus area (px).
Parameters
size : tuple (min_value, max_value)
def set_nuclei_min_mean_intensity(self, intensity: int):
    """
    Update the minimum mean pixel intensity required within a nucleus.

    Parameters
    ----------
    intensity : int
        New minimum mean intensity.
    """

    self.hyperparameter_nuclei.update(intensity_mean=intensity)
This method sets 'intensity' parameter. The 'intensity' parameter is used to adjust the minimum mean intensity of all pixel intensities within the nucleus.
Parameters
intensity : int intensity value.
def set_chromatinization_size(self, size: tuple):
    """
    Update the allowed chromatinization spot area range (px) inside a nucleus.

    Parameters
    ----------
    size : tuple
        (min_value, max_value) area bounds.
    """

    self.hyperparameter_chromatinization.update(
        min_size=size[0], max_size=size[1]
    )
This method sets the 'size' parameter, which defines the minimal and maximal chromatinization spot area (px) within the nucleus.
Parameters
size : tuple (min_value, max_value)
def set_chromatinization_ratio(self, ratio: int):
    """
    Update the 'ratio' parameter for chromatin spots.

    The ratio plays a role similar to 'circularity': it describes the
    ratio between the maximum x and y extents of a chromatin spot.

    Parameters
    ----------
    ratio : float
        New ratio value.
    """

    self.hyperparameter_chromatinization.update(ratio=ratio)
This method sets the 'ratio' parameter. In this case, the 'ratio' parameter is similar to 'circularity' as it describes the ratio between the maximum lengths in the x and y dimensions of the nucleus chromatinization.
Parameters
ratio : float ratio value.
def set_chromatinization_cut_point(self, cut_point: int):
    """
    Update the 'cut_point' parameter.

    The cut point is a factor used to adjust the threshold separating the
    background from chromatin spots.

    Parameters
    ----------
    cut_point : int
        New cut-point factor.
    """

    self.hyperparameter_chromatinization.update(cut_point=cut_point)
This method sets the 'cut_point' parameter. The 'cut_point' parameter is a factor used to adjust the threshold for separating the background from chromatin spots.
Parameters
cut_point : int cut_point value.
def set_adj_image_gamma(self, gamma: float):
    """
    Update the gamma correction applied to the nucleus image.

    Parameters
    ----------
    gamma : float
        New gamma value.
    """

    self.img_adj_par.update(gamma=gamma)
This method sets the 'gamma' parameter, which controls the gamma correction applied to the nucleus image.
Parameters
gamma : float gamma value.
def set_adj_image_contrast(self, contrast: float):
    """
    Update the contrast adjustment applied to the nucleus image.

    Parameters
    ----------
    contrast : float
        New contrast value.
    """

    self.img_adj_par.update(contrast=contrast)
This method sets 'contrast' parameter. The contrast is a parameter used for adjust contrast of the nucleus image.
Parameters
contrast : float contrast value.
def set_adj_image_brightness(self, brightness: float):
    """
    Update the brightness adjustment applied to the nucleus image.

    Parameters
    ----------
    brightness : float
        New brightness value.
    """

    self.img_adj_par.update(brightness=brightness)
This method sets 'brightness' parameter. The brightness is a parameter used for adjust brightness of the nucleus image.
Parameters
brightness : float brightness value.
def set_adj_chrom_gamma(self, gamma: float):
    """
    Update the gamma correction applied to the chromatinization image.

    Parameters
    ----------
    gamma : float
        New gamma value.
    """

    self.img_adj_par_chrom.update(gamma=gamma)
This method sets 'gamma' parameter. The gamma is a parameter used for adjust gamma of the nucleus chromatinization image.
Parameters
gamma : float gamma value.
def set_adj_chrom_contrast(self, contrast: float):
    """
    Update the contrast adjustment applied to the chromatinization image.

    Parameters
    ----------
    contrast : float
        New contrast value.
    """

    self.img_adj_par_chrom.update(contrast=contrast)
This method sets 'contrast' parameter. The contrast is a parameter used for adjust contrast of the nucleus chromatinization image.
Parameters
contrast : float contrast value.
def set_adj_chrom_brightness(self, brightness: float):
    """
    Update the brightness adjustment applied to the chromatinization image.

    Parameters
    ----------
    brightness : float
        New brightness value.
    """

    self.img_adj_par_chrom.update(brightness=brightness)
This method sets 'brightness' parameter. The brightness is a parameter used for adjust brightness of the nucleus chromatinization image.
Parameters
brightness : float brightness value.
@property
def current_parameters_nuclei(self):
    """
    Current nuclei analysis parameters (also echoed to stdout).

    Returns
    -------
    dict
        Active nuclei analysis parameters.
    """

    params = self.hyperparameter_nuclei
    print(params)
    return params
This method returns current nuclei analysis parameters.
Returns
dict Nuclei analysis parameters.
@property
def current_parameters_chromatinization(self):
    """
    Current chromatinization analysis parameters (also echoed to stdout).

    Returns
    -------
    dict
        Active chromatinization analysis parameters.
    """

    params = self.hyperparameter_chromatinization
    print(params)
    return params
This method returns current nuclei chromatinization analysis parameters.
Returns
dict Nuclei chromatinization analysis parameters.
@property
def current_parameters_img_adj(self):
    """
    Current nuclei image adjustment setup (also echoed to stdout).

    Returns
    -------
    dict
        Active nuclei image adjustment parameters.
    """

    setup = self.img_adj_par
    print(setup)
    return setup
This method returns current nuclei image setup.
Returns
dict Nuclei image setup.
@property
def current_parameters_img_adj_chro(self):
    """
    Current chromatinization image adjustment setup (also echoed to stdout).

    Returns
    -------
    dict
        Active chromatinization image adjustment parameters.
    """

    setup = self.img_adj_par_chrom
    print(setup)
    return setup
This method returns current nuclei chromatinization image setup.
Returns
dict Nuclei chromatinization image setup.
def get_results_nuclei(self):
    """
    Return the nuclei analysis results.

    Returns
    -------
    tuple or None
        (results dict, result image), or None when no analysis was run yet.
    """

    if self.images["nuclei"] is None:
        print("No results to return!")
        return None

    # Optionally preview the result image when display mode is active.
    if cfg._DISPLAY_MODE and self.show_plots:
        display_preview(self.resize_to_screen_img(self.images["nuclei"]))

    return self.nuclei_results["nuclei"], self.images["nuclei"]
This function returns nuclei analysis results.
Returns
dict Nuclei results in the dictionary format.
def get_results_nuclei_selected(self):
    """
    Return the nuclei analysis results after the selection thresholds
    were applied.

    Returns
    -------
    tuple or None
        (results dict, result image), or None when no selection was run yet.
    """

    if self.images["nuclei_reduced"] is None:
        print("No results to return!")
        return None

    # Optionally preview the result image when display mode is active.
    if cfg._DISPLAY_MODE and self.show_plots:
        display_preview(
            self.resize_to_screen_img(self.images["nuclei_reduced"])
        )

    return self.nuclei_results["nuclei_reduced"], self.images["nuclei_reduced"]
This function returns the results of the nuclei analysis following adjustments to the data selection thresholds.
Returns
dict Nuclei results in the dictionary format.
def get_results_nuclei_chromatinization(self):
    """
    Return the nuclei chromatinization analysis results.

    Returns
    -------
    tuple or None
        (results dict, result image), or None when no analysis was run yet.
    """

    if self.images["nuclei_chromatinization"] is None:
        print("No results to return!")
        return None

    # Optionally preview the result image when display mode is active.
    # NOTE: unlike the other getters this one shows the image unscaled.
    if cfg._DISPLAY_MODE and self.show_plots:
        display_preview(self.images["nuclei_chromatinization"])

    return (
        self.nuclei_results["nuclei_chromatinization"],
        self.images["nuclei_chromatinization"],
    )
This function returns the results of the nuclei chromatinization analysis.
Returns
dict Nuclei chromatinization results in the dictionary format.
def input_image(self, img):
    """
    Register an image for nuclei and/or chromatinization analysis and
    clear any previously stored parameter-test results.

    Parameters
    ----------
    img : np.ndarray
        Input image.
    """

    self.image = img
    # A new image invalidates earlier parameter tests.
    self.add_test(None)
This method adds the image to the class for nuclei and/or chromatinization analysis.
Parameters
img : np.ndarray Input image.
def get_features(self, model_out, image):
    """
    Extract geometric and intensity features for every labelled region.

    Iterates over ``skimage.measure.regionprops`` of the label image and
    collects per-region descriptors used for downstream selection,
    classification, or clustering.

    Parameters
    ----------
    model_out : np.ndarray
        Label image produced by the segmentation model (e.g. StarDist);
        each region carries a distinct integer label.

    image : ndarray
        The intensity image (2D) the labels refer to.

    Returns
    -------
    features : dict
        Dictionary mapping feature names to lists of per-region values.
    """

    features = {
        "label": [],
        "area": [],
        "area_bbox": [],
        "area_convex": [],
        "area_filled": [],
        "axis_major_length": [],
        "axis_minor_length": [],
        "eccentricity": [],
        "equivalent_diameter_area": [],
        "feret_diameter_max": [],
        "solidity": [],
        "perimeter": [],
        "perimeter_crofton": [],
        "circularity": [],
        "intensity_max": [],
        "intensity_mean": [],
        "intensity_min": [],
        "ratio": [],
        "coords": [],
    }

    for region in skimage.measure.regionprops(model_out, intensity_image=image):

        # Circularity = 4*pi*A / P^2 (1.0 for a perfect circle); guard
        # against a zero perimeter to avoid division by zero.
        if region.perimeter > 0:
            circularity = 4 * np.pi * region.area / (region.perimeter**2)
        else:
            circularity = 0

        features["area"].append(region.area)
        features["area_bbox"].append(region.area_bbox)
        features["area_convex"].append(region.area_convex)
        features["area_filled"].append(region.area_filled)
        features["axis_major_length"].append(region.axis_major_length)
        features["axis_minor_length"].append(region.axis_minor_length)
        features["eccentricity"].append(region.eccentricity)
        features["equivalent_diameter_area"].append(region.equivalent_diameter_area)
        features["feret_diameter_max"].append(region.feret_diameter_max)
        features["solidity"].append(region.solidity)
        features["perimeter"].append(region.perimeter)
        features["perimeter_crofton"].append(region.perimeter_crofton)
        features["label"].append(region.label)
        features["coords"].append(region.coords)
        features["circularity"].append(circularity)
        features["intensity_max"].append(np.max(region.intensity_max))
        # BUG FIX: the original reduced intensity_min and intensity_mean
        # with np.max (copy-paste error); use the matching reductions.
        # For single-channel images these are scalars and the value is
        # unchanged; for multichannel input the old code was wrong.
        features["intensity_min"].append(np.min(region.intensity_min))
        features["intensity_mean"].append(np.mean(region.intensity_mean))

    # Minor/major axis ratio per region (0.0 when the major axis is zero).
    features["ratio"] = [
        min_len / max_len if max_len != 0 else 0.0
        for min_len, max_len in zip(
            features["axis_minor_length"], features["axis_major_length"]
        )
    ]

    return features
Extracts numerical feature descriptors from model output for a given image.
This method processes the output returned by a feature-extraction model (e.g., CNN, encoder network, statistical model) and converts it into a structured feature vector associated with the provided image. Typically used for downstream analysis, classification, or clustering.
Parameters
model_out : any Output returned by the feature-extraction model. The expected format depends on the model (e.g., tensor, dict, list of arrays).
image : ndarray The input image (2D or 3D array) for which features are being extracted. Provided for reference or for combining raw image metrics with model features.
Returns
features : dict Dictionary containing extracted features. Keys correspond to feature names, and values are numerical descriptors.
def nuclei_finder_test(self):
    """
    Run a grid test of segmentation parameters ('nms' x 'prob') on the
    image provided by the ``input_image()`` method.

    Segments the image once for every combination of the built-in
    ``nms`` and ``prob`` candidate values, renders each labelled result
    (plus the adjusted original) as a figure, and stores the figures via
    ``add_test()`` for later inspection.

    Examples
    --------
    >>> nf.nuclei_finder_test()
    >>> nf.browser_test()
    """

    # The no-argument call prints the registry of available pretrained
    # models to the console before the actual model is loaded.
    StarDist2D.from_pretrained()
    model = StarDist2D.from_pretrained("2D_versatile_fluo")

    # Candidate thresholds for the grid test.
    nmst = [0.1, 0.2, 0.6]
    probt = [0.1, 0.5, 0.9]

    # FIX: narrowed the bare `except:` (which also swallowed
    # KeyboardInterrupt/SystemExit) to `except Exception`; cvtColor
    # raises when the image is already single-channel.
    try:
        img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
    except Exception:
        img = self.image

    plot = []

    # Apply the configured brightness/contrast/gamma correction.
    img = adjust_img_16bit(
        img,
        brightness=self.img_adj_par["brightness"],
        contrast=self.img_adj_par["contrast"],
        gamma=self.img_adj_par["gamma"],
    )
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    fig = plt.figure(dpi=300)
    plt.imshow(img)
    plt.axis("off")
    plt.title("Original", fontsize=25)

    if cfg._DISPLAY_MODE:
        if self.show_plots:
            plt.show()

    plot.append(fig)

    # PERF: normalization is loop-invariant, so compute it once instead
    # of re-running `normalize(img.copy())` for every grid cell.
    # NOTE(review): assumes predict_instances does not mutate its input —
    # appears safe, but confirm against the StarDist API.
    img_norm = normalize(img.copy())

    for n in tqdm(nmst, desc="Loop 1: nmst"):
        print(f"\n➡️ Starting outer loop for n = {n}")

        for t in tqdm(probt, desc=f" ↳ Loop 2 for n={n}", leave=False):
            print(f" → Starting inner loop for t = {t}")

            labels, _ = model.predict_instances(
                img_norm, nms_thresh=n, prob_thresh=t
            )

            tmp = self.get_features(model_out=labels, image=img)

            fig = plt.figure(dpi=300)
            plt.imshow(render_label(labels, img=img))
            plt.axis("off")
            plt.title(
                f"nms {n} & prob {t} \n detected nuc: {len(tmp['area'])}",
                fontsize=25,
            )

            if cfg._DISPLAY_MODE:
                if self.show_plots:
                    plt.show()

            plot.append(fig)

    self.add_test(plot)
This method performs testing analysis of parameters (specified 'nms' and 'prob' parameters) for the image provided by the input_image() method.
This method evaluates the performance of the internal NucleiFinder configuration using the currently loaded images, parameters, or model settings. It is typically used to check whether the detection, segmentation or preprocessing stages run correctly on sample data.
Examples
>>> nf.nuclei_finder_test()
>>> nf.browser_test()
def find_nuclei(self):
    """
    Performs analysis on the image provided by the ``input_image()`` method
    using default or user-defined parameters.

    To show current parameters, use:
    - ``current_parameters_nuclei``
    - ``current_parameters_img_adj``

    To set new parameters, use:
    - ``set_nms()``
    - ``set_prob()``
    - ``set_adj_image_gamma()``
    - ``set_adj_image_contrast()``
    - ``set_adj_image_brightness()``

    To get analysis results, use:
    - ``get_results_nuclei()``
    """

    # Guard clause: nothing to do without a loaded image.
    if not isinstance(self.image, np.ndarray):
        print("\nAdd image firstly!")
        return

    model = StarDist2D.from_pretrained("2D_versatile_fluo")

    # FIX: narrowed the bare `except:` (which also swallowed
    # KeyboardInterrupt/SystemExit) to `except Exception`; cvtColor
    # raises when the image is already single-channel.
    try:
        img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
    except Exception:
        img = self.image

    # Apply the configured brightness/contrast/gamma correction before
    # segmentation.
    img = adjust_img_16bit(
        img,
        brightness=self.img_adj_par["brightness"],
        contrast=self.img_adj_par["contrast"],
        gamma=self.img_adj_par["gamma"],
    )
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    labels, _ = model.predict_instances(
        normalize(img),
        nms_thresh=self.hyperparameter_nuclei["nms"],
        prob_thresh=self.hyperparameter_nuclei["prob"],
    )

    self.nuclei_results["nuclei"] = self.get_features(
        model_out=labels, image=img
    )

    if len(self.nuclei_results["nuclei"]["coords"]) == 0:
        # No regions found: reset all downstream results.
        self.nuclei_results["nuclei"] = None
        self.nuclei_results["nuclei_reduced"] = None
        self.nuclei_results["nuclei_chromatinization"] = None
        print("Nuclei not detected!")
        return

    original = adjust_img_16bit(img, color="gray")

    # In series (batch) mode only the corrected image is stored; the
    # mask preview is assembled later by the batch pipeline.
    if self.series_im is True:
        self.images["nuclei"] = original
    else:
        nuclei_mask = adjust_img_16bit(
            cv2.cvtColor(
                self.create_mask(self.nuclei_results["nuclei"], original),
                cv2.COLOR_BGR2GRAY,
            ),
            color="blue",
        )
        # Side-by-side preview: corrected image | detected-nuclei mask.
        self.images["nuclei"] = cv2.hconcat([original, nuclei_mask])

    if cfg._DISPLAY_MODE and self.show_plots:
        display_preview(self.resize_to_screen_img(self.images["nuclei"]))
Performs analysis on the image provided by the input_image() method
using default or user-defined parameters.
To show current parameters, use:
- current_parameters_nuclei
- current_parameters_img_adj
To set new parameters, use:
- set_nms()
- set_prob()
- set_adj_image_gamma()
- set_adj_image_contrast()
- set_adj_image_brightness()
To get analysis results, use:
- get_results_nuclei()
def select_nuclei(self):
    """
    Selects data obtained from ``find_nuclei()`` based on the set threshold
    parameters (minimum/maximum area and minimum mean intensity), storing the
    filtered result in ``self.nuclei_results['nuclei_reduced']``.

    To show current parameters, use:
        - ``current_parameters_nuclei``

    To set new parameters, use:
        - ``set_nuclei_circularity()``
        - ``set_nuclei_size()``
        - ``set_nuclei_min_mean_intensity()``

    To get analysis results, use:
        - ``get_results_nuclei_selected()``
    """

    if self.nuclei_results["nuclei"] is not None:
        # Deep copy so the filtering never mutates the raw detection results.
        input_in = copy.deepcopy(self.nuclei_results["nuclei"])

        # Keep nuclei with area > min_size, area < max_size, and
        # intensity_mean > the configured minimum.
        nuclei_dictionary = self.drop_dict(
            input_in,
            key="area",
            var=self.hyperparameter_nuclei["min_size"],
            action=">",
        )
        nuclei_dictionary = self.drop_dict(
            nuclei_dictionary,
            key="area",
            var=self.hyperparameter_nuclei["max_size"],
            action="<",
        )
        nuclei_dictionary = self.drop_dict(
            nuclei_dictionary,
            key="intensity_mean",
            var=self.hyperparameter_nuclei["intensity_mean"],
            action=">",
        )

        if len(nuclei_dictionary["coords"]) > 0:

            self.nuclei_results["nuclei_reduced"] = nuclei_dictionary

            # cvtColor raises on already-grayscale input; fall back to the
            # raw image. (was a bare `except:` — narrowed to Exception)
            try:
                img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
            except Exception:
                img = self.image

            oryginal = adjust_img_16bit(img, color="gray")

            # series mode: keep only the raw image (mask rendering is
            # deferred until after the series-wide nuclei repair step)
            if self.series_im is True:
                self.images["nuclei_reduced"] = oryginal
            else:
                nuclei_mask = adjust_img_16bit(
                    cv2.cvtColor(
                        self.create_mask(
                            self.nuclei_results["nuclei_reduced"], oryginal
                        ),
                        cv2.COLOR_BGR2GRAY,
                    ),
                    color="blue",
                )
                # side-by-side preview: original | selected-nuclei mask
                concatenated_image = cv2.hconcat([oryginal, nuclei_mask])

                self.images["nuclei_reduced"] = concatenated_image

            if cfg._DISPLAY_MODE:
                if self.show_plots:
                    display_preview(
                        self.resize_to_screen_img(self.images["nuclei_reduced"])
                    )

        else:
            # NOTE(review): this also clears the raw detection results
            # ("Analysis stop!") — presumably intentional so downstream
            # steps cannot run on unselected data.
            self.nuclei_results["nuclei"] = None
            self.nuclei_results["nuclei_reduced"] = None
            self.nuclei_results["nuclei_chromatinization"] = None

            print("Selected zero nuclei! Analysis stop!")

    else:
        print("Lack of nuclei data to select!")
Selects data obtained from find_nuclei() based on the set threshold parameters.
To show current parameters, use:
- current_parameters_nuclei
To set new parameters, use:
- set_nuclei_circularity()
- set_nuclei_size()
- set_nuclei_min_mean_intensity()
To get analysis results, use:
- get_results_nuclei_selected()
def nuclei_chromatinization(self):
    """
    Performs chromatinization analysis of nuclei using data obtained from
    ``find_nuclei()`` and/or ``select_nuclei()``.

    For every nucleus, the surrounding image is masked out, the nucleus
    pixels are Otsu-thresholded (scaled by the ``cut_point`` parameter),
    and the remaining bright regions are measured as chromatin spots.
    Spots are then filtered by size and axis ratio and assigned back to
    their parent nucleus by coordinate overlap.

    To show current parameters, use:
        - ``current_parameters_chromatinization``
        - ``current_parameters_img_adj_chro``

    To set new parameters, use:
        - ``set_chromatinization_size()``
        - ``set_chromatinization_ratio()``
        - ``set_chromatinization_cut_point()``
        - ``set_adj_chrom_gamma()``
        - ``set_adj_chrom_contrast()``
        - ``set_adj_chrom_brightness()``

    To get analysis results, use:
        - ``get_results_nuclei_chromatinization()``
    """

    def add_lists(f, g):
        # Element-wise string concatenation of two lists; the shorter list
        # is padded with empty strings. Used to build "x+y" coordinate keys.
        result = []
        max_length = max(len(f), len(g))

        for i in range(max_length):
            f_elem = f[i] if i < len(f) else ""
            g_elem = g[i] if i < len(g) else ""
            result.append(f_elem + g_elem)

        return result

    def reverse_coords(image, x, y):
        # Return the coordinates of every pixel NOT listed in (x, y),
        # i.e. the complement of the nucleus within the image.
        zero = np.zeros(image.shape)

        zero[x, y] = 2**16

        zero_indices = np.where(zero == 0)

        return zero_indices[0], zero_indices[1]

    # Prefer the selected (reduced) nuclei when available.
    if isinstance(self.nuclei_results["nuclei_reduced"], dict):
        nuclei_dictionary = self.nuclei_results["nuclei_reduced"]
    else:
        nuclei_dictionary = self.nuclei_results["nuclei"]

    if nuclei_dictionary is not None:
        arrays_list = copy.deepcopy(nuclei_dictionary["coords"])

        chromatione_info = {
            "area": [],
            "area_bbox": [],
            "area_convex": [],
            "area_filled": [],
            "axis_major_length": [],
            "axis_minor_length": [],
            "eccentricity": [],
            "equivalent_diameter_area": [],
            "feret_diameter_max": [],
            "solidity": [],
            "perimeter": [],
            "perimeter_crofton": [],
            "coords": [],
        }

        # Accumulator image showing every adjusted nucleus region.
        full_im = np.zeros(self.image.shape[0:2], dtype=np.uint16)
        full_im = adjust_img_16bit(full_im)

        for arr in arrays_list:
            x = list(arr[:, 0])
            y = list(arr[:, 1])

            x1, y1 = reverse_coords(self.image, x, y)

            # Black out everything except the current nucleus.
            regions_chro2 = self.image.copy()

            regions_chro2[x1, y1] = 0

            regions_chro2 = regions_chro2.astype("uint16")

            # cvtColor raises on already-grayscale input — keep as-is then.
            # (was a bare `except:` — narrowed to Exception)
            try:
                regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY)
            except Exception:
                pass

            regions_chro2 = adjust_img_16bit(
                regions_chro2,
                brightness=self.img_adj_par_chrom["brightness"],
                contrast=self.img_adj_par_chrom["contrast"],
                gamma=self.img_adj_par_chrom["gamma"],
            )

            full_im = merge_images(
                image_list=[full_im, regions_chro2], intensity_factors=[1, 1]
            )

            # Otsu threshold computed on the nucleus pixels only; the binary
            # map returned by cv2.threshold is unused, only `ret` matters.
            ret, _ = cv2.threshold(
                regions_chro2[x, y],
                0,
                2**16 - 1,
                cv2.THRESH_BINARY + cv2.THRESH_OTSU,
            )

            # Zero out pixels below the scaled Otsu threshold.
            regions_chro2[
                regions_chro2
                <= ret * self.hyperparameter_chromatinization["cut_point"]
            ] = 0

            regions_chro2 = cv2.cvtColor(regions_chro2, cv2.COLOR_BGR2GRAY)

            chromatione = regions_chro2 > 0

            labeled_cells = measure.label(chromatione)
            # (fix: a first regionprops() call without intensity_image was
            # immediately overwritten — removed as redundant)
            regions = measure.regionprops(
                labeled_cells, intensity_image=regions_chro2
            )

            for region in regions:

                chromatione_info["area"].append(region.area)
                chromatione_info["area_bbox"].append(region.area_bbox)
                chromatione_info["area_convex"].append(region.area_convex)
                chromatione_info["area_filled"].append(region.area_filled)
                chromatione_info["axis_major_length"].append(
                    region.axis_major_length
                )
                chromatione_info["axis_minor_length"].append(
                    region.axis_minor_length
                )
                chromatione_info["eccentricity"].append(region.eccentricity)
                chromatione_info["equivalent_diameter_area"].append(
                    region.equivalent_diameter_area
                )
                chromatione_info["feret_diameter_max"].append(
                    region.feret_diameter_max
                )
                chromatione_info["solidity"].append(region.solidity)
                chromatione_info["perimeter"].append(region.perimeter)
                chromatione_info["perimeter_crofton"].append(
                    region.perimeter_crofton
                )
                chromatione_info["coords"].append(region.coords)

        # Minor/major axis ratio per spot; 0.0 when the major axis is zero.
        ratios = []

        for min_len, max_len in zip(
            chromatione_info["axis_minor_length"],
            chromatione_info["axis_major_length"],
        ):
            if max_len != 0:
                ratio = min_len / max_len
                ratios.append(ratio)
            else:
                ratios.append(float(0.0))

        chromatione_info["ratio"] = ratios

        # Filter spots: min_size < area < max_size and ratio > threshold.
        chromation_dic = self.drop_dict(
            chromatione_info,
            key="area",
            var=self.hyperparameter_chromatinization["min_size"],
            action=">",
        )
        chromation_dic = self.drop_dict(
            chromation_dic,
            key="area",
            var=self.hyperparameter_chromatinization["max_size"],
            action="<",
        )
        chromation_dic = self.drop_dict(
            chromation_dic,
            key="ratio",
            var=self.hyperparameter_chromatinization["ratio"],
            action=">",
        )

        arrays_list2 = copy.deepcopy(chromation_dic["coords"])

        nuclei_dictionary["spot_size_area"] = []
        nuclei_dictionary["spot_size_area_bbox"] = []
        nuclei_dictionary["spot_size_area_convex"] = []
        nuclei_dictionary["spot_size_area_filled"] = []
        nuclei_dictionary["spot_axis_major_length"] = []
        nuclei_dictionary["spot_axis_minor_length"] = []
        nuclei_dictionary["spot_eccentricity"] = []
        nuclei_dictionary["spot_size_equivalent_diameter_area"] = []
        nuclei_dictionary["spot_feret_diameter_max"] = []
        nuclei_dictionary["spot_perimeter"] = []
        nuclei_dictionary["spot_perimeter_crofton"] = []

        # Assign each surviving spot to its parent nucleus by coordinate
        # overlap of stringified "x+y" pixel keys.
        for i, arr in enumerate(arrays_list):

            spot_size_area = []
            spot_size_area_bbox = []
            # (fix: spot_size_area_convex was initialized twice)
            spot_size_area_convex = []
            spot_size_area_filled = []
            spot_axis_major_length = []
            spot_axis_minor_length = []
            spot_eccentricity = []
            spot_size_equivalent_diameter_area = []
            spot_feret_diameter_max = []
            spot_perimeter = []
            spot_perimeter_crofton = []

            # Flatten the array,
            df_tmp = pd.DataFrame(arr)
            df_tmp["duplicates"] = add_lists(
                [str(x) for x in df_tmp[0]], [str(y) for y in df_tmp[1]]
            )

            counter_tmp = Counter(df_tmp["duplicates"])

            for j, arr2 in enumerate(arrays_list2):
                df_tmp2 = pd.DataFrame(arr2)
                df_tmp2["duplicates"] = add_lists(
                    [str(x) for x in df_tmp2[0]], [str(y) for y in df_tmp2[1]]
                )

                counter_tmp2 = Counter(df_tmp2["duplicates"])
                intersection_length = len(counter_tmp.keys() & counter_tmp2.keys())
                min_length = min(len(counter_tmp), len(counter_tmp2))

                # Spot belongs to this nucleus when >=80% of the smaller
                # coordinate set overlaps and the spot covers between 2.5%
                # and 50% of the nucleus area.
                if intersection_length >= 0.8 * min_length:

                    if (
                        len(list(df_tmp2["duplicates"]))
                        / len(list(df_tmp["duplicates"]))
                    ) >= 0.025 and (
                        len(list(df_tmp2["duplicates"]))
                        / len(list(df_tmp["duplicates"]))
                    ) <= 0.5:
                        spot_size_area.append(chromation_dic["area"][j])
                        spot_size_area_bbox.append(chromation_dic["area_bbox"][j])
                        spot_size_area_convex.append(
                            chromation_dic["area_convex"][j]
                        )
                        spot_size_area_filled.append(
                            chromation_dic["area_filled"][j]
                        )
                        spot_axis_major_length.append(
                            chromation_dic["axis_major_length"][j]
                        )
                        spot_axis_minor_length.append(
                            chromation_dic["axis_minor_length"][j]
                        )
                        spot_eccentricity.append(chromation_dic["eccentricity"][j])
                        spot_size_equivalent_diameter_area.append(
                            chromation_dic["equivalent_diameter_area"][j]
                        )
                        spot_feret_diameter_max.append(
                            chromation_dic["feret_diameter_max"][j]
                        )
                        spot_perimeter.append(chromation_dic["perimeter"][j])
                        spot_perimeter_crofton.append(
                            chromation_dic["perimeter_crofton"][j]
                        )

            nuclei_dictionary["spot_size_area"].append(spot_size_area)
            nuclei_dictionary["spot_size_area_bbox"].append(spot_size_area_bbox)
            nuclei_dictionary["spot_size_area_convex"].append(spot_size_area_convex)
            nuclei_dictionary["spot_size_area_filled"].append(spot_size_area_filled)
            nuclei_dictionary["spot_axis_major_length"].append(
                spot_axis_major_length
            )
            nuclei_dictionary["spot_axis_minor_length"].append(
                spot_axis_minor_length
            )
            nuclei_dictionary["spot_eccentricity"].append(spot_eccentricity)
            nuclei_dictionary["spot_size_equivalent_diameter_area"].append(
                spot_size_equivalent_diameter_area
            )
            nuclei_dictionary["spot_feret_diameter_max"].append(
                spot_feret_diameter_max
            )
            nuclei_dictionary["spot_perimeter"].append(spot_perimeter)
            nuclei_dictionary["spot_perimeter_crofton"].append(
                spot_perimeter_crofton
            )

        self.nuclei_results["chromatinization"] = chromation_dic
        self.nuclei_results["nuclei_chromatinization"] = nuclei_dictionary

        self.images["nuclei_chromatinization"] = self.create_mask(
            chromation_dic, self.image
        )

        img_chrom = adjust_img_16bit(
            cv2.cvtColor(
                self.create_mask(
                    self.nuclei_results["chromatinization"], self.image
                ),
                cv2.COLOR_BGR2GRAY,
            ),
            color="yellow",
        )

        # Blue mask of the nuclei the spots were assigned to.
        if isinstance(self.nuclei_results["nuclei_reduced"], dict):
            nuclei_mask = adjust_img_16bit(
                cv2.cvtColor(
                    self.create_mask(
                        self.nuclei_results["nuclei_reduced"], self.image
                    ),
                    cv2.COLOR_BGR2GRAY,
                ),
                color="blue",
            )
        else:
            nuclei_mask = adjust_img_16bit(
                cv2.cvtColor(
                    self.create_mask(self.nuclei_results["nuclei"], self.image),
                    cv2.COLOR_BGR2GRAY,
                ),
                color="blue",
            )

        nuclei_mask = merge_images([nuclei_mask, img_chrom], [1, 1])

        # cvtColor raises on already-grayscale input; fall back unchanged.
        # (was a bare `except:` — narrowed to Exception)
        try:
            img = cv2.cvtColor(full_im, cv2.COLOR_BGR2GRAY)
        except Exception:
            img = full_im

        oryginal = adjust_img_16bit(img, color="gray")

        # side-by-side preview: adjusted nuclei | nuclei+chromatin masks
        concatenated_image = cv2.hconcat([oryginal, nuclei_mask])

        self.images["nuclei_chromatinization"] = concatenated_image

        if cfg._DISPLAY_MODE:
            if self.show_plots:
                display_preview(
                    self.resize_to_screen_img(
                        self.images["nuclei_chromatinization"]
                    )
                )

    else:
        print("Lack of nuclei data to select!")
Performs chromatinization analysis of nuclei using data obtained from
find_nuclei() and/or select_nuclei().
To show current parameters, use:
- current_parameters_chromatinization
- current_parameters_img_adj_chro
To set new parameters, use:
- set_chromatinization_size()
- set_chromatinization_ratio()
- set_chromatinization_cut_point()
- set_adj_chrom_gamma()
- set_adj_chrom_contrast()
- set_adj_chrom_brightness()
To get analysis results, use:
- get_results_nuclei_chromatinization()
def browser_test(self):
    """
    Displays test results generated by the ``nuclei_finder_test()`` method
    in the default web browser.

    Each figure in ``self.test_results`` is rendered to an in-memory PNG,
    embedded as a base64 ``<img>`` tag in a temporary HTML file, and the
    file is opened in a new browser tab.
    """

    fragments = []

    for figure in self.test_results:
        png_buffer = BytesIO()
        figure.savefig(png_buffer, format="png", bbox_inches="tight")
        png_buffer.seek(0)

        encoded = base64.b64encode(png_buffer.read()).decode("utf-8")
        fragments.append(
            f'<img src="data:image/png;base64,{encoded}" style="margin:10px;"/>\n'
        )

    # delete=False: the browser needs the file to outlive this call.
    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".html"
    ) as tmp_file:
        tmp_file.write("".join(fragments))
        tmp_filename = tmp_file.name

    webbrowser.open_new_tab(tmp_filename)
Displays test results generated by the nuclei_finder_test() method
in the default web browser.
def series_analysis_chromatinization(
    self,
    path_to_images: str,
    file_extension: str = "tiff",
    selected_id: list = None,
    fille_name_part: str = "",
    selection_opt: bool = True,
    include_img: bool = True,
    test_series: int = 0,
):
    """
    Performs full analysis on images provided via the ``input_image()`` method
    using default or user-defined parameters.

    This method runs nuclei detection, nuclei selection, and chromatinization
    analysis in a single pipeline. Users can adjust parameters for each step
    before running the analysis.

    To show current parameters, use:
        - ``current_parameters_nuclei``
        - ``current_parameters_img_adj``
        - ``current_parameters_chromatinization``
        - ``current_parameters_img_adj_chro``

    To set new parameters, use:
        - ``set_nms()``
        - ``set_prob()``
        - ``set_adj_image_gamma()``
        - ``set_adj_image_contrast()``
        - ``set_adj_image_brightness()``
        - ``set_nuclei_circularity()``
        - ``set_nuclei_size()``
        - ``set_nuclei_min_mean_intensity()``
        - ``set_chromatinization_size()``
        - ``set_chromatinization_ratio()``
        - ``set_chromatinization_cut_point()``
        - ``set_adj_chrom_gamma()``
        - ``set_adj_chrom_contrast()``
        - ``set_adj_chrom_brightness()``

    Parameters
    ----------
    path_to_images : str
        Path to the directory containing images for analysis.

    file_extension : str, optional
        Extension of the image files. Default is 'tiff'.

    selected_id : list, optional
        List of IDs that must be part of the image name to distinguish them
        from others. Default is None (treated as an empty list), which means
        all images in the directory will be processed.

    fille_name_part : str, optional
        Part of the file name to filter images. Default is an empty string.

    selection_opt : bool, optional
        Whether to run ``select_nuclei()`` with the defined parameters. Default is True.

    include_img : bool, optional
        Whether to include the images in the result dictionary. Default is True.

    test_series : int, optional
        Number of images to test the parameters and return results. Default is 0,
        which means all images in the directory will be processed.

    Returns
    -------
    results_dict : dict
        Dictionary containing results for each image in the directory.
        Keys correspond to image file names.

    Notes
    -----
    This method runs the complete nuclei and chromatinization analysis pipeline.

    Parameters must be set appropriately before calling to ensure correct results.
    """

    # Avoid a shared mutable default argument; None means "no ID filter".
    if selected_id is None:
        selected_id = []

    results_dict = {}
    results_img = {}
    results_img_raw = {}

    files = glob.glob(os.path.join(path_to_images, "*." + file_extension))

    if len(fille_name_part) > 0:
        files = [x for x in files if fille_name_part.lower() in x.lower()]

    if len(selected_id) > 0:
        selected_id = [str(x) for x in selected_id]
        # The image ID is the file-name prefix before the first underscore.
        files = [
            x
            for x in files
            if re.sub("_.*", "", os.path.basename(x)) in selected_id
        ]

    if test_series > 0:

        files = random.sample(files, test_series)

    # Silence per-image previews while processing the series.
    self.show_plots = False
    self.series_im = True

    print("\nFile analysis:\n\n")

    for file in tqdm(files):

        print(file)

        self.show_plots = False

        image = self.load_image(file)

        self.input_image(image)

        self.find_nuclei()

        tmp = None

        if selection_opt is True:
            self.select_nuclei()
            tmp = self.get_results_nuclei_selected()

        else:
            tmp = self.get_results_nuclei()

        if tmp is not None:

            if tmp[0] is not None:

                results_dict[str(os.path.basename(file))] = tmp[0]
                results_img[str(os.path.basename(file))] = tmp[1]
                results_img_raw[str(os.path.basename(file))] = image

        del tmp
        del image

    # Merge/split nuclei across the whole series before chromatinization.
    results_dict_tmp = self.repairing_nuclei(results_dict)

    results_dict = {}

    print("\nChromatization searching:\n\n")

    for ke in tqdm(results_dict_tmp.keys()):

        tmp = None

        # Best-effort per sample: a failure on one image must not abort
        # the series. (was a bare `except:` — narrowed to Exception)
        try:
            self._nuclei_chromatinization_series(
                results_img_raw[ke], results_dict_tmp[ke]
            )
            tmp = self.get_results_nuclei_chromatinization()
        except Exception:
            print(f"Sample {ke} could not be processed.")

        if tmp is not None:

            if tmp[0] is not None:

                # Coordinates are bulky and not needed in series output.
                tmp[0].pop("coords")

                if include_img:
                    results_dict[str(os.path.basename(ke))] = {
                        "stats": tmp[0],
                        "img": cv2.hconcat([results_img[ke], tmp[1]]),
                    }
                    del tmp
                else:
                    results_dict[str(os.path.basename(ke))] = tmp[0]
                    del tmp

        else:
            # fix: was f"... {print(ke)}", which printed the key separately
            # and embedded the literal string "None" in the message
            print(f"Unable to obtain results for {ke}")

    # Restore interactive defaults.
    self.show_plots = True
    self.series_im = False

    return results_dict
Performs full analysis on images provided via the input_image() method
using default or user-defined parameters.
This method runs nuclei detection, nuclei selection, and chromatinization analysis in a single pipeline. Users can adjust parameters for each step before running the analysis.
To show current parameters, use:
- current_parameters_nuclei
- current_parameters_img_adj
- current_parameters_chromatinization
- current_parameters_img_adj_chro
To set new parameters, use:
- set_nms()
- set_prob()
- set_adj_image_gamma()
- set_adj_image_contrast()
- set_adj_image_brightness()
- set_nuclei_circularity()
- set_nuclei_size()
- set_nuclei_min_mean_intensity()
- set_chromatinization_size()
- set_chromatinization_ratio()
- set_chromatinization_cut_point()
- set_adj_chrom_gamma()
- set_adj_chrom_contrast()
- set_adj_chrom_brightness()
Parameters
path_to_images : str Path to the directory containing images for analysis.
file_extension : str, optional Extension of the image files. Default is 'tiff'.
selected_id : list, optional List of IDs that must be part of the image name to distinguish them from others. Default is an empty list, which means all images in the directory will be processed.
fille_name_part : str, optional Part of the file name to filter images. Default is an empty string.
selection_opt : bool, optional
Whether to run select_nuclei() with the defined parameters. Default is True.
include_img : bool, optional Whether to include the images in the result dictionary. Default is True.
test_series : int, optional Number of images to test the parameters and return results. Default is 0, which means all images in the directory will be processed.
Returns
results_dict : dict Dictionary containing results for each image in the directory. Keys correspond to image file names.
Notes
This method runs the complete nuclei and chromatinization analysis pipeline.
Parameters must be set appropriately before calling to ensure correct results.
def series_analysis_nuclei(
    self,
    path_to_images: str,
    file_extension: str = "tiff",
    selected_id: list = None,
    fille_name_part: str = "",
    selection_opt: bool = True,
    include_img: bool = True,
    test_series: int = 0,
):
    """
    Performs analysis on the image provided by the ``input_image()`` method
    using default or user-defined parameters.

    This method runs nuclei detection and selection using the currently set
    parameters. Users can adjust image preprocessing and nuclei detection
    parameters before running the analysis.

    To show current parameters, use:
        - ``current_parameters_nuclei``
        - ``current_parameters_img_adj``

    To set new parameters, use:
        - ``set_nms()``
        - ``set_prob()``
        - ``set_adj_image_gamma()``
        - ``set_adj_image_contrast()``
        - ``set_adj_image_brightness()``
        - ``set_nuclei_circularity()``
        - ``set_nuclei_size()``
        - ``set_nuclei_min_mean_intensity()``

    Parameters
    ----------
    path_to_images : str
        Path to the directory containing images for analysis.

    file_extension : str, optional
        Extension of the image files. Default is 'tiff'.

    selected_id : list, optional
        List of IDs that must be part of the image name to distinguish them
        from others. Default is None (treated as an empty list), which means
        all images in the directory will be processed.

    fille_name_part : str, optional
        Part of the file name to filter images. Default is an empty string.

    selection_opt : bool, optional
        Whether to run the ``select_nuclei()`` method with the defined parameters.
        Default is True.

    include_img : bool, optional
        Whether to include the images in the result dictionary. Default is True.

    test_series : int, optional
        Number of images to test the parameters and return results. Default is 0,
        which means all images in the directory will be processed.

    Returns
    -------
    results_dict : dict
        Dictionary containing results for each image in the directory.
        Keys correspond to image file names.
    """

    # Avoid a shared mutable default argument; None means "no ID filter".
    if selected_id is None:
        selected_id = []

    results_dict = {}
    results_img = {}

    files = glob.glob(os.path.join(path_to_images, "*." + file_extension))

    if len(fille_name_part) > 0:
        files = [x for x in files if fille_name_part.lower() in x.lower()]

    if len(selected_id) > 0:
        selected_id = [str(x) for x in selected_id]
        # The image ID is the file-name prefix before the first underscore.
        files = [
            x
            for x in files
            if re.sub("_.*", "", os.path.basename(x)) in selected_id
        ]

    if test_series > 0:

        files = random.sample(files, test_series)

    # Silence per-image previews while processing the series.
    self.show_plots = False
    self.series_im = True

    print("\nFile analysis:\n\n")

    for file in tqdm(files):

        print(file)

        image = self.load_image(file)

        self.input_image(image)

        self.find_nuclei()

        if self.nuclei_results["nuclei"] is not None:

            if selection_opt is True:
                self.select_nuclei()
                tmp = self.get_results_nuclei_selected()

            else:
                tmp = self.get_results_nuclei()

            if tmp is not None:

                if tmp[0] is not None:

                    if include_img:
                        results_dict[str(os.path.basename(file))] = tmp[0]
                        results_img[str(os.path.basename(file))] = tmp[1]

                        del tmp

                    else:
                        results_dict[str(os.path.basename(file))] = tmp[0]
                        del tmp

            else:
                # fix: was f"... {print(file)}", which printed the path
                # separately and embedded "None" in the message
                print(f"Unable to obtain results for {file}")

        else:

            print(f"Unable to obtain results for {file}")

    # Restore interactive defaults.
    self.show_plots = True
    self.series_im = False

    # Merge/split nuclei across the whole series.
    results_dict_tmp = self.repairing_nuclei(results_dict)

    if include_img is False:

        return results_dict_tmp

    else:

        results_dict = {}

        for ke in results_dict_tmp.keys():

            nuclei_mask = adjust_img_16bit(
                cv2.cvtColor(
                    self.create_mask(results_dict_tmp[ke], results_img[ke]),
                    cv2.COLOR_BGR2GRAY,
                ),
                color="blue",
            )
            # side-by-side preview: original | repaired-nuclei mask
            concatenated_image = cv2.hconcat([results_img[ke], nuclei_mask])

            cred = results_dict_tmp[ke]
            # cred.pop('coords')

            results_dict[ke] = {"stats": cred, "img": concatenated_image}

        return results_dict
Performs analysis on the image provided by the input_image() method
using default or user-defined parameters.
This method runs nuclei detection and selection using the currently set parameters. Users can adjust image preprocessing and nuclei detection parameters before running the analysis.
To show current parameters, use:
- current_parameters_nuclei
- current_parameters_img_adj
To set new parameters, use:
- set_nms()
- set_prob()
- set_adj_image_gamma()
- set_adj_image_contrast()
- set_adj_image_brightness()
- set_nuclei_circularity()
- set_nuclei_size()
- set_nuclei_min_mean_intensity()
Parameters
path_to_images : str Path to the directory containing images for analysis.
file_extension : str, optional Extension of the image files. Default is 'tiff'.
selected_id : list, optional List of IDs that must be part of the image name to distinguish them from others. Default is an empty list, which means all images in the directory will be processed.
fille_name_part : str, optional Part of the file name to filter images. Default is an empty string.
selection_opt : bool, optional
Whether to run the select_nuclei() method with the defined parameters.
Default is True.
include_img : bool, optional Whether to include the images in the result dictionary. Default is True.
test_series : int, optional Number of images to test the parameters and return results. Default is 0, which means all images in the directory will be processed.
Returns
results_dict : dict Dictionary containing results for each image in the directory. Keys correspond to image file names.
Inherited Members
2955class NucleiDataManagement: 2956 """ 2957 Manages nuclei analysis data obtained from the `NucleiFinder` class, 2958 including nuclei properties and optionally Image Stream (IS) data. 2959 2960 This class allows loading nuclei data from JSON files or directly from 2961 `NucleiFinder` analysis results, converting them to pandas DataFrames, 2962 adding IS data, concatenating results from multiple experiments, and 2963 saving results in JSON or CSV format. It also provides helper methods 2964 for merging, filtering, and retrieving data. 2965 2966 Attributes 2967 ---------- 2968 nuceli_data : dict 2969 Dictionary storing nuclei properties for each image or experiment. 2970 2971 experiment_name : str 2972 Name of the experiment. 2973 2974 nuceli_data_df : pd.DataFrame or None 2975 DataFrame representation of nuclei properties. 2976 2977 nuclei_IS_data : pd.DataFrame or None 2978 DataFrame of nuclei data merged with IS data. 2979 2980 concat_data : list or None 2981 List of other `NucleiDataManagement` objects added for combined analysis. 2982 2983 Methods 2984 ------- 2985 load_nuc_dict(path) 2986 Load nuclei data from a JSON dictionary file (*.nuc) and initialize the object. 2987 _convert_to_df() 2988 Convert nuclei dictionary data to a pandas DataFrame. 2989 2990 add_IS_data(IS_data, IS_features) 2991 Merge Image Stream (IS) data with nuclei data. 2992 2993 get_data() 2994 Retrieve the nuclei data as a pandas DataFrame. 2995 2996 get_data_with_IS() 2997 Retrieve the nuclei data merged with IS data. 2998 2999 save_nuc_project(path) 3000 Save nuclei data as a JSON file with *.nuc extension. 3001 3002 save_results_df(path) 3003 Save nuclei data as a CSV file. 3004 3005 save_results_df_with_IS(path) 3006 Save nuclei data merged with IS data as a CSV file. 3007 3008 add_experiment(data_list) 3009 Add other `NucleiDataManagement` objects for concatenated analysis. 
3010 3011 get_mutual_experiments_data(inc_is) 3012 Retrieve concatenated nuclei data from multiple experiments. 3013 3014 save_mutual_experiments(path, inc_is) 3015 Save concatenated data from multiple experiments as a CSV file. 3016 """ 3017 3018 def __init__(self, nuclei_data: dict, experiment_name: str): 3019 """ 3020 Initialize a NucleiDataManagement object with nuclei data and experiment name. 3021 3022 Parameters 3023 ---------- 3024 nuclei_data : dict 3025 Dictionary containing nuclei properties for each image or experiment. 3026 If the dictionary entries have keys 'stats' and 'img', only 'stats' are stored. 3027 3028 experiment_name : str 3029 Name of the experiment. 3030 3031 Attributes 3032 ---------- 3033 nuceli_data : dict 3034 Dictionary storing nuclei properties for each image or experiment. 3035 3036 experiment_name : str 3037 Name of the experiment. 3038 3039 nuceli_data_df : pd.DataFrame or None 3040 DataFrame representation of nuclei properties (initialized as None). 3041 3042 nuclei_IS_data : pd.DataFrame or None 3043 DataFrame of nuclei data merged with Image Stream (IS) data (initialized as None). 3044 3045 concat_data : list or None 3046 List of other `NucleiDataManagement` objects added for combined analysis (initialized as None). 
3047 """ 3048 3049 if set(nuclei_data[list(nuclei_data.keys())[0]].keys()) == set( 3050 ["stats", "img"] 3051 ): 3052 3053 self.nuceli_data = {} 3054 3055 for k in nuclei_data.keys(): 3056 self.nuceli_data[k] = nuclei_data[k]["stats"] 3057 3058 for k in self.nuceli_data.keys(): 3059 if "coords" in self.nuceli_data[k].keys(): 3060 self.nuceli_data[k].pop("coords") 3061 3062 else: 3063 self.nuceli_data = nuclei_data 3064 3065 for k in self.nuceli_data.keys(): 3066 if "coords" in self.nuceli_data[k].keys(): 3067 self.nuceli_data[k].pop("coords") 3068 3069 self.experiment_name = experiment_name 3070 """Name of the experiment.""" 3071 3072 self.nuceli_data_df = None 3073 """Stored DataFrame representation of nuclei features""" 3074 3075 self.nuclei_IS_data = None 3076 """Stored DataFrame of data from Image Stream (IS).""" 3077 3078 self.concat_data = None 3079 """Sotored list of other `NucleiDataManagement` objects.""" 3080 3081 @classmethod 3082 def load_nuc_dict(cls, path: str): 3083 """ 3084 Initialize a NucleiDataManagement object from a JSON dictionary file. 3085 3086 The loaded data must be previously saved using the ``save_nuc_project()`` method. 3087 3088 Parameters 3089 ---------- 3090 path : str 3091 Path to the *.nuc JSON file containing nuclei data. 3092 """ 3093 3094 if ".nuc" in path: 3095 3096 if os.path.exists(path): 3097 3098 with open(path, "r") as json_file: 3099 loaded_data = json.load(json_file) 3100 3101 return cls(loaded_data, os.path.splitext(os.path.basename(path))[0]) 3102 3103 else: 3104 raise ValueError("\nInvalid path!") 3105 3106 else: 3107 raise ValueError( 3108 "\nInvalid dictionary to load. It must contain a .nuc extension!" 3109 ) 3110 3111 def _convert_to_df(self): 3112 """ 3113 Helper method that converts the internal nuclei dictionary into a pandas DataFrame. 
3114 3115 This method iterates over the nuclei data stored in `self.nuceli_data`, 3116 flattens the information for each nucleus, computes aggregate statistics 3117 for associated spots if present, and stores the resulting DataFrame in 3118 `self.nuceli_data_df`. 3119 """ 3120 3121 nuclei_data = self.nuceli_data 3122 3123 data = [] 3124 3125 for i in tqdm(nuclei_data.keys()): 3126 for n, _ in enumerate(nuclei_data[i]["area"]): 3127 row = { 3128 "id_name": re.sub("_.*", "", i), 3129 "nuclei_area": nuclei_data[i]["area"][n], 3130 "nuclei_area_bbox": nuclei_data[i]["area_bbox"][n], 3131 "nuclei_equivalent_diameter_area": nuclei_data[i][ 3132 "equivalent_diameter_area" 3133 ][n], 3134 "nuclei_feret_diameter_max": nuclei_data[i]["feret_diameter_max"][ 3135 n 3136 ], 3137 "nuclei_axis_major_length": nuclei_data[i]["axis_major_length"][n], 3138 "nuclei_axis_minor_length": nuclei_data[i]["axis_minor_length"][n], 3139 "nuclei_circularity": nuclei_data[i]["circularity"][n], 3140 "nuclei_eccentricity": nuclei_data[i]["eccentricity"][n], 3141 "nuclei_perimeter": nuclei_data[i]["perimeter"][n], 3142 "nuclei_ratio": nuclei_data[i]["ratio"][n], 3143 "nuclei_solidity": nuclei_data[i]["solidity"][n], 3144 } 3145 3146 if "spot_size_area" in nuclei_data[i]: 3147 if len(nuclei_data[i]["spot_size_area"][n]) > 0: 3148 row.update( 3149 { 3150 "spot_n": len(nuclei_data[i]["spot_size_area"][n]), 3151 "avg_spot_area": np.mean( 3152 nuclei_data[i]["spot_size_area"][n] 3153 ), 3154 "avg_spot_area_bbox": np.mean( 3155 nuclei_data[i]["spot_size_area_bbox"][n] 3156 ), 3157 "avg_spot_perimeter": np.mean( 3158 nuclei_data[i]["spot_perimeter"][n] 3159 ), 3160 "sum_spot_area": np.sum( 3161 nuclei_data[i]["spot_size_area"][n] 3162 ), 3163 "sum_spot_area_bbox": np.sum( 3164 nuclei_data[i]["spot_size_area_bbox"][n] 3165 ), 3166 "sum_spot_perimeter": np.sum( 3167 nuclei_data[i]["spot_perimeter"][n] 3168 ), 3169 "avg_spot_axis_major_length": np.mean( 3170 nuclei_data[i]["spot_axis_major_length"][n] 3171 
), 3172 "avg_spot_axis_minor_length": np.mean( 3173 nuclei_data[i]["spot_axis_minor_length"][n] 3174 ), 3175 "avg_spot_eccentricity": np.mean( 3176 nuclei_data[i]["spot_eccentricity"][n] 3177 ), 3178 "avg_spot_size_equivalent_diameter_area": np.mean( 3179 nuclei_data[i][ 3180 "spot_size_equivalent_diameter_area" 3181 ][n] 3182 ), 3183 "sum_spot_size_equivalent_diameter_area": np.sum( 3184 nuclei_data[i][ 3185 "spot_size_equivalent_diameter_area" 3186 ][n] 3187 ), 3188 } 3189 ) 3190 else: 3191 row.update( 3192 { 3193 k: 0 3194 for k in [ 3195 "spot_n", 3196 "avg_spot_area", 3197 "avg_spot_area_bbox", 3198 "avg_spot_perimeter", 3199 "sum_spot_area", 3200 "sum_spot_area_bbox", 3201 "sum_spot_perimeter", 3202 "avg_spot_axis_major_length", 3203 "avg_spot_axis_minor_length", 3204 "avg_spot_eccentricity", 3205 "avg_spot_size_equivalent_diameter_area", 3206 "sum_spot_size_equivalent_diameter_area", 3207 ] 3208 } 3209 ) 3210 3211 data.append(row) 3212 3213 nuclei_df = pd.DataFrame(data) 3214 3215 nuclei_df["nuclei_per_img"] = nuclei_df.groupby("id_name")["id_name"].transform( 3216 "count" 3217 ) 3218 nuclei_df["set"] = self.experiment_name 3219 3220 self.nuceli_data_df = nuclei_df 3221 3222 def add_IS_data(self, IS_data: pd.DataFrame, IS_features: list = []): 3223 """ 3224 Merge Image Stream (IS) data with nuclei analysis data. 3225 3226 This method concatenates IS (Image Stream, https://cytekbio.com/pages/imagestream) 3227 results with the nuclei data stored in the object. The merge is performed based 3228 on object IDs, allowing joint analysis of nuclei features and IS features. 3229 3230 Parameters 3231 ---------- 3232 IS_data : pd.DataFrame 3233 DataFrame containing IS data results. 3234 3235 IS_features : list, optional 3236 List of features to extract from the IS data. Default is an empty list. 3237 3238 Notes 3239 ----- 3240 The merged data will be stored in the attribute `self.nuclei_IS_data`. 
3241 """ 3242 3243 nuclei_data = self._get_df() 3244 3245 IS_data["set"] = self.experiment_name 3246 3247 if len(IS_features) > 0: 3248 IS_features = list(set(IS_features + ["Object Number", "set"])) 3249 IS_data = IS_data[IS_features] 3250 3251 nuclei_data["id"] = ( 3252 nuclei_data["id_name"].astype(str) + "_" + nuclei_data["set"] 3253 ) 3254 IS_data["id"] = IS_data["Object Number"].astype(str) + "_" + IS_data["set"] 3255 3256 merged_data = pd.merge(nuclei_data, IS_data, on="id", how="left") 3257 merged_data.pop("set_x") 3258 merged_data = merged_data.rename(columns={"set_y": "set"}) 3259 3260 self.nuclei_IS_data = merged_data 3261 3262 def _get_df(self): 3263 """ 3264 Helper method to retrieve the nuclei data as a pandas DataFrame. 3265 3266 If the internal DataFrame `self.nuceli_data_df` has not been created yet, 3267 this method calls `_convert_to_df()` to generate it from `self.nuceli_data`. 3268 """ 3269 3270 if self.nuceli_data_df is None: 3271 self._convert_to_df() 3272 3273 return self.nuceli_data_df 3274 3275 def get_data_with_IS(self): 3276 """ 3277 Retrieve nuclei results for a single project including IS data. 3278 3279 Returns 3280 ------- 3281 pd.DataFrame or None 3282 DataFrame containing nuclei results merged with IS (Image Stream) data 3283 added via `self.add_IS_data()`. Returns None if no IS data has been added. 3284 """ 3285 3286 if self.nuclei_IS_data is None: 3287 print("\nNothing to return!") 3288 return self.nuclei_IS_data 3289 3290 def get_data(self): 3291 """ 3292 Retrieve nuclei results for a single project as a pandas DataFrame. 3293 3294 Returns 3295 ------- 3296 pd.DataFrame 3297 DataFrame containing nuclei analysis results for the experiment. 3298 """ 3299 3300 return self._get_df() 3301 3302 def save_nuc_project(self, path: str = ""): 3303 """ 3304 Save nuclei results as a JSON file with a *.nuc extension. 3305 3306 The saved data can later be loaded using the `cls.load_nuc_dict()` method. 
3307 Results must be obtained from the `NucleiFinder` class using 3308 `series_analysis_nuclei()` or `series_analysis_chromatinization()` methods. 3309 3310 Parameters 3311 ---------- 3312 path : str, optional 3313 Directory where the results will be saved. Default is the current working directory. 3314 """ 3315 3316 data = self.nuceli_data 3317 3318 if len(data.keys()) > 0: 3319 full_path = os.path.join(path, self.experiment_name) 3320 3321 with open(full_path + ".nuc", "w") as json_file: 3322 json.dump(data, json_file, indent=4) 3323 else: 3324 print("\nData not provided!") 3325 3326 def save_results_df(self, path: str = ""): 3327 """ 3328 Save nuclei results for a single project as a CSV file. 3329 3330 Results must be obtained from the `NucleiFinder` class using 3331 `series_analysis_nuclei()` or `series_analysis_chromatinization()` methods. 3332 3333 Parameters 3334 ---------- 3335 path : str, optional 3336 Directory where the CSV file will be saved. Default is the current working directory. 3337 """ 3338 3339 data = self.get_data() 3340 3341 full_path = os.path.join(path, f"{self.experiment_name}.csv") 3342 3343 data.to_csv(full_path, index=False) 3344 3345 def save_results_df_with_IS(self, path: str = ""): 3346 """ 3347 Save nuclei results with IS data for a single project as a CSV file. 3348 3349 Results must be obtained from the `NucleiFinder` class using 3350 `series_analysis_nuclei()` or `series_analysis_chromatinization()` methods. 3351 IS data should have been added via `self.add_IS_data()`. 3352 3353 Parameters 3354 ---------- 3355 path : str, optional 3356 Directory where the CSV file will be saved. Default is the current working directory. 
3357 """ 3358 3359 data = self.get_data_with_IS() 3360 3361 if data is None: 3362 raise ValueError("There was nothing to save.") 3363 3364 full_path = os.path.join(path, f"{self.experiment_name}_IS.csv") 3365 data.to_csv(full_path, index=False) 3366 3367 def add_experiment(self, data_list: list): 3368 """ 3369 Add additional NucleiDataManagement objects from other experiments for concatenation. 3370 3371 Parameters 3372 ---------- 3373 data_list : list 3374 List of `NucleiDataManagement` objects from separate experiments to be added. 3375 """ 3376 3377 valid_class = [] 3378 for obj in data_list: 3379 if isinstance(obj, self.__class__): 3380 valid_class.append(obj) 3381 else: 3382 print(f"Object {obj} is invalid type.") 3383 3384 self.concat_data = valid_class 3385 3386 def get_mutual_experiments_data(self, inc_is: bool = False): 3387 """ 3388 Retrieve concatenated NucleiDataManagement data from other added experiments. 3389 3390 Parameters 3391 ---------- 3392 inc_is : bool, optional 3393 Whether to include IS (Image Stream) data, if it was added to each experiment. Default is False. 3394 3395 Returns 3396 ------- 3397 pd.DataFrame 3398 Concatenated nuclei data (with or without IS data) from all added experiments. 3399 """ 3400 3401 if self.concat_data is not None: 3402 if inc_is: 3403 3404 try: 3405 final_df = pd.concat( 3406 [x.get_data_with_IS() for x in self.concat_data] 3407 + [self.get_data_with_IS()] 3408 ) 3409 except: 3410 raise ValueError( 3411 "Lack of IS data in some object. Check if the IS data was added to each project." 3412 ) 3413 3414 else: 3415 final_df = pd.concat( 3416 [x.get_data() for x in self.concat_data] + [self.get_data()] 3417 ) 3418 3419 return final_df 3420 3421 raise ValueError("No object to concatenate. Nothing to return!") 3422 3423 def save_mutual_experiments(self, path: str = "", inc_is: bool = False): 3424 """ 3425 Save concatenated NucleiDataManagement data from added experiments as a CSV file. 
3426 3427 Parameters 3428 ---------- 3429 inc_is : bool, optional 3430 Whether to include IS (Image Stream) data, if it was added to each experiment. Default is False. 3431 """ 3432 3433 dt = self.get_mutual_experiments_data(inc_is=inc_is) 3434 3435 experimets = [self.experiment_name] + [ 3436 n.experiment_name for n in self.concat_data 3437 ] 3438 3439 experimets_names = "_".join(experimets) 3440 3441 if inc_is: 3442 full_path = os.path.join(path, f"{experimets_names}_IS.csv") 3443 else: 3444 full_path = os.path.join(path, f"{experimets_names}.csv") 3445 3446 dt.to_csv(full_path, index=False)
Manages nuclei analysis data obtained from the NucleiFinder class,
including nuclei properties and optionally Image Stream (IS) data.
This class allows loading nuclei data from JSON files or directly from
NucleiFinder analysis results, converting them to pandas DataFrames,
adding IS data, concatenating results from multiple experiments, and
saving results in JSON or CSV format. It also provides helper methods
for merging, filtering, and retrieving data.
Attributes
nuceli_data : dict Dictionary storing nuclei properties for each image or experiment.
experiment_name : str Name of the experiment.
nuceli_data_df : pd.DataFrame or None DataFrame representation of nuclei properties.
nuclei_IS_data : pd.DataFrame or None DataFrame of nuclei data merged with IS data.
concat_data : list or None
List of other NucleiDataManagement objects added for combined analysis.
Methods
load_nuc_dict(path) Load nuclei data from a JSON dictionary file (*.nuc) and initialize the object.
_convert_to_df() Convert nuclei dictionary data to a pandas DataFrame.
add_IS_data(IS_data, IS_features) Merge Image Stream (IS) data with nuclei data.
get_data() Retrieve the nuclei data as a pandas DataFrame.
get_data_with_IS() Retrieve the nuclei data merged with IS data.
save_nuc_project(path) Save nuclei data as a JSON file with *.nuc extension.
save_results_df(path) Save nuclei data as a CSV file.
save_results_df_with_IS(path) Save nuclei data merged with IS data as a CSV file.
add_experiment(data_list)
Add other NucleiDataManagement objects for concatenated analysis.
get_mutual_experiments_data(inc_is) Retrieve concatenated nuclei data from multiple experiments.
save_mutual_experiments(path, inc_is) Save concatenated data from multiple experiments as a CSV file.
3018 def __init__(self, nuclei_data: dict, experiment_name: str): 3019 """ 3020 Initialize a NucleiDataManagement object with nuclei data and experiment name. 3021 3022 Parameters 3023 ---------- 3024 nuclei_data : dict 3025 Dictionary containing nuclei properties for each image or experiment. 3026 If the dictionary entries have keys 'stats' and 'img', only 'stats' are stored. 3027 3028 experiment_name : str 3029 Name of the experiment. 3030 3031 Attributes 3032 ---------- 3033 nuceli_data : dict 3034 Dictionary storing nuclei properties for each image or experiment. 3035 3036 experiment_name : str 3037 Name of the experiment. 3038 3039 nuceli_data_df : pd.DataFrame or None 3040 DataFrame representation of nuclei properties (initialized as None). 3041 3042 nuclei_IS_data : pd.DataFrame or None 3043 DataFrame of nuclei data merged with Image Stream (IS) data (initialized as None). 3044 3045 concat_data : list or None 3046 List of other `NucleiDataManagement` objects added for combined analysis (initialized as None). 3047 """ 3048 3049 if set(nuclei_data[list(nuclei_data.keys())[0]].keys()) == set( 3050 ["stats", "img"] 3051 ): 3052 3053 self.nuceli_data = {} 3054 3055 for k in nuclei_data.keys(): 3056 self.nuceli_data[k] = nuclei_data[k]["stats"] 3057 3058 for k in self.nuceli_data.keys(): 3059 if "coords" in self.nuceli_data[k].keys(): 3060 self.nuceli_data[k].pop("coords") 3061 3062 else: 3063 self.nuceli_data = nuclei_data 3064 3065 for k in self.nuceli_data.keys(): 3066 if "coords" in self.nuceli_data[k].keys(): 3067 self.nuceli_data[k].pop("coords") 3068 3069 self.experiment_name = experiment_name 3070 """Name of the experiment.""" 3071 3072 self.nuceli_data_df = None 3073 """Stored DataFrame representation of nuclei features""" 3074 3075 self.nuclei_IS_data = None 3076 """Stored DataFrame of data from Image Stream (IS).""" 3077 3078 self.concat_data = None 3079 """Sotored list of other `NucleiDataManagement` objects."""
Initialize a NucleiDataManagement object with nuclei data and experiment name.
Parameters
nuclei_data : dict Dictionary containing nuclei properties for each image or experiment. If the dictionary entries have keys 'stats' and 'img', only 'stats' are stored.
experiment_name : str Name of the experiment.
Attributes
nuceli_data : dict Dictionary storing nuclei properties for each image or experiment.
experiment_name : str Name of the experiment.
nuceli_data_df : pd.DataFrame or None DataFrame representation of nuclei properties (initialized as None).
nuclei_IS_data : pd.DataFrame or None DataFrame of nuclei data merged with Image Stream (IS) data (initialized as None).
concat_data : list or None
List of other NucleiDataManagement objects added for combined analysis (initialized as None).
3081 @classmethod 3082 def load_nuc_dict(cls, path: str): 3083 """ 3084 Initialize a NucleiDataManagement object from a JSON dictionary file. 3085 3086 The loaded data must be previously saved using the ``save_nuc_project()`` method. 3087 3088 Parameters 3089 ---------- 3090 path : str 3091 Path to the *.nuc JSON file containing nuclei data. 3092 """ 3093 3094 if ".nuc" in path: 3095 3096 if os.path.exists(path): 3097 3098 with open(path, "r") as json_file: 3099 loaded_data = json.load(json_file) 3100 3101 return cls(loaded_data, os.path.splitext(os.path.basename(path))[0]) 3102 3103 else: 3104 raise ValueError("\nInvalid path!") 3105 3106 else: 3107 raise ValueError( 3108 "\nInvalid dictionary to load. It must contain a .nuc extension!" 3109 )
Initialize a NucleiDataManagement object from a JSON dictionary file.
The loaded data must be previously saved using the save_nuc_project() method.
Parameters
path : str Path to the *.nuc JSON file containing nuclei data.
3222 def add_IS_data(self, IS_data: pd.DataFrame, IS_features: list = []): 3223 """ 3224 Merge Image Stream (IS) data with nuclei analysis data. 3225 3226 This method concatenates IS (Image Stream, https://cytekbio.com/pages/imagestream) 3227 results with the nuclei data stored in the object. The merge is performed based 3228 on object IDs, allowing joint analysis of nuclei features and IS features. 3229 3230 Parameters 3231 ---------- 3232 IS_data : pd.DataFrame 3233 DataFrame containing IS data results. 3234 3235 IS_features : list, optional 3236 List of features to extract from the IS data. Default is an empty list. 3237 3238 Notes 3239 ----- 3240 The merged data will be stored in the attribute `self.nuclei_IS_data`. 3241 """ 3242 3243 nuclei_data = self._get_df() 3244 3245 IS_data["set"] = self.experiment_name 3246 3247 if len(IS_features) > 0: 3248 IS_features = list(set(IS_features + ["Object Number", "set"])) 3249 IS_data = IS_data[IS_features] 3250 3251 nuclei_data["id"] = ( 3252 nuclei_data["id_name"].astype(str) + "_" + nuclei_data["set"] 3253 ) 3254 IS_data["id"] = IS_data["Object Number"].astype(str) + "_" + IS_data["set"] 3255 3256 merged_data = pd.merge(nuclei_data, IS_data, on="id", how="left") 3257 merged_data.pop("set_x") 3258 merged_data = merged_data.rename(columns={"set_y": "set"}) 3259 3260 self.nuclei_IS_data = merged_data
Merge Image Stream (IS) data with nuclei analysis data.
This method concatenates IS (Image Stream, https://cytekbio.com/pages/imagestream) results with the nuclei data stored in the object. The merge is performed based on object IDs, allowing joint analysis of nuclei features and IS features.
Parameters
IS_data : pd.DataFrame DataFrame containing IS data results.
IS_features : list, optional List of features to extract from the IS data. Default is an empty list.
Notes
The merged data will be stored in the attribute self.nuclei_IS_data.
3275 def get_data_with_IS(self): 3276 """ 3277 Retrieve nuclei results for a single project including IS data. 3278 3279 Returns 3280 ------- 3281 pd.DataFrame or None 3282 DataFrame containing nuclei results merged with IS (Image Stream) data 3283 added via `self.add_IS_data()`. Returns None if no IS data has been added. 3284 """ 3285 3286 if self.nuclei_IS_data is None: 3287 print("\nNothing to return!") 3288 return self.nuclei_IS_data
Retrieve nuclei results for a single project including IS data.
Returns
pd.DataFrame or None
DataFrame containing nuclei results merged with IS (Image Stream) data
added via self.add_IS_data(). Returns None if no IS data has been added.
3290 def get_data(self): 3291 """ 3292 Retrieve nuclei results for a single project as a pandas DataFrame. 3293 3294 Returns 3295 ------- 3296 pd.DataFrame 3297 DataFrame containing nuclei analysis results for the experiment. 3298 """ 3299 3300 return self._get_df()
Retrieve nuclei results for a single project as a pandas DataFrame.
Returns
pd.DataFrame DataFrame containing nuclei analysis results for the experiment.
3302 def save_nuc_project(self, path: str = ""): 3303 """ 3304 Save nuclei results as a JSON file with a *.nuc extension. 3305 3306 The saved data can later be loaded using the `cls.load_nuc_dict()` method. 3307 Results must be obtained from the `NucleiFinder` class using 3308 `series_analysis_nuclei()` or `series_analysis_chromatinization()` methods. 3309 3310 Parameters 3311 ---------- 3312 path : str, optional 3313 Directory where the results will be saved. Default is the current working directory. 3314 """ 3315 3316 data = self.nuceli_data 3317 3318 if len(data.keys()) > 0: 3319 full_path = os.path.join(path, self.experiment_name) 3320 3321 with open(full_path + ".nuc", "w") as json_file: 3322 json.dump(data, json_file, indent=4) 3323 else: 3324 print("\nData not provided!")
Save nuclei results as a JSON file with a *.nuc extension.
The saved data can later be loaded using the cls.load_nuc_dict() method.
Results must be obtained from the NucleiFinder class using
series_analysis_nuclei() or series_analysis_chromatinization() methods.
Parameters
path : str, optional Directory where the results will be saved. Default is the current working directory.
3326 def save_results_df(self, path: str = ""): 3327 """ 3328 Save nuclei results for a single project as a CSV file. 3329 3330 Results must be obtained from the `NucleiFinder` class using 3331 `series_analysis_nuclei()` or `series_analysis_chromatinization()` methods. 3332 3333 Parameters 3334 ---------- 3335 path : str, optional 3336 Directory where the CSV file will be saved. Default is the current working directory. 3337 """ 3338 3339 data = self.get_data() 3340 3341 full_path = os.path.join(path, f"{self.experiment_name}.csv") 3342 3343 data.to_csv(full_path, index=False)
Save nuclei results for a single project as a CSV file.
Results must be obtained from the NucleiFinder class using
series_analysis_nuclei() or series_analysis_chromatinization() methods.
Parameters
path : str, optional Directory where the CSV file will be saved. Default is the current working directory.
3345 def save_results_df_with_IS(self, path: str = ""): 3346 """ 3347 Save nuclei results with IS data for a single project as a CSV file. 3348 3349 Results must be obtained from the `NucleiFinder` class using 3350 `series_analysis_nuclei()` or `series_analysis_chromatinization()` methods. 3351 IS data should have been added via `self.add_IS_data()`. 3352 3353 Parameters 3354 ---------- 3355 path : str, optional 3356 Directory where the CSV file will be saved. Default is the current working directory. 3357 """ 3358 3359 data = self.get_data_with_IS() 3360 3361 if data is None: 3362 raise ValueError("There was nothing to save.") 3363 3364 full_path = os.path.join(path, f"{self.experiment_name}_IS.csv") 3365 data.to_csv(full_path, index=False)
Save nuclei results with IS data for a single project as a CSV file.
Results must be obtained from the NucleiFinder class using
series_analysis_nuclei() or series_analysis_chromatinization() methods.
IS data should have been added via self.add_IS_data().
Parameters
path : str, optional Directory where the CSV file will be saved. Default is the current working directory.
3367 def add_experiment(self, data_list: list): 3368 """ 3369 Add additional NucleiDataManagement objects from other experiments for concatenation. 3370 3371 Parameters 3372 ---------- 3373 data_list : list 3374 List of `NucleiDataManagement` objects from separate experiments to be added. 3375 """ 3376 3377 valid_class = [] 3378 for obj in data_list: 3379 if isinstance(obj, self.__class__): 3380 valid_class.append(obj) 3381 else: 3382 print(f"Object {obj} is invalid type.") 3383 3384 self.concat_data = valid_class
Add additional NucleiDataManagement objects from other experiments for concatenation.
Parameters
data_list : list
List of NucleiDataManagement objects from separate experiments to be added.
3386 def get_mutual_experiments_data(self, inc_is: bool = False): 3387 """ 3388 Retrieve concatenated NucleiDataManagement data from other added experiments. 3389 3390 Parameters 3391 ---------- 3392 inc_is : bool, optional 3393 Whether to include IS (Image Stream) data, if it was added to each experiment. Default is False. 3394 3395 Returns 3396 ------- 3397 pd.DataFrame 3398 Concatenated nuclei data (with or without IS data) from all added experiments. 3399 """ 3400 3401 if self.concat_data is not None: 3402 if inc_is: 3403 3404 try: 3405 final_df = pd.concat( 3406 [x.get_data_with_IS() for x in self.concat_data] 3407 + [self.get_data_with_IS()] 3408 ) 3409 except: 3410 raise ValueError( 3411 "Lack of IS data in some object. Check if the IS data was added to each project." 3412 ) 3413 3414 else: 3415 final_df = pd.concat( 3416 [x.get_data() for x in self.concat_data] + [self.get_data()] 3417 ) 3418 3419 return final_df 3420 3421 raise ValueError("No object to concatenate. Nothing to return!")
Retrieve concatenated NucleiDataManagement data from other added experiments.
Parameters
inc_is : bool, optional Whether to include IS (Image Stream) data, if it was added to each experiment. Default is False.
Returns
pd.DataFrame Concatenated nuclei data (with or without IS data) from all added experiments.
3423 def save_mutual_experiments(self, path: str = "", inc_is: bool = False): 3424 """ 3425 Save concatenated NucleiDataManagement data from added experiments as a CSV file. 3426 3427 Parameters 3428 ---------- 3429 inc_is : bool, optional 3430 Whether to include IS (Image Stream) data, if it was added to each experiment. Default is False. 3431 """ 3432 3433 dt = self.get_mutual_experiments_data(inc_is=inc_is) 3434 3435 experimets = [self.experiment_name] + [ 3436 n.experiment_name for n in self.concat_data 3437 ] 3438 3439 experimets_names = "_".join(experimets) 3440 3441 if inc_is: 3442 full_path = os.path.join(path, f"{experimets_names}_IS.csv") 3443 else: 3444 full_path = os.path.join(path, f"{experimets_names}.csv") 3445 3446 dt.to_csv(full_path, index=False)
Save concatenated NucleiDataManagement data from added experiments as a CSV file.
Parameters
inc_is : bool, optional Whether to include IS (Image Stream) data, if it was added to each experiment. Default is False.
3449class GroupAnalysis: 3450 """ 3451 A class for performing multivariate analysis, dimensionality reduction, 3452 clustering, and differential feature analysis (DFA) on biological or 3453 experimental datasets. 3454 3455 This class provides tools for: 3456 - Scaling and PCA of input data 3457 - UMAP embedding and DBSCAN clustering 3458 - Differential Feature Analysis across groups 3459 - Proportion analysis and plotting 3460 - Data selection and merging with metadata 3461 3462 Attributes 3463 ---------- 3464 input_data : pd.DataFrame 3465 The primary dataset containing features for analysis. 3466 3467 input_metadata : pd.DataFrame 3468 Metadata corresponding to the input data, including identifiers and group labels. 3469 3470 tmp_data : pd.DataFrame 3471 Temporary copy of the input data, used for feature selection and filtering. 3472 3473 tmp_metadata : pd.DataFrame 3474 Temporary copy of metadata, used for filtered or subsetted operations. 3475 3476 scaled_data : np.ndarray or None 3477 Scaled version of the temporary dataset (`tmp_data`), updated after `data_scale()`. 3478 3479 PCA_results : np.ndarray or None 3480 Results of PCA transformation applied on scaled data. 3481 3482 var_data : np.ndarray or None 3483 Explained variance ratio from PCA. 3484 3485 knee_plot : matplotlib.figure.Figure or None 3486 Figure of cumulative explained variance for PCA components. 3487 3488 UMAP_data : np.ndarray or None 3489 Embedding results from UMAP dimensionality reduction. 3490 3491 UMAP_plot : dict 3492 Dictionary containing UMAP plots. Keys: 'static' (matplotlib) and 'html' (plotly). 3493 3494 dblabels : list or None 3495 Cluster labels assigned by DBSCAN after UMAP embedding. 3496 3497 explained_variance_ratio : np.ndarray or None 3498 Explained variance ratio of PCA components. 3499 3500 DFA_results : pd.DataFrame or None 3501 Results of Differential Feature Analysis (DFA). 3502 3503 proportion_stats : pd.DataFrame or None 3504 Statistics from proportion analysis. 
3505 3506 proportion_plot : matplotlib.figure.Figure or None 3507 Figure of proportion analysis results. 3508 3509 Methods 3510 ------- 3511 resest_project(): 3512 Reset all temporary and analysis results to initial state. 3513 3514 load_data(data, ids_col='id_name', set_col='set'): 3515 Class method to load data and metadata and initialize the object. 3516 3517 groups: 3518 Property returning available groups in the metadata. 3519 3520 get_DFA(), get_PCA(), get_knee_plot(), get_var_data(), get_scaled_data(): 3521 Methods to retrieve previously computed results. 3522 3523 UMAP(), db_scan(), UMAP_on_clusters(): 3524 Methods for dimensionality reduction and clustering visualization. 3525 3526 DFA(meta_group_by='sets', sets={}, n_proc=5): 3527 Perform Differential Feature Analysis. 3528 3529 proportion_analysis(grouping_col='sets', val_col='nuclei_per_img', ...): 3530 Perform and plot proportion analysis across groups. 3531 """ 3532 3533 def __init__( 3534 self, 3535 input_data, 3536 input_metadata, 3537 ): 3538 """ 3539 Initialize a GroupAnalysis instance with data and metadata. 3540 3541 Parameters 3542 ---------- 3543 input_data : pd.DataFrame 3544 Dataset containing features for analysis. Rows represent samples and columns represent features. 3545 3546 input_metadata : pd.DataFrame 3547 Metadata corresponding to `input_data`, including sample identifiers and group labels. 
3548 """ 3549 3550 self.input_data = input_data 3551 """Stored input dataset for analysis.""" 3552 3553 self.input_metadata = input_metadata 3554 """Stored metadata associated with `input_data`.""" 3555 3556 self.tmp_metadata = input_metadata 3557 """Temporary copy of `input_data` used for filtering, selection, or scaling.""" 3558 3559 self.tmp_data = input_data 3560 """Temporary copy of `input_metadata` used for filtered operations.""" 3561 3562 self.scaled_data = None 3563 """Stored scaled version of `tmp_data` after normalization or standardization.""" 3564 3565 self.PCA_results = None 3566 """ Stored results of PCA transformation applied on `scaled_data`.""" 3567 3568 self.var_data = None 3569 """Sotred explained variance ratio for PCA components.""" 3570 3571 self.knee_plot = None 3572 """Figure showing cumulative explained variance for PCA.""" 3573 3574 self.UMAP_data = None 3575 """Stored embedding coordinates from UMAP dimensionality reduction.""" 3576 3577 self.UMAP_plot = {"static": {}, "html": {}} 3578 """Stored dictionary containing UMAP plots: 'static' (matplotlib) and 'html' (plotly).""" 3579 3580 self.dblabels = None 3581 """Stored cluster labels assigned by DBSCAN after UMAP embedding.""" 3582 3583 self.explained_variance_ratio = None 3584 """Stored explained variance ratio of PCA components.""" 3585 3586 self.DFA_results = None 3587 """Stored Differential Feature Analysis (DFA) results.""" 3588 3589 self.proportion_stats = None 3590 """Stored statistics from proportion analysis of groups.""" 3591 3592 self.proportion_plot = None 3593 """Figure visualizing proportion analysis results.""" 3594 3595 def resest_project(self): 3596 """ 3597 Resets the project state by clearing or reinitializing various attributes. 
3598 3599 This method resets the following attributes to initial values: 3600 - `tmp_metadata` 3601 - `tmp_data` 3602 - `scaled_data` 3603 - `PCA_results` 3604 - `var_data` 3605 - `knee_plot` 3606 - `UMAP_data` 3607 - `UMAP_plot` 3608 - `dblabels` 3609 - `explained_variance_ratio` 3610 - `DFA_results` 3611 3612 This method is typically called to reinitialize the project data and results, preparing the system for new computations or project resets. 3613 """ 3614 3615 self.tmp_metadata = self.input_metadata 3616 self.tmp_data = self.input_data 3617 self.scaled_data = None 3618 self.PCA_results = None 3619 self.var_data = None 3620 self.knee_plot = None 3621 self.UMAP_data = None 3622 self.UMAP_plot = {"static": {}, "html": {}} 3623 self.dblabels = None 3624 self.explained_variance_ratio = None 3625 self.DFA_results = None 3626 self.proportion_stats = None 3627 self.proportion_plot = None 3628 3629 @classmethod 3630 def load_data(cls, data, ids_col: str = "id_name", set_col: str = "set"): 3631 """ 3632 Load data and initialize the class by storing both the feature data and metadata. 3633 3634 Parameters 3635 ---------- 3636 data : pd.DataFrame 3637 Input dataset used for group analysis. Must contain both feature columns and 3638 metadata columns specified by `ids_col` and `set_col`. 3639 3640 ids_col : str, optional 3641 Name of the column containing unique object identifiers. 3642 Default is ``'id_name'``. 3643 3644 set_col : str, optional 3645 Name of the column specifying group or set assignment for each object. 3646 Default is ``'set'``. 3647 3648 Notes 3649 ----- 3650 This method performs in-place initialization of the class and does not return 3651 a separate object. All loaded data and metadata become available through the 3652 class attributes for downstream analysis. 3653 3654 This method updates internal class attributes: 3655 3656 - **input_data** : pd.DataFrame 3657 Cleaned feature table with index set to object IDs. 
3658 3659 - **tmp_data** : pd.DataFrame 3660 Copy of `input_data` used for temporary operations. 3661 3662 - **input_metadata** : pd.DataFrame 3663 Metadata containing object IDs and group assignments. 3664 3665 - **tmp_metadata** : pd.DataFrame 3666 Copy of `input_metadata` for temporary operations. 3667 """ 3668 3669 data = data.dropna() 3670 3671 metadata = pd.DataFrame() 3672 metadata["id"] = data[ids_col] 3673 metadata["sets"] = data[set_col] 3674 3675 data.index = data[ids_col] 3676 3677 try: 3678 data.pop("id_name") 3679 except: 3680 None 3681 3682 try: 3683 data.pop("Object Number") 3684 except: 3685 None 3686 3687 return cls(data, metadata) 3688 3689 @property 3690 def groups(self): 3691 """ 3692 Return information about available groups in the metadata for ``self.DFA``. 3693 3694 Returns 3695 ------- 3696 dict 3697 Dictionary mapping each metadata column name to a list of unique groups 3698 available in that column. 3699 """ 3700 3701 try: 3702 return { 3703 "sets": set(self.tmp_metadata["sets"]), 3704 "full_name": set(self.tmp_metadata["full_name"]), 3705 } 3706 except: 3707 return {"sets": set(self.tmp_metadata["sets"])} 3708 3709 def get_DFA(self): 3710 """ 3711 Retrieve the DFA results produced by the ``DFA()`` method. 3712 3713 Returns 3714 ------- 3715 pd.DataFrame 3716 The DFA results stored in ``self.DFA_results``. 3717 """ 3718 3719 if None in self.DFA_results: 3720 print("\nNo results to return! Please run the DFA() method first.") 3721 else: 3722 return self.DFA_results 3723 3724 def get_PCA(self): 3725 """ 3726 Retrieve the PCA results produced by the ``PCA()`` method. 3727 3728 Returns 3729 ------- 3730 np.ndarray 3731 The PCA results stored in ``self.PCA_results``. 3732 """ 3733 3734 if None in self.PCA_results: 3735 print("\nNo results to return! 
Please run the PCA() method first.") 3736 else: 3737 return self.PCA_results 3738 3739 def get_knee_plot(self, show: bool = True): 3740 """ 3741 Retrieve the knee plot of cumulative explained variance generated by the ``var_plot()`` method. 3742 3743 Parameters 3744 ---------- 3745 show : bool, optional 3746 If ``True`` (default), the knee plot is displayed. 3747 3748 Returns 3749 ------- 3750 matplotlib.figure.Figure 3751 The figure object containing the knee plot. 3752 """ 3753 3754 if self.knee_plot is None: 3755 print("\nNo results to return! Please run the var_plot() method first.") 3756 else: 3757 if cfg._DISPLAY_MODE: 3758 if show is True: 3759 self.knee_plot 3760 try: 3761 display(self.knee_plot) 3762 except: 3763 None 3764 3765 return self.knee_plot 3766 3767 def get_var_data(self): 3768 """ 3769 Retrieve the explained variance data from the ``var_plot()`` method. 3770 3771 Returns 3772 ------- 3773 np.ndarray 3774 Array containing the explained variance values stored in ``self.var_data``. 3775 """ 3776 3777 if None in self.var_data: 3778 print("\nNo results to return! Please run the var_plot() method first.") 3779 else: 3780 return self.var_data 3781 3782 def get_scaled_data(self): 3783 """ 3784 Retrieve the scaled data produced by the ``data_scale()`` method. 3785 3786 Returns 3787 ------- 3788 np.ndarray 3789 Scaled data stored in ``self.scaled_data``. 3790 """ 3791 3792 if None in self.scaled_data: 3793 print("\nNo results to return! Please run the data_scale() method first.") 3794 else: 3795 return self.scaled_data 3796 3797 def get_UMAP_data(self): 3798 """ 3799 Retrieve the UMAP-transformed data generated by the ``UMAP()`` method. 3800 3801 Returns 3802 ------- 3803 np.ndarray 3804 UMAP-embedded data stored in ``self.UMAP_data``. 3805 """ 3806 3807 if None in self.UMAP_data: 3808 print("\nNo results to return! 
Please run the UMAP() method first.") 3809 else: 3810 return self.UMAP_data 3811 3812 def get_UMAP_plots(self, plot_type: str = "static", show: bool = True): 3813 """ 3814 Retrieve UMAP plots generated by the ``UMAP()`` and/or ``UMAP_on_clusters()`` methods. 3815 3816 Parameters 3817 ---------- 3818 show : bool, optional 3819 Whether to display the UMAP plots. Default is True. 3820 3821 Returns 3822 ------- 3823 dict of matplotlib.figure.Figure 3824 A dictionary containing the UMAP plots. Keys correspond to plot names, and values are the figure objects. 3825 """ 3826 3827 if plot_type == "html": 3828 3829 if len(self.UMAP_plot["html"].keys()) == 0: 3830 print( 3831 "\nNo results to return! Please run the UMAP() and / or UMAP_on_clusters() methods first." 3832 ) 3833 else: 3834 if cfg._DISPLAY_MODE: 3835 if show: 3836 for k in self.UMAP_plot["html"].keys(): 3837 self.UMAP_plot["html"][k] 3838 try: 3839 display(self.UMAP_plot["html"][k]) 3840 except: 3841 None 3842 3843 return self.UMAP_plot["html"] 3844 3845 else: 3846 3847 if len(self.UMAP_plot["static"].keys()) == 0: 3848 print( 3849 "\nNo results to return! Please run the UMAP() and / or UMAP_on_clusters() methods first." 3850 ) 3851 else: 3852 if cfg._DISPLAY_MODE: 3853 if show: 3854 for k in self.UMAP_plot["static"].keys(): 3855 self.UMAP_plot["static"][k] 3856 try: 3857 display(self.UMAP_plot["static"][k]) 3858 except: 3859 None 3860 3861 return self.UMAP_plot["static"] 3862 3863 def select_data(self, features_list: list = []): 3864 """ 3865 Select specific features (columns) from the dataset for further analysis. 3866 3867 Parameters 3868 ---------- 3869 features_list : list of str, optional 3870 List of feature names (column names) to select from the dataset. Default is an empty list, which selects no features. 3871 3872 Notes 3873 ----- 3874 Modifies the `self.tmp_data` attribute to contain only the selected features from `self.input_data`. 
3875 """ 3876 3877 dat = self.input_data.copy() 3878 3879 not_in_columns = [name for name in features_list if name not in dat.columns] 3880 3881 if not_in_columns: 3882 print("These names are not in data", not_in_columns) 3883 else: 3884 print("All names are present in data.") 3885 3886 in_columns = [name for name in features_list if name in dat.columns] 3887 3888 dat = dat[in_columns] 3889 3890 self.tmp_data = dat 3891 3892 def data_scale(self): 3893 """ 3894 Scale the data using standardization (z-score normalization). 3895 3896 This method applies `StandardScaler` from scikit-learn to the temporary dataset (`self.tmp_data`) and stores the scaled data. 3897 3898 Notes 3899 ----- 3900 Modifies the `self.scaled_data` attribute to contain the standardized version of `self.tmp_data`. 3901 """ 3902 3903 if None not in self.tmp_data: 3904 3905 def is_id_column(name: str): 3906 name_lower = name.lower() 3907 return name_lower == "id" or "id_" in name_lower or "_id" in name_lower 3908 3909 tmp = self.tmp_data 3910 3911 cols_with_strings = [ 3912 c 3913 for c in tmp.columns 3914 if tmp[c].apply(lambda x: isinstance(x, str)).any() 3915 ] 3916 3917 cols_id_pattern = [c for c in tmp.columns if is_id_column(c)] 3918 3919 cols_to_drop = list(set(cols_id_pattern + cols_with_strings)) 3920 3921 tmp = tmp.drop(columns=cols_to_drop) 3922 3923 scaler = StandardScaler() 3924 3925 self.scaled_data = scaler.fit_transform(tmp) 3926 3927 else: 3928 print( 3929 "\nNo data to scale. Please use the load_data() method first, and optionally the select_data() method." 3930 ) 3931 3932 def PCA(self): 3933 """ 3934 Perform Principal Component Analysis (PCA) on the scaled data. 3935 3936 This method reduces the dimensionality of `self.scaled_data` while retaining the maximum variance. 3937 3938 Notes 3939 ----- 3940 Modifies the `self.PCA_results` attribute with the PCA-transformed data. 
3941 """ 3942 3943 if None not in self.scaled_data: 3944 pca = PCA(n_components=self.scaled_data.shape[1]) 3945 self.PCA_results = pca.fit_transform(self.scaled_data) 3946 self.explained_variance_ratio = pca.explained_variance_ratio_ 3947 else: 3948 print("\nNo data for PCA. Please use the data_scale() method first.") 3949 3950 def var_plot(self): 3951 """ 3952 Plot the cumulative explained variance of the principal components from PCA. 3953 3954 This method visualizes the cumulative explained variance to help determine how many components capture most of the variance. 3955 3956 Notes 3957 ----- 3958 Stores results in the following attributes: 3959 - `self.var_data` (np.ndarray): Explained variance ratio for each principal component. 3960 - `self.knee_plot` (matplotlib.figure.Figure): Figure of the cumulative explained variance plot. 3961 """ 3962 3963 if None not in self.PCA_results: 3964 3965 fig, _ = plt.subplots(figsize=(15, 7)) 3966 explained_var = self.explained_variance_ratio 3967 3968 cumulative_var = np.cumsum(explained_var) 3969 3970 # Plot the cumulative explained variance as a function of the number of components 3971 plt.plot(cumulative_var) 3972 plt.xlabel("Number of Components") 3973 plt.ylabel("Cumulative Explained Variance") 3974 plt.title("Explained variance of PCs") 3975 plt.xticks(np.arange(0, len(cumulative_var) + 1, step=1)) 3976 3977 self.var_data = explained_var 3978 self.knee_plot = fig 3979 3980 else: 3981 3982 print( 3983 "\nNo data for variance explanation analysis. Please use the PCA() method first." 3984 ) 3985 3986 def UMAP( 3987 self, 3988 PC_num: int = 5, 3989 factorize_with_metadata: bool = False, 3990 harmonize_sets: bool = True, 3991 n_neighbors: int = 25, 3992 min_dist: float = 0.01, 3993 n_components: int = 2, 3994 width: int = 8, 3995 height: int = 6, 3996 ): 3997 """ 3998 Perform UMAP (Uniform Manifold Approximation and Projection) dimensionality reduction on PCA results. 
3999 4000 UMAP is applied to the top principal components, optionally using metadata labels to influence the embedding. Generates both 2D/3D embeddings and visualizations. 4001 4002 Parameters 4003 ---------- 4004 PC_num : int, optional 4005 Number of top principal components to use for UMAP embedding. Default is 5. 4006 4007 factorize_with_metadata : bool, optional 4008 Whether to use metadata (e.g., 'sets') to factorize UMAP embedding. Default is False. 4009 4010 harmonize_sets : bool, optional 4011 If True, applies harmonization across data sets before computing the UMAP embedding. 4012 Default is True. 4013 4014 n_neighbors : int, optional 4015 Number of neighbors for UMAP to compute local structure. Default is 25. 4016 4017 min_dist : float, optional 4018 Minimum distance between points in the low-dimensional embedding. Default is 0.01. 4019 4020 n_components : int, optional 4021 Number of dimensions for the UMAP embedding. Default is 2. 4022 4023 width : int, optional 4024 Width of the generated matplotlib figures (in inches). Default is 8. 4025 4026 height : int, optional 4027 Height of the generated matplotlib figures (in inches). Default is 6. 4028 4029 Notes 4030 ----- 4031 Stores results in the following attributes: 4032 - `self.UMAP_data` (np.ndarray): UMAP-transformed data. 4033 - `self.UMAP_plot['static']['PrimaryUMAP']` (matplotlib.figure.Figure): Static visualization of UMAP embedding. 4034 - `self.UMAP_plot['html']['PrimaryUMAP']` (plotly.graph_objs.Figure): Interactive Plotly visualization of UMAP embedding. 
4035 """ 4036 4037 if None not in self.PCA_results: 4038 4039 reducer = umap.UMAP( 4040 n_neighbors=n_neighbors, 4041 min_dist=min_dist, 4042 n_components=n_components, 4043 random_state=42, 4044 ) 4045 4046 pca_res = self.PCA_results 4047 4048 if harmonize_sets: 4049 4050 pca_res = np.array(pca_res) 4051 4052 pca_res = np.array( 4053 harmonize.run_harmony( 4054 pca_res, self.input_metadata, vars_use="sets" 4055 ).Z_corr 4056 ).T 4057 4058 if factorize_with_metadata: 4059 numeric_labels = pd.Categorical(self.tmp_metadata["sets"]).codes 4060 4061 umap_result = reducer.fit_transform( 4062 pca_res[:, : PC_num + 1], y=numeric_labels 4063 ) 4064 4065 else: 4066 umap_result = reducer.fit_transform(pca_res[:, : PC_num + 1]) 4067 4068 umap_result_plot = pd.DataFrame(umap_result.copy()) 4069 4070 umap_result_plot["clusters"] = list(self.tmp_metadata["sets"]) 4071 4072 static_fig = umap_static(umap_result_plot, width=width, height=height) 4073 4074 html_fig = umap_html( 4075 umap_result_plot, width=width * 100, height=height * 100 4076 ) 4077 4078 self.UMAP_data = umap_result 4079 4080 self.UMAP_plot["static"]["PrimaryUMAP"] = static_fig 4081 self.UMAP_plot["html"]["PrimaryUMAP"] = html_fig 4082 4083 else: 4084 4085 print("\nNo data for UMAP. Please use the PCA() method first.") 4086 4087 def db_scan(self, eps=0.5, min_samples: int = 10): 4088 """ 4089 Perform DBSCAN clustering on UMAP-transformed data. 4090 4091 DBSCAN identifies clusters based on density, labeling points in dense regions as clusters and others as noise. 4092 4093 Parameters 4094 ---------- 4095 eps : float, optional 4096 Maximum distance between two points to be considered neighbors. Default is 0.5. 4097 4098 min_samples : int, optional 4099 Minimum number of points required to form a dense region (cluster). Default is 10. 
4100 4101 Notes 4102 ----- 4103 Stores the results in the following attribute: 4104 - `self.dblabels` (list of str): Cluster labels assigned by DBSCAN for each point in the UMAP embedding. 4105 """ 4106 4107 if None not in self.UMAP_data: 4108 4109 dbscan = DBSCAN(eps=eps, min_samples=min_samples) 4110 dbscan_labels = dbscan.fit_predict(self.UMAP_data) 4111 self.dblabels = [str(x) for x in dbscan_labels] 4112 4113 else: 4114 4115 print("\nNo data for DBSCAN. Please use the UMAP() method first.") 4116 4117 def UMAP_on_clusters( 4118 self, 4119 min_entities: int = 50, 4120 width: int = 8, 4121 height: int = 6, 4122 n_per_col: int = 20, 4123 ): 4124 """ 4125 Generate UMAP visualizations for clusters filtered by a minimum entity threshold. 4126 4127 This method removes clusters containing fewer than `min_entities` observations 4128 and produces two UMAP visualizations: 4129 4130 1. **Cluster UMAP** – points colored by cluster assignment only. 4131 2. **Cluster × Set UMAP** – points colored by the combination of cluster and set identifier. 4132 4133 Parameters 4134 ---------- 4135 min_entities : int, optional 4136 Minimum number of entities required for a cluster to be included 4137 in the visualization. Default is 50. 4138 4139 width : int, optional 4140 Width of the generated matplotlib figures (in inches). Default is 8. 4141 4142 height : int, optional 4143 Height of the generated matplotlib figures (in inches). Default is 6. 4144 4145 n_per_col : int, optional 4146 Maximum number of legend entries per column. Default is 20. 4147 4148 Notes 4149 ----- 4150 This method updates the following attributes: 4151 4152 - `self.UMAP_plot['static']['ClusterUMAP']` 4153 Static matplotlib figure of the filtered cluster-only UMAP. 4154 4155 - `self.UMAP_plot['html']['ClusterUMAP']` 4156 Interactive HTML version of the cluster-only UMAP. 4157 4158 - `self.UMAP_plot['static']['ClusterXSetsUMAP']` 4159 Static matplotlib figure showing clusters combined with set identifiers. 
4160 4161 - `self.UMAP_plot['html']['ClusterXSetsUMAP']` 4162 Interactive HTML version of the cluster × set visualization. 4163 4164 - `self.tmp_data` 4165 Dataset filtered to include only clusters meeting the `min_entities` threshold. 4166 4167 - `self.tmp_metadata` 4168 Metadata corresponding to the filtered dataset. 4169 """ 4170 4171 if None not in self.UMAP_data: 4172 4173 if hasattr(self, "_tmp_data_old"): 4174 self.tmp_data = self._tmp_data_old 4175 4176 if hasattr(self, "_tmp_metadata_old"): 4177 self.tmp_metadata = self._tmp_metadata_old 4178 4179 umap_result = pd.DataFrame(self.UMAP_data.copy()) 4180 umap_result["id"] = self.tmp_metadata.index 4181 umap_result["clusters"] = self.dblabels 4182 umap_result = umap_result[umap_result["clusters"] != "-1"] 4183 tmp_metadata = self.tmp_metadata.copy() 4184 tmp_metadata["clusters"] = self.dblabels 4185 tmp_metadata = tmp_metadata[tmp_metadata["clusters"] != "-1"] 4186 tmp_data = self.tmp_data.copy() 4187 tmp_data.index = self.dblabels 4188 tmp_data = tmp_data[tmp_data.index != "-1"] 4189 4190 label_counts_dict = Counter(self.dblabels) 4191 4192 label_counts = pd.DataFrame.from_dict( 4193 label_counts_dict, orient="index", columns=["count"] 4194 ) 4195 4196 filtered_counts = label_counts[label_counts["count"] > min_entities] 4197 4198 tmp_metadata["full_id"] = list( 4199 tmp_metadata["id"].astype(str) + " # " + tmp_metadata["sets"] 4200 ) 4201 4202 tmp_data.index = tmp_metadata["full_id"] 4203 umap_result["full_id"] = list(tmp_metadata["full_id"]) 4204 4205 umap_result = umap_result[ 4206 umap_result["clusters"].isin(np.array(filtered_counts.index)) 4207 ] 4208 tmp_metadata = tmp_metadata[ 4209 tmp_metadata["clusters"].isin(np.array(filtered_counts.index)) 4210 ] 4211 4212 umap_result = umap_result.sort_values( 4213 by="clusters", key=lambda x: x.astype(int) 4214 ) 4215 4216 tmp_data = tmp_data[tmp_data.index.isin(np.array(tmp_metadata["full_id"]))] 4217 4218 static_fig = umap_static( 4219 umap_result, 
width=width, height=height, n_per_col=n_per_col 4220 ) 4221 4222 html_fig = umap_html(umap_result, width=width * 100, height=height * 100) 4223 4224 self.UMAP_plot["static"]["ClusterUMAP"] = static_fig 4225 self.UMAP_plot["html"]["ClusterUMAP"] = html_fig 4226 4227 tmp_metadata["full_name"] = list( 4228 tmp_metadata["clusters"] + " # " + tmp_metadata["sets"] 4229 ) 4230 4231 label_counts_dict = Counter(list(tmp_metadata["full_name"])) 4232 4233 label_counts = pd.DataFrame.from_dict( 4234 label_counts_dict, orient="index", columns=["count"] 4235 ) 4236 4237 filtered_counts = label_counts[label_counts["count"] > min_entities] 4238 4239 tmp_data.index = tmp_metadata["full_name"] 4240 umap_result["clusters"] = list(tmp_metadata["full_name"]) 4241 4242 umap_result = umap_result[ 4243 umap_result["clusters"].isin(np.array(filtered_counts.index)) 4244 ] 4245 4246 tmp_metadata = tmp_metadata[ 4247 tmp_metadata["full_name"].isin(np.array(filtered_counts.index)) 4248 ] 4249 4250 tmp_data = tmp_data[tmp_data.index.isin(np.array(filtered_counts.index))] 4251 4252 static_fig = umap_static( 4253 umap_result, width=width, height=height, n_per_col=n_per_col 4254 ) 4255 4256 html_fig = umap_html(umap_result, width=width * 100, height=height * 100) 4257 4258 self.UMAP_plot["static"]["ClusterXSetsUMAP"] = static_fig 4259 4260 self.UMAP_plot["html"]["ClusterXSetsUMAP"] = html_fig 4261 4262 self._tmp_data_old = self.tmp_data 4263 self._tmp_metadata_old = self.tmp_metadata 4264 4265 self.tmp_data = tmp_data 4266 self.tmp_metadata = tmp_metadata 4267 4268 else: 4269 print( 4270 "\nNo data for visualization. Please use the UMAP() and db_scan() methods first." 4271 ) 4272 4273 ## save data 4274 def full_info(self): 4275 """ 4276 Merge data with metadata based on the 'full_id' column. 4277 4278 This method combines `self.tmp_data` and `self.tmp_metadata` into a single DataFrame if the metadata contains a 'full_id' column. 
If 'full_id' is not present, the method prints a warning to complete the preprocessing pipeline. 4279 4280 Returns 4281 ------- 4282 pd.DataFrame or None 4283 Merged DataFrame containing both data and metadata if 'full_id' exists; otherwise, None. 4284 """ 4285 4286 tmp_data = self.tmp_data.copy() 4287 tmp_metadata = self.tmp_metadata.copy() 4288 4289 if "full_id" in tmp_metadata.columns: 4290 tmp_data.index = tmp_metadata["full_id"] 4291 4292 merged_df = tmp_data.merge( 4293 tmp_metadata, left_index=True, right_on="full_id", how="left" 4294 ) 4295 4296 return merged_df 4297 4298 else: 4299 4300 print("\nMetadata is not completed!") 4301 4302 ################################################################################# 4303 4304 def DFA(self, meta_group_by: str = "sets", sets: dict = {}, n_proc=5): 4305 """ 4306 Perform Differential Feature Analysis (DFA) on specified data groups. 4307 4308 This method conducts DFA using a grouping factor from metadata and a dictionary of sets for comparison. It allows for the identification of significant differences across defined sets. 4309 4310 The analysis includes: 4311 - Mann–Whitney U test 4312 - Percentage of non-zero values 4313 - Means and standard deviations 4314 - Effect size metric (ESM) 4315 - Benjamini–Hochberg FDR correction 4316 - Fold-change and log2 fold-change 4317 4318 Parameters 4319 ---------- 4320 meta_group_by : str, optional 4321 Metadata column used for grouping during the analysis. 4322 Default is ``'sets'``. 4323 To view available grouping categories, use ``self.groups``. 4324 4325 sets : dict, optional 4326 Dictionary defining groups for pairwise comparison. 4327 Keys correspond to group names, and values are lists of labels 4328 belonging to each group. 4329 4330 Example 4331 ------- 4332 >>> sets = { 4333 ... 'healthy': ['21q'], 4334 ... 'disease': ['71q', '77q', '109q'] 4335 ... } 4336 In this configuration, the *healthy* group is compared against the 4337 aggregated *disease* groups. 
4338 4339 n_proc : int, optional 4340 Number of CPU cores used for parallel processing. 4341 Default is ``5``. 4342 4343 Returns 4344 ------- 4345 pandas.DataFrame or None 4346 A DataFrame containing statistical results for each feature, including: 4347 4348 - ``feature`` : str 4349 - ``p_val`` : float 4350 - ``adj_pval`` : float 4351 - ``pct_valid`` : float 4352 - ``pct_ctrl`` : float 4353 - ``avg_valid`` : float 4354 - ``avg_ctrl`` : float 4355 - ``sd_valid`` : float 4356 - ``sd_ctrl`` : float 4357 - ``esm`` : float 4358 - ``FC`` : float 4359 - ``log(FC)`` : float 4360 - ``norm_diff`` : float 4361 - ``valid_group`` : str 4362 - ``-log(p_val)`` : float 4363 4364 If ``sets`` is ``None``, results for each group are concatenated. 4365 4366 Returns ``None`` in case of errors or invalid parameters. 4367 4368 Notes 4369 ----- 4370 - Columns containing only zeros are automatically removed. 4371 - p-values equal for both groups produce ``p_val = 1``. 4372 - Benjamini–Hochberg correction is applied separately within each group comparison. 4373 - Fold-change is stabilized using a small, data-derived ``low_factor``. 4374 - Uses ``Mann–Whitney U`` test with ``alternative='two-sided'``. 4375 4376 """ 4377 4378 tmp_data = self.tmp_data.copy() 4379 4380 tmp_data = tmp_data.select_dtypes(include="number") 4381 4382 tmp_metadata = self.tmp_metadata.copy() 4383 4384 if len(sets.keys()) >= 2: 4385 print("\nAnalysis strated on provided sets dictionary and meta_group_by...") 4386 tmp_data.index = list(tmp_metadata[meta_group_by]) 4387 tmp_metadata["sets"] = tmp_metadata[meta_group_by] 4388 results = statistic( 4389 tmp_data.transpose(), sets=sets, metadata=tmp_metadata, n_proc=n_proc 4390 ) 4391 4392 else: 4393 print( 4394 "\nAnalysis strated on for all groups to each other in meta_group_by..." 
4395 ) 4396 tmp_data.index = list(tmp_metadata[meta_group_by]) 4397 tmp_metadata["sets"] = tmp_metadata[meta_group_by] 4398 results = statistic( 4399 tmp_data.transpose(), sets=None, metadata=tmp_metadata, n_proc=n_proc 4400 ) 4401 4402 self.DFA_results = results 4403 4404 def heatmap_DFA( 4405 self, 4406 p_value: float | int = 0.05, 4407 top_n: int = 5, 4408 scale: bool = False, 4409 clustering: str | None = "ward", 4410 figsize=(10, 5), 4411 ): 4412 """ 4413 Generate a heatmap of the top DFA features filtered by p-value and log fold change. 4414 4415 Parameters 4416 ---------- 4417 p_value : float or int, optional 4418 Significance threshold used to filter features by their p-value. 4419 Only features with p_val < p_value are included. Default is 0.05. 4420 4421 top_n : int, optional 4422 Number of top features selected per group based on the 'esm' score. 4423 Default is 5. 4424 4425 scale : bool, optional 4426 Whether to apply Min–Max scaling to heatmap values across features. 4427 Default is False. 4428 4429 clustering : str or None, optional 4430 Hierarchical clustering method applied to rows/columns of the heatmap. 4431 If None, clustering is disabled. Default is 'ward'. 4432 4433 figsize : tuple, optional 4434 Size of the resulting matplotlib figure. Default is (10, 5). 4435 4436 Notes 4437 ----- 4438 - Only features with a positive log fold change ('log(FC)' > 0) are considered. 4439 - Heatmap values represent -log10(p_value) for visualization. 4440 - If `scale=True`, values are normalized using Min–Max scaling. 4441 - The generated figure is displayed and stored in `self.DFA_plot`. 
4442 """ 4443 4444 df_reduced = self.DFA_results.copy() 4445 4446 df_reduced = df_reduced[df_reduced["log(FC)"] > 0] 4447 4448 df_reduced = df_reduced[df_reduced["p_val"] < p_value] 4449 4450 df_reduced = ( 4451 df_reduced.sort_values(["valid_group", "esm"], ascending=[True, False]) 4452 .groupby("valid_group", as_index=False) 4453 .head(top_n) 4454 ) 4455 4456 heatmap_data = df_reduced.pivot( 4457 index="feature", columns="valid_group", values="-log(p_val)" 4458 ).fillna(0) 4459 4460 label = "-log10(p_value)" 4461 4462 if scale: 4463 label = f"scaled({label})" 4464 scaler = MinMaxScaler() 4465 heatmap_data = pd.DataFrame( 4466 scaler.fit_transform(heatmap_data), 4467 index=heatmap_data.index, 4468 columns=heatmap_data.columns, 4469 ) 4470 4471 if clustering is not None: 4472 Z_rows = linkage(heatmap_data.values, method=clustering) 4473 row_order = leaves_list(Z_rows) 4474 4475 Z_cols = linkage(heatmap_data.values.T, method=clustering) 4476 col_order = leaves_list(Z_cols) 4477 4478 heatmap_data = heatmap_data.iloc[row_order, col_order] 4479 4480 figure = plt.figure(figsize=figsize) 4481 sns.heatmap( 4482 heatmap_data, 4483 cmap="viridis", 4484 linewidths=0.5, 4485 linecolor="gray", 4486 cbar_kws={"label": label}, 4487 fmt=".2f", 4488 ) 4489 plt.ylabel("Feature") 4490 plt.xlabel("Cluster") 4491 plt.xticks(rotation=30, ha="right") 4492 4493 plt.tight_layout() 4494 4495 if cfg._DISPLAY_MODE: 4496 plt.show() 4497 4498 self.DFA_plot = figure 4499 4500 def get_DFA_plot(self, show: bool = True): 4501 """ 4502 Retrieve the heatmap figure generated by `heatmap_DFA()`. 4503 4504 Parameters 4505 ---------- 4506 show : bool, optional 4507 Whether to display the stored heatmap figure. Default is True. 4508 4509 Returns 4510 ------- 4511 matplotlib.figure.Figure 4512 The figure object containing the DFA heatmap. 4513 """ 4514 4515 if self.DFA_plot is None: 4516 print("\nNo results to return! 
Please run the heatmap_DFA() method first.") 4517 else: 4518 if cfg._DISPLAY_MODE: 4519 if show is True: 4520 self.DFA_plot 4521 try: 4522 display(self.DFA_plot) 4523 except: 4524 None 4525 4526 return self.DFA_plot 4527 4528 def print_avaiable_features(self): 4529 """ 4530 Print the available features (columns) in the current dataset. 4531 4532 This method lists all column names in `self.tmp_data` to help identify which features are available for analysis. 4533 4534 Example 4535 ------- 4536 >>> group_analysis.print_avaiable_features() 4537 """ 4538 4539 print("Avaiable features:") 4540 for cl in self.tmp_data.columns: 4541 print(cl) 4542 4543 def proportion_analysis( 4544 self, 4545 grouping_col: str = "sets", 4546 val_col: str = "nuclei_per_img", 4547 grouping_dict=None, 4548 omit=None, 4549 ): 4550 """ 4551 Perform proportion analysis by comparing the distribution of values across groups. 4552 4553 This method analyzes the distribution of values (e.g., nuclei counts) across different groups defined in the dataset. It can optionally group categories, omit specific values, and produces both statistical results and a visualization. 4554 4555 Parameters 4556 ---------- 4557 grouping_col : str, optional 4558 Column to group by. Default is 'sets'. 4559 4560 val_col : str, optional 4561 Column containing the values to analyze. Default is 'nuclei_per_img'. 4562 4563 grouping_dict : dict or None, optional 4564 Dictionary mapping new group names to categories in `grouping_col`. If None, analysis is based on the original groups. 4565 4566 omit : str, list, or None, optional 4567 Values to exclude from the analysis. Default is None. 4568 4569 Attributes 4570 ---------- 4571 proportion_stats : pd.DataFrame 4572 DataFrame containing chi-square test results for pairwise group comparisons. 4573 4574 proportion_plot : matplotlib.figure.Figure 4575 Plot visualizing the proportions across groups. 4576 4577 Example 4578 ------- 4579 >>> group_analysis.proportion_analysis( 4580 ... 
grouping_col='sets', 4581 ... val_col='nuclei_per_img', 4582 ... grouping_dict={'Group A': [1, 2], 'Group B': [3, 4]}, 4583 ... omit=5 4584 ... ) 4585 """ 4586 4587 andata = self.tmp_data.copy() 4588 4589 andata[grouping_col] = list(self.tmp_metadata[grouping_col]) 4590 4591 andata = andata[[grouping_col, val_col]] 4592 4593 if omit is not None: 4594 if isinstance(omit, list): 4595 andata = andata[~andata[val_col].isin(omit)] 4596 else: 4597 andata = andata[andata[val_col] != omit] 4598 4599 andata = andata.reset_index(drop=True) 4600 andata["index_col"] = andata.index 4601 4602 if isinstance(grouping_dict, dict): 4603 for k in grouping_dict.keys(): 4604 andata.loc[ 4605 andata[grouping_col].isin(grouping_dict[k]), grouping_col 4606 ] = k 4607 4608 df_pivot = andata.pivot_table( 4609 index=val_col, 4610 columns=grouping_col, 4611 values="index_col", 4612 aggfunc="count", 4613 fill_value=0, 4614 ) 4615 4616 Z_rows = linkage(df_pivot.values, method="ward") 4617 row_order = leaves_list(Z_rows) 4618 4619 Z_cols = linkage(df_pivot.values.T, method="ward") 4620 col_order = leaves_list(Z_cols) 4621 4622 df_pivot = df_pivot.iloc[row_order, col_order] 4623 4624 chi_df = chi_pairs(df_pivot) 4625 4626 self.proportion_stats = chi_pairs(df_pivot) 4627 4628 chi_df["Significance_Label"] = chi_df["p-value"].apply(get_significance_label) 4629 4630 self.proportion_plot = prop_plot(df_pivot, chi_df) 4631 4632 def get_proportion_plot(self, show: bool = True): 4633 """ 4634 Retrieve the proportion bar plot generated by the `proportion_analysis()` method. 4635 4636 Parameters 4637 ---------- 4638 show : bool, optional 4639 Whether to display the proportion bar plot. Default is True. 4640 4641 Returns 4642 ------- 4643 matplotlib.figure.Figure 4644 The figure object containing the proportion bar plot. 4645 """ 4646 4647 if self.proportion_plot is None: 4648 print( 4649 "\nNo results to return! Please run the proportion_analysis() method first." 
4650 ) 4651 else: 4652 if cfg._DISPLAY_MODE: 4653 if show: 4654 self.proportion_plot 4655 try: 4656 display(self.proportion_plot) 4657 except: 4658 None 4659 4660 return self.proportion_plot 4661 4662 def get_proportion_stats(self): 4663 """ 4664 Retrieve the proportion statistics computed by the `proportion_analysis()` method. 4665 4666 Returns 4667 ------- 4668 pd.DataFrame 4669 The proportion statistics stored in `self.proportion_stats`. 4670 """ 4671 4672 if None in self.proportion_stats: 4673 print( 4674 "\nNo results to return! Please run the proportion_analysis() method first." 4675 ) 4676 else: 4677 return self.proportion_stats
A class for performing multivariate analysis, dimensionality reduction, clustering, and differential feature analysis (DFA) on biological or experimental datasets.
This class provides tools for:
- Scaling and PCA of input data
- UMAP embedding and DBSCAN clustering
- Differential Feature Analysis across groups
- Proportion analysis and plotting
- Data selection and merging with metadata
Attributes
input_data : pd.DataFrame The primary dataset containing features for analysis.
input_metadata : pd.DataFrame Metadata corresponding to the input data, including identifiers and group labels.
tmp_data : pd.DataFrame Temporary copy of the input data, used for feature selection and filtering.
tmp_metadata : pd.DataFrame Temporary copy of metadata, used for filtered or subsetted operations.
scaled_data : np.ndarray or None
Scaled version of the temporary dataset (tmp_data), updated after data_scale().
PCA_results : np.ndarray or None Results of PCA transformation applied on scaled data.
var_data : np.ndarray or None Explained variance ratio from PCA.
knee_plot : matplotlib.figure.Figure or None Figure of cumulative explained variance for PCA components.
UMAP_data : np.ndarray or None Embedding results from UMAP dimensionality reduction.
UMAP_plot : dict Dictionary containing UMAP plots. Keys: 'static' (matplotlib) and 'html' (plotly).
dblabels : list or None Cluster labels assigned by DBSCAN after UMAP embedding.
explained_variance_ratio : np.ndarray or None Explained variance ratio of PCA components.
DFA_results : pd.DataFrame or None Results of Differential Feature Analysis (DFA).
proportion_stats : pd.DataFrame or None Statistics from proportion analysis.
proportion_plot : matplotlib.figure.Figure or None Figure of proportion analysis results.
Methods
resest_project(): Reset all temporary and analysis results to initial state.
load_data(data, ids_col='id_name', set_col='set'): Class method to load data and metadata and initialize the object.
groups: Property returning available groups in the metadata.
get_DFA(), get_PCA(), get_knee_plot(), get_var_data(), get_scaled_data(): Methods to retrieve previously computed results.
UMAP(), db_scan(), UMAP_on_clusters(): Methods for dimensionality reduction and clustering visualization.
DFA(meta_group_by='sets', sets={}, n_proc=5): Perform Differential Feature Analysis.
proportion_analysis(grouping_col='sets', val_col='nuclei_per_img', ...): Perform and plot proportion analysis across groups.
    def __init__(
        self,
        input_data,
        input_metadata,
    ):
        """
        Initialize a GroupAnalysis instance with data and metadata.

        Parameters
        ----------
        input_data : pd.DataFrame
            Dataset containing features for analysis. Rows represent samples and columns represent features.

        input_metadata : pd.DataFrame
            Metadata corresponding to `input_data`, including sample identifiers and group labels.
        """

        self.input_data = input_data
        """Stored input dataset for analysis."""

        self.input_metadata = input_metadata
        """Stored metadata associated with `input_data`."""

        self.tmp_metadata = input_metadata
        """Temporary copy of `input_metadata` used for filtered operations."""

        self.tmp_data = input_data
        """Temporary copy of `input_data` used for filtering, selection, or scaling."""

        self.scaled_data = None
        """Stored scaled version of `tmp_data` after normalization or standardization."""

        self.PCA_results = None
        """Stored results of PCA transformation applied on `scaled_data`."""

        self.var_data = None
        """Stored explained variance ratio for PCA components."""

        self.knee_plot = None
        """Figure showing cumulative explained variance for PCA."""

        self.UMAP_data = None
        """Stored embedding coordinates from UMAP dimensionality reduction."""

        self.UMAP_plot = {"static": {}, "html": {}}
        """Stored dictionary containing UMAP plots: 'static' (matplotlib) and 'html' (plotly)."""

        self.dblabels = None
        """Stored cluster labels assigned by DBSCAN after UMAP embedding."""

        self.explained_variance_ratio = None
        """Stored explained variance ratio of PCA components."""

        self.DFA_results = None
        """Stored Differential Feature Analysis (DFA) results."""

        self.proportion_stats = None
        """Stored statistics from proportion analysis of groups."""

        self.proportion_plot = None
        """Figure visualizing proportion analysis results."""
Initialize a GroupAnalysis instance with data and metadata.
Parameters
input_data : pd.DataFrame Dataset containing features for analysis. Rows represent samples and columns represent features.
input_metadata : pd.DataFrame
Metadata corresponding to input_data, including sample identifiers and group labels.
3595 def resest_project(self): 3596 """ 3597 Resets the project state by clearing or reinitializing various attributes. 3598 3599 This method resets the following attributes to initial values: 3600 - `tmp_metadata` 3601 - `tmp_data` 3602 - `scaled_data` 3603 - `PCA_results` 3604 - `var_data` 3605 - `knee_plot` 3606 - `UMAP_data` 3607 - `UMAP_plot` 3608 - `dblabels` 3609 - `explained_variance_ratio` 3610 - `DFA_results` 3611 3612 This method is typically called to reinitialize the project data and results, preparing the system for new computations or project resets. 3613 """ 3614 3615 self.tmp_metadata = self.input_metadata 3616 self.tmp_data = self.input_data 3617 self.scaled_data = None 3618 self.PCA_results = None 3619 self.var_data = None 3620 self.knee_plot = None 3621 self.UMAP_data = None 3622 self.UMAP_plot = {"static": {}, "html": {}} 3623 self.dblabels = None 3624 self.explained_variance_ratio = None 3625 self.DFA_results = None 3626 self.proportion_stats = None 3627 self.proportion_plot = None
Resets the project state by clearing or reinitializing various attributes.
This method resets the following attributes to initial values:
`tmp_metadata`, `tmp_data`, `scaled_data`, `PCA_results`, `var_data`, `knee_plot`, `UMAP_data`, `UMAP_plot`, `dblabels`, `explained_variance_ratio`, `DFA_results`
This method is typically called to reinitialize the project data and results, preparing the system for new computations or project resets.
3629 @classmethod 3630 def load_data(cls, data, ids_col: str = "id_name", set_col: str = "set"): 3631 """ 3632 Load data and initialize the class by storing both the feature data and metadata. 3633 3634 Parameters 3635 ---------- 3636 data : pd.DataFrame 3637 Input dataset used for group analysis. Must contain both feature columns and 3638 metadata columns specified by `ids_col` and `set_col`. 3639 3640 ids_col : str, optional 3641 Name of the column containing unique object identifiers. 3642 Default is ``'id_name'``. 3643 3644 set_col : str, optional 3645 Name of the column specifying group or set assignment for each object. 3646 Default is ``'set'``. 3647 3648 Notes 3649 ----- 3650 This method performs in-place initialization of the class and does not return 3651 a separate object. All loaded data and metadata become available through the 3652 class attributes for downstream analysis. 3653 3654 This method updates internal class attributes: 3655 3656 - **input_data** : pd.DataFrame 3657 Cleaned feature table with index set to object IDs. 3658 3659 - **tmp_data** : pd.DataFrame 3660 Copy of `input_data` used for temporary operations. 3661 3662 - **input_metadata** : pd.DataFrame 3663 Metadata containing object IDs and group assignments. 3664 3665 - **tmp_metadata** : pd.DataFrame 3666 Copy of `input_metadata` for temporary operations. 3667 """ 3668 3669 data = data.dropna() 3670 3671 metadata = pd.DataFrame() 3672 metadata["id"] = data[ids_col] 3673 metadata["sets"] = data[set_col] 3674 3675 data.index = data[ids_col] 3676 3677 try: 3678 data.pop("id_name") 3679 except: 3680 None 3681 3682 try: 3683 data.pop("Object Number") 3684 except: 3685 None 3686 3687 return cls(data, metadata)
Load data and initialize the class by storing both the feature data and metadata.
Parameters
data : pd.DataFrame
Input dataset used for group analysis. Must contain both feature columns and
metadata columns specified by ids_col and set_col.
ids_col : str, optional
Name of the column containing unique object identifiers.
Default is 'id_name'.
set_col : str, optional
Name of the column specifying group or set assignment for each object.
Default is 'set'.
Notes
This method performs in-place initialization of the class and does not return a separate object. All loaded data and metadata become available through the class attributes for downstream analysis.
This method updates internal class attributes:
input_data : pd.DataFrame Cleaned feature table with index set to object IDs.
tmp_data : pd.DataFrame — Copy of `input_data` used for temporary operations.
input_metadata : pd.DataFrame — Metadata containing object IDs and group assignments.
tmp_metadata : pd.DataFrame — Copy of `input_metadata` used for temporary operations.
3689 @property 3690 def groups(self): 3691 """ 3692 Return information about available groups in the metadata for ``self.DFA``. 3693 3694 Returns 3695 ------- 3696 dict 3697 Dictionary mapping each metadata column name to a list of unique groups 3698 available in that column. 3699 """ 3700 3701 try: 3702 return { 3703 "sets": set(self.tmp_metadata["sets"]), 3704 "full_name": set(self.tmp_metadata["full_name"]), 3705 } 3706 except: 3707 return {"sets": set(self.tmp_metadata["sets"])}
Return information about available groups in the metadata for self.DFA.
Returns
dict Dictionary mapping each metadata column name to a list of unique groups available in that column.
3709 def get_DFA(self): 3710 """ 3711 Retrieve the DFA results produced by the ``DFA()`` method. 3712 3713 Returns 3714 ------- 3715 pd.DataFrame 3716 The DFA results stored in ``self.DFA_results``. 3717 """ 3718 3719 if None in self.DFA_results: 3720 print("\nNo results to return! Please run the DFA() method first.") 3721 else: 3722 return self.DFA_results
Retrieve the DFA results produced by the DFA() method.
Returns
pd.DataFrame
The DFA results stored in self.DFA_results.
3724 def get_PCA(self): 3725 """ 3726 Retrieve the PCA results produced by the ``PCA()`` method. 3727 3728 Returns 3729 ------- 3730 np.ndarray 3731 The PCA results stored in ``self.PCA_results``. 3732 """ 3733 3734 if None in self.PCA_results: 3735 print("\nNo results to return! Please run the PCA() method first.") 3736 else: 3737 return self.PCA_results
Retrieve the PCA results produced by the PCA() method.
Returns
np.ndarray
The PCA results stored in self.PCA_results.
3739 def get_knee_plot(self, show: bool = True): 3740 """ 3741 Retrieve the knee plot of cumulative explained variance generated by the ``var_plot()`` method. 3742 3743 Parameters 3744 ---------- 3745 show : bool, optional 3746 If ``True`` (default), the knee plot is displayed. 3747 3748 Returns 3749 ------- 3750 matplotlib.figure.Figure 3751 The figure object containing the knee plot. 3752 """ 3753 3754 if self.knee_plot is None: 3755 print("\nNo results to return! Please run the var_plot() method first.") 3756 else: 3757 if cfg._DISPLAY_MODE: 3758 if show is True: 3759 self.knee_plot 3760 try: 3761 display(self.knee_plot) 3762 except: 3763 None 3764 3765 return self.knee_plot
Retrieve the knee plot of cumulative explained variance generated by the var_plot() method.
Parameters
show : bool, optional
If True (default), the knee plot is displayed.
Returns
matplotlib.figure.Figure The figure object containing the knee plot.
3767 def get_var_data(self): 3768 """ 3769 Retrieve the explained variance data from the ``var_plot()`` method. 3770 3771 Returns 3772 ------- 3773 np.ndarray 3774 Array containing the explained variance values stored in ``self.var_data``. 3775 """ 3776 3777 if None in self.var_data: 3778 print("\nNo results to return! Please run the var_plot() method first.") 3779 else: 3780 return self.var_data
Retrieve the explained variance data from the var_plot() method.
Returns
np.ndarray
Array containing the explained variance values stored in self.var_data.
3782 def get_scaled_data(self): 3783 """ 3784 Retrieve the scaled data produced by the ``data_scale()`` method. 3785 3786 Returns 3787 ------- 3788 np.ndarray 3789 Scaled data stored in ``self.scaled_data``. 3790 """ 3791 3792 if None in self.scaled_data: 3793 print("\nNo results to return! Please run the data_scale() method first.") 3794 else: 3795 return self.scaled_data
Retrieve the scaled data produced by the data_scale() method.
Returns
np.ndarray
Scaled data stored in self.scaled_data.
3797 def get_UMAP_data(self): 3798 """ 3799 Retrieve the UMAP-transformed data generated by the ``UMAP()`` method. 3800 3801 Returns 3802 ------- 3803 np.ndarray 3804 UMAP-embedded data stored in ``self.UMAP_data``. 3805 """ 3806 3807 if None in self.UMAP_data: 3808 print("\nNo results to return! Please run the UMAP() method first.") 3809 else: 3810 return self.UMAP_data
Retrieve the UMAP-transformed data generated by the UMAP() method.
Returns
np.ndarray
UMAP-embedded data stored in self.UMAP_data.
3812 def get_UMAP_plots(self, plot_type: str = "static", show: bool = True): 3813 """ 3814 Retrieve UMAP plots generated by the ``UMAP()`` and/or ``UMAP_on_clusters()`` methods. 3815 3816 Parameters 3817 ---------- 3818 show : bool, optional 3819 Whether to display the UMAP plots. Default is True. 3820 3821 Returns 3822 ------- 3823 dict of matplotlib.figure.Figure 3824 A dictionary containing the UMAP plots. Keys correspond to plot names, and values are the figure objects. 3825 """ 3826 3827 if plot_type == "html": 3828 3829 if len(self.UMAP_plot["html"].keys()) == 0: 3830 print( 3831 "\nNo results to return! Please run the UMAP() and / or UMAP_on_clusters() methods first." 3832 ) 3833 else: 3834 if cfg._DISPLAY_MODE: 3835 if show: 3836 for k in self.UMAP_plot["html"].keys(): 3837 self.UMAP_plot["html"][k] 3838 try: 3839 display(self.UMAP_plot["html"][k]) 3840 except: 3841 None 3842 3843 return self.UMAP_plot["html"] 3844 3845 else: 3846 3847 if len(self.UMAP_plot["static"].keys()) == 0: 3848 print( 3849 "\nNo results to return! Please run the UMAP() and / or UMAP_on_clusters() methods first." 3850 ) 3851 else: 3852 if cfg._DISPLAY_MODE: 3853 if show: 3854 for k in self.UMAP_plot["static"].keys(): 3855 self.UMAP_plot["static"][k] 3856 try: 3857 display(self.UMAP_plot["static"][k]) 3858 except: 3859 None 3860 3861 return self.UMAP_plot["static"]
Retrieve UMAP plots generated by the UMAP() and/or UMAP_on_clusters() methods.
Parameters
show : bool, optional Whether to display the UMAP plots. Default is True.
Returns
dict of matplotlib.figure.Figure A dictionary containing the UMAP plots. Keys correspond to plot names, and values are the figure objects.
3863 def select_data(self, features_list: list = []): 3864 """ 3865 Select specific features (columns) from the dataset for further analysis. 3866 3867 Parameters 3868 ---------- 3869 features_list : list of str, optional 3870 List of feature names (column names) to select from the dataset. Default is an empty list, which selects no features. 3871 3872 Notes 3873 ----- 3874 Modifies the `self.tmp_data` attribute to contain only the selected features from `self.input_data`. 3875 """ 3876 3877 dat = self.input_data.copy() 3878 3879 not_in_columns = [name for name in features_list if name not in dat.columns] 3880 3881 if not_in_columns: 3882 print("These names are not in data", not_in_columns) 3883 else: 3884 print("All names are present in data.") 3885 3886 in_columns = [name for name in features_list if name in dat.columns] 3887 3888 dat = dat[in_columns] 3889 3890 self.tmp_data = dat
Select specific features (columns) from the dataset for further analysis.
Parameters
features_list : list of str, optional List of feature names (column names) to select from the dataset. Default is an empty list, which selects no features.
Notes
Modifies the self.tmp_data attribute to contain only the selected features from self.input_data.
3892 def data_scale(self): 3893 """ 3894 Scale the data using standardization (z-score normalization). 3895 3896 This method applies `StandardScaler` from scikit-learn to the temporary dataset (`self.tmp_data`) and stores the scaled data. 3897 3898 Notes 3899 ----- 3900 Modifies the `self.scaled_data` attribute to contain the standardized version of `self.tmp_data`. 3901 """ 3902 3903 if None not in self.tmp_data: 3904 3905 def is_id_column(name: str): 3906 name_lower = name.lower() 3907 return name_lower == "id" or "id_" in name_lower or "_id" in name_lower 3908 3909 tmp = self.tmp_data 3910 3911 cols_with_strings = [ 3912 c 3913 for c in tmp.columns 3914 if tmp[c].apply(lambda x: isinstance(x, str)).any() 3915 ] 3916 3917 cols_id_pattern = [c for c in tmp.columns if is_id_column(c)] 3918 3919 cols_to_drop = list(set(cols_id_pattern + cols_with_strings)) 3920 3921 tmp = tmp.drop(columns=cols_to_drop) 3922 3923 scaler = StandardScaler() 3924 3925 self.scaled_data = scaler.fit_transform(tmp) 3926 3927 else: 3928 print( 3929 "\nNo data to scale. Please use the load_data() method first, and optionally the select_data() method." 3930 )
Scale the data using standardization (z-score normalization).
This method applies StandardScaler from scikit-learn to the temporary dataset (self.tmp_data) and stores the scaled data.
Notes
Modifies the self.scaled_data attribute to contain the standardized version of self.tmp_data.
3932 def PCA(self): 3933 """ 3934 Perform Principal Component Analysis (PCA) on the scaled data. 3935 3936 This method reduces the dimensionality of `self.scaled_data` while retaining the maximum variance. 3937 3938 Notes 3939 ----- 3940 Modifies the `self.PCA_results` attribute with the PCA-transformed data. 3941 """ 3942 3943 if None not in self.scaled_data: 3944 pca = PCA(n_components=self.scaled_data.shape[1]) 3945 self.PCA_results = pca.fit_transform(self.scaled_data) 3946 self.explained_variance_ratio = pca.explained_variance_ratio_ 3947 else: 3948 print("\nNo data for PCA. Please use the data_scale() method first.")
Perform Principal Component Analysis (PCA) on the scaled data.
This method reduces the dimensionality of self.scaled_data while retaining the maximum variance.
Notes
Modifies the self.PCA_results attribute with the PCA-transformed data.
3950 def var_plot(self): 3951 """ 3952 Plot the cumulative explained variance of the principal components from PCA. 3953 3954 This method visualizes the cumulative explained variance to help determine how many components capture most of the variance. 3955 3956 Notes 3957 ----- 3958 Stores results in the following attributes: 3959 - `self.var_data` (np.ndarray): Explained variance ratio for each principal component. 3960 - `self.knee_plot` (matplotlib.figure.Figure): Figure of the cumulative explained variance plot. 3961 """ 3962 3963 if None not in self.PCA_results: 3964 3965 fig, _ = plt.subplots(figsize=(15, 7)) 3966 explained_var = self.explained_variance_ratio 3967 3968 cumulative_var = np.cumsum(explained_var) 3969 3970 # Plot the cumulative explained variance as a function of the number of components 3971 plt.plot(cumulative_var) 3972 plt.xlabel("Number of Components") 3973 plt.ylabel("Cumulative Explained Variance") 3974 plt.title("Explained variance of PCs") 3975 plt.xticks(np.arange(0, len(cumulative_var) + 1, step=1)) 3976 3977 self.var_data = explained_var 3978 self.knee_plot = fig 3979 3980 else: 3981 3982 print( 3983 "\nNo data for variance explanation analysis. Please use the PCA() method first." 3984 )
Plot the cumulative explained variance of the principal components from PCA.
This method visualizes the cumulative explained variance to help determine how many components capture most of the variance.
Notes
Stores results in the following attributes:
- `self.var_data` (np.ndarray): Explained variance ratio for each principal component.
- `self.knee_plot` (matplotlib.figure.Figure): Figure of the cumulative explained variance plot.
def UMAP(
    self,
    PC_num: int = 5,
    factorize_with_metadata: bool = False,
    harmonize_sets: bool = True,
    n_neighbors: int = 25,
    min_dist: float = 0.01,
    n_components: int = 2,
    width: int = 8,
    height: int = 6,
):
    """
    Perform UMAP (Uniform Manifold Approximation and Projection) dimensionality reduction on PCA results.

    UMAP is applied to the top principal components, optionally using metadata
    labels to influence the embedding. Generates embeddings plus static and
    interactive visualizations.

    Parameters
    ----------
    PC_num : int, optional
        Number of top principal components to use for UMAP embedding. Default is 5.

    factorize_with_metadata : bool, optional
        Whether to use metadata (e.g., 'sets') to supervise the UMAP embedding. Default is False.

    harmonize_sets : bool, optional
        If True, applies harmonization across data sets before computing the
        UMAP embedding. Default is True.

    n_neighbors : int, optional
        Number of neighbors for UMAP to compute local structure. Default is 25.

    min_dist : float, optional
        Minimum distance between points in the low-dimensional embedding. Default is 0.01.

    n_components : int, optional
        Number of dimensions for the UMAP embedding. Default is 2.

    width : int, optional
        Width of the generated matplotlib figures (in inches). Default is 8.

    height : int, optional
        Height of the generated matplotlib figures (in inches). Default is 6.

    Notes
    -----
    Stores results in the following attributes:
    - `self.UMAP_data` (np.ndarray): UMAP-transformed data.
    - `self.UMAP_plot['static']['PrimaryUMAP']` (matplotlib.figure.Figure): Static UMAP plot.
    - `self.UMAP_plot['html']['PrimaryUMAP']` (plotly.graph_objs.Figure): Interactive UMAP plot.
    """

    # `PCA_results` is None until PCA() runs; the previous
    # `None not in self.PCA_results` raised TypeError in that case.
    if self.PCA_results is None:
        print("\nNo data for UMAP. Please use the PCA() method first.")
        return

    reducer = umap.UMAP(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        n_components=n_components,
        random_state=42,
    )

    pca_res = self.PCA_results

    if harmonize_sets:
        # Batch-correct the PC scores across 'sets' before embedding.
        pca_res = np.array(pca_res)
        pca_res = np.array(
            harmonize.run_harmony(
                pca_res, self.input_metadata, vars_use="sets"
            ).Z_corr
        ).T

    # NOTE(review): the slice takes PC_num + 1 components, not PC_num —
    # confirm this off-by-one is intended before changing it.
    if factorize_with_metadata:
        # Supervised UMAP: encode set labels as integer codes.
        numeric_labels = pd.Categorical(self.tmp_metadata["sets"]).codes
        umap_result = reducer.fit_transform(
            pca_res[:, : PC_num + 1], y=numeric_labels
        )
    else:
        umap_result = reducer.fit_transform(pca_res[:, : PC_num + 1])

    umap_result_plot = pd.DataFrame(umap_result.copy())
    umap_result_plot["clusters"] = list(self.tmp_metadata["sets"])

    static_fig = umap_static(umap_result_plot, width=width, height=height)
    html_fig = umap_html(umap_result_plot, width=width * 100, height=height * 100)

    self.UMAP_data = umap_result

    self.UMAP_plot["static"]["PrimaryUMAP"] = static_fig
    self.UMAP_plot["html"]["PrimaryUMAP"] = html_fig
Perform UMAP (Uniform Manifold Approximation and Projection) dimensionality reduction on PCA results.
UMAP is applied to the top principal components, optionally using metadata labels to influence the embedding. Generates both 2D/3D embeddings and visualizations.
Parameters
PC_num : int, optional Number of top principal components to use for UMAP embedding. Default is 5.
factorize_with_metadata : bool, optional Whether to use metadata (e.g., 'sets') to factorize UMAP embedding. Default is False.
harmonize_sets : bool, optional If True, applies harmonization across data sets before computing the UMAP embedding. Default is True.
n_neighbors : int, optional Number of neighbors for UMAP to compute local structure. Default is 25.
min_dist : float, optional Minimum distance between points in the low-dimensional embedding. Default is 0.01.
n_components : int, optional Number of dimensions for the UMAP embedding. Default is 2.
width : int, optional Width of the generated matplotlib figures (in inches). Default is 8.
height : int, optional Height of the generated matplotlib figures (in inches). Default is 6.
Notes
Stores results in the following attributes:
- `self.UMAP_data` (np.ndarray): UMAP-transformed data.
- `self.UMAP_plot['static']['PrimaryUMAP']` (matplotlib.figure.Figure): Static visualization of the UMAP embedding.
- `self.UMAP_plot['html']['PrimaryUMAP']` (plotly.graph_objs.Figure): Interactive Plotly visualization of the UMAP embedding.
4087 def db_scan(self, eps=0.5, min_samples: int = 10): 4088 """ 4089 Perform DBSCAN clustering on UMAP-transformed data. 4090 4091 DBSCAN identifies clusters based on density, labeling points in dense regions as clusters and others as noise. 4092 4093 Parameters 4094 ---------- 4095 eps : float, optional 4096 Maximum distance between two points to be considered neighbors. Default is 0.5. 4097 4098 min_samples : int, optional 4099 Minimum number of points required to form a dense region (cluster). Default is 10. 4100 4101 Notes 4102 ----- 4103 Stores the results in the following attribute: 4104 - `self.dblabels` (list of str): Cluster labels assigned by DBSCAN for each point in the UMAP embedding. 4105 """ 4106 4107 if None not in self.UMAP_data: 4108 4109 dbscan = DBSCAN(eps=eps, min_samples=min_samples) 4110 dbscan_labels = dbscan.fit_predict(self.UMAP_data) 4111 self.dblabels = [str(x) for x in dbscan_labels] 4112 4113 else: 4114 4115 print("\nNo data for DBSCAN. Please use the UMAP() method first.")
Perform DBSCAN clustering on UMAP-transformed data.
DBSCAN identifies clusters based on density, labeling points in dense regions as clusters and others as noise.
Parameters
eps : float, optional Maximum distance between two points to be considered neighbors. Default is 0.5.
min_samples : int, optional Minimum number of points required to form a dense region (cluster). Default is 10.
Notes
Stores the results in the following attribute:
- `self.dblabels` (list of str): Cluster labels assigned by DBSCAN for each point in the UMAP embedding.
def UMAP_on_clusters(
    self,
    min_entities: int = 50,
    width: int = 8,
    height: int = 6,
    n_per_col: int = 20,
):
    """
    Generate UMAP visualizations for clusters filtered by a minimum entity threshold.

    Removes DBSCAN noise ('-1') and clusters containing fewer than
    `min_entities` observations, and produces two UMAP visualizations:

    1. **Cluster UMAP** - points colored by cluster assignment only.
    2. **Cluster x Set UMAP** - points colored by the combination of cluster and set identifier.

    Parameters
    ----------
    min_entities : int, optional
        Minimum number of entities required for a cluster to be included
        in the visualization. Default is 50.

    width : int, optional
        Width of the generated matplotlib figures (in inches). Default is 8.

    height : int, optional
        Height of the generated matplotlib figures (in inches). Default is 6.

    n_per_col : int, optional
        Maximum number of legend entries per column. Default is 20.

    Notes
    -----
    Updates `self.UMAP_plot['static'/'html']['ClusterUMAP']`,
    `self.UMAP_plot['static'/'html']['ClusterXSetsUMAP']`, and replaces
    `self.tmp_data` / `self.tmp_metadata` with the filtered versions
    (the previous versions are kept in `_tmp_data_old` / `_tmp_metadata_old`).
    """

    # `UMAP_data` is None until UMAP() runs; the previous
    # `None not in self.UMAP_data` raised TypeError in that case.
    if self.UMAP_data is None:
        print(
            "\nNo data for visualization. Please use the UMAP() and db_scan() methods first."
        )
        return

    # On re-runs, restore the unfiltered data saved by the previous call.
    if hasattr(self, "_tmp_data_old"):
        self.tmp_data = self._tmp_data_old

    if hasattr(self, "_tmp_metadata_old"):
        self.tmp_metadata = self._tmp_metadata_old

    umap_result = pd.DataFrame(self.UMAP_data.copy())
    umap_result["id"] = self.tmp_metadata.index
    umap_result["clusters"] = self.dblabels
    umap_result = umap_result[umap_result["clusters"] != "-1"]  # drop DBSCAN noise
    tmp_metadata = self.tmp_metadata.copy()
    tmp_metadata["clusters"] = self.dblabels
    tmp_metadata = tmp_metadata[tmp_metadata["clusters"] != "-1"]
    tmp_data = self.tmp_data.copy()
    tmp_data.index = self.dblabels
    tmp_data = tmp_data[tmp_data.index != "-1"]

    label_counts_dict = Counter(self.dblabels)

    label_counts = pd.DataFrame.from_dict(
        label_counts_dict, orient="index", columns=["count"]
    )

    filtered_counts = label_counts[label_counts["count"] > min_entities]

    tmp_metadata["full_id"] = list(
        tmp_metadata["id"].astype(str) + " # " + tmp_metadata["sets"]
    )

    tmp_data.index = tmp_metadata["full_id"]
    umap_result["full_id"] = list(tmp_metadata["full_id"])

    umap_result = umap_result[
        umap_result["clusters"].isin(np.array(filtered_counts.index))
    ]
    tmp_metadata = tmp_metadata[
        tmp_metadata["clusters"].isin(np.array(filtered_counts.index))
    ]

    umap_result = umap_result.sort_values(
        by="clusters", key=lambda x: x.astype(int)
    )

    tmp_data = tmp_data[tmp_data.index.isin(np.array(tmp_metadata["full_id"]))]

    static_fig = umap_static(
        umap_result, width=width, height=height, n_per_col=n_per_col
    )

    html_fig = umap_html(umap_result, width=width * 100, height=height * 100)

    self.UMAP_plot["static"]["ClusterUMAP"] = static_fig
    self.UMAP_plot["html"]["ClusterUMAP"] = html_fig

    tmp_metadata["full_name"] = list(
        tmp_metadata["clusters"] + " # " + tmp_metadata["sets"]
    )

    label_counts_dict = Counter(list(tmp_metadata["full_name"]))

    label_counts = pd.DataFrame.from_dict(
        label_counts_dict, orient="index", columns=["count"]
    )

    filtered_counts = label_counts[label_counts["count"] > min_entities]

    tmp_data.index = tmp_metadata["full_name"]
    # NOTE(review): umap_result was sorted above while tmp_metadata was not —
    # confirm rows are still aligned before relying on this assignment.
    umap_result["clusters"] = list(tmp_metadata["full_name"])

    umap_result = umap_result[
        umap_result["clusters"].isin(np.array(filtered_counts.index))
    ]

    tmp_metadata = tmp_metadata[
        tmp_metadata["full_name"].isin(np.array(filtered_counts.index))
    ]

    tmp_data = tmp_data[tmp_data.index.isin(np.array(filtered_counts.index))]

    static_fig = umap_static(
        umap_result, width=width, height=height, n_per_col=n_per_col
    )

    html_fig = umap_html(umap_result, width=width * 100, height=height * 100)

    self.UMAP_plot["static"]["ClusterXSetsUMAP"] = static_fig

    self.UMAP_plot["html"]["ClusterXSetsUMAP"] = html_fig

    # Remember the pre-filter state so a re-run can restore it.
    self._tmp_data_old = self.tmp_data
    self._tmp_metadata_old = self.tmp_metadata

    self.tmp_data = tmp_data
    self.tmp_metadata = tmp_metadata
Generate UMAP visualizations for clusters filtered by a minimum entity threshold.
This method removes clusters containing fewer than min_entities observations
and produces two UMAP visualizations:
- Cluster UMAP – points colored by cluster assignment only.
- Cluster × Set UMAP – points colored by the combination of cluster and set identifier.
Parameters
min_entities : int, optional Minimum number of entities required for a cluster to be included in the visualization. Default is 50.
width : int, optional Width of the generated matplotlib figures (in inches). Default is 8.
height : int, optional Height of the generated matplotlib figures (in inches). Default is 6.
n_per_col : int, optional Maximum number of legend entries per column. Default is 20.
Notes
This method updates the following attributes:
- `self.UMAP_plot['static']['ClusterUMAP']`: Static matplotlib figure of the filtered cluster-only UMAP.
- `self.UMAP_plot['html']['ClusterUMAP']`: Interactive HTML version of the cluster-only UMAP.
- `self.UMAP_plot['static']['ClusterXSetsUMAP']`: Static matplotlib figure showing clusters combined with set identifiers.
- `self.UMAP_plot['html']['ClusterXSetsUMAP']`: Interactive HTML version of the cluster × set visualization.
- `self.tmp_data`: Dataset filtered to include only clusters meeting the `min_entities` threshold.
- `self.tmp_metadata`: Metadata corresponding to the filtered dataset.
4274 def full_info(self): 4275 """ 4276 Merge data with metadata based on the 'full_id' column. 4277 4278 This method combines `self.tmp_data` and `self.tmp_metadata` into a single DataFrame if the metadata contains a 'full_id' column. If 'full_id' is not present, the method prints a warning to complete the preprocessing pipeline. 4279 4280 Returns 4281 ------- 4282 pd.DataFrame or None 4283 Merged DataFrame containing both data and metadata if 'full_id' exists; otherwise, None. 4284 """ 4285 4286 tmp_data = self.tmp_data.copy() 4287 tmp_metadata = self.tmp_metadata.copy() 4288 4289 if "full_id" in tmp_metadata.columns: 4290 tmp_data.index = tmp_metadata["full_id"] 4291 4292 merged_df = tmp_data.merge( 4293 tmp_metadata, left_index=True, right_on="full_id", how="left" 4294 ) 4295 4296 return merged_df 4297 4298 else: 4299 4300 print("\nMetadata is not completed!") 4301 4302 #################################################################################
Merge data with metadata based on the 'full_id' column.
This method combines self.tmp_data and self.tmp_metadata into a single DataFrame if the metadata contains a 'full_id' column. If 'full_id' is not present, the method prints a warning to complete the preprocessing pipeline.
Returns
pd.DataFrame or None Merged DataFrame containing both data and metadata if 'full_id' exists; otherwise, None.
def DFA(self, meta_group_by: str = "sets", sets: dict | None = None, n_proc=5):
    """
    Perform Differential Feature Analysis (DFA) on specified data groups.

    Conducts DFA using a grouping factor from metadata and an optional
    dictionary of sets for comparison, identifying significant differences
    across the defined groups via the `statistic` helper (Mann-Whitney U test,
    Benjamini-Hochberg FDR correction, effect sizes, fold-changes).

    Parameters
    ----------
    meta_group_by : str, optional
        Metadata column used for grouping during the analysis.
        Default is ``'sets'``. To view available grouping categories,
        use ``self.groups``.

    sets : dict or None, optional
        Dictionary defining groups for pairwise comparison. Keys correspond
        to group names, and values are lists of labels belonging to each
        group, e.g. ``{'healthy': ['21q'], 'disease': ['71q', '77q', '109q']}``.
        When None or fewer than two groups are given, all groups in
        `meta_group_by` are compared against each other. Default is None
        (replaces the previous mutable default ``{}``; behavior unchanged).

    n_proc : int, optional
        Number of CPU cores used for parallel processing. Default is ``5``.

    Notes
    -----
    Stores the result table in ``self.DFA_results``.
    """

    if sets is None:
        sets = {}

    tmp_data = self.tmp_data.copy()

    # Only numeric features can be tested statistically.
    tmp_data = tmp_data.select_dtypes(include="number")

    tmp_metadata = self.tmp_metadata.copy()

    if len(sets.keys()) >= 2:
        # Typo fix in user-facing message ("strated" -> "started").
        print("\nAnalysis started on provided sets dictionary and meta_group_by...")
        tmp_data.index = list(tmp_metadata[meta_group_by])
        tmp_metadata["sets"] = tmp_metadata[meta_group_by]
        results = statistic(
            tmp_data.transpose(), sets=sets, metadata=tmp_metadata, n_proc=n_proc
        )

    else:
        print(
            "\nAnalysis started for all groups against each other in meta_group_by..."
        )
        tmp_data.index = list(tmp_metadata[meta_group_by])
        tmp_metadata["sets"] = tmp_metadata[meta_group_by]
        results = statistic(
            tmp_data.transpose(), sets=None, metadata=tmp_metadata, n_proc=n_proc
        )

    self.DFA_results = results
Perform Differential Feature Analysis (DFA) on specified data groups.
This method conducts DFA using a grouping factor from metadata and a dictionary of sets for comparison. It allows for the identification of significant differences across defined sets.
The analysis includes:
- Mann–Whitney U test
- Percentage of non-zero values
- Means and standard deviations
- Effect size metric (ESM)
- Benjamini–Hochberg FDR correction
- Fold-change and log2 fold-change
Parameters
meta_group_by : str, optional
Metadata column used for grouping during the analysis.
Default is 'sets'.
To view available grouping categories, use self.groups.
sets : dict, optional Dictionary defining groups for pairwise comparison. Keys correspond to group names, and values are lists of labels belonging to each group.
Example
-------
>>> sets = {
... 'healthy': ['21q'],
... 'disease': ['71q', '77q', '109q']
... }
In this configuration, the *healthy* group is compared against the
aggregated *disease* groups.
n_proc : int, optional
Number of CPU cores used for parallel processing.
Default is 5.
Returns
pandas.DataFrame or None A DataFrame containing statistical results for each feature, including:
- ``feature`` : str
- ``p_val`` : float
- ``adj_pval`` : float
- ``pct_valid`` : float
- ``pct_ctrl`` : float
- ``avg_valid`` : float
- ``avg_ctrl`` : float
- ``sd_valid`` : float
- ``sd_ctrl`` : float
- ``esm`` : float
- ``FC`` : float
- ``log(FC)`` : float
- ``norm_diff`` : float
- ``valid_group`` : str
- ``-log(p_val)`` : float
If ``sets`` is ``None``, results for each group are concatenated.
Returns ``None`` in case of errors or invalid parameters.
Notes
- Columns containing only zeros are automatically removed.
- p-values that are equal for both groups produce ``p_val = 1``.
- Benjamini–Hochberg correction is applied separately within each group comparison.
- Fold-change is stabilized using a small, data-derived ``low_factor``.
- Uses the Mann–Whitney U test with ``alternative='two-sided'``.
def heatmap_DFA(
    self,
    p_value: float | int = 0.05,
    top_n: int = 5,
    scale: bool = False,
    clustering: str | None = "ward",
    figsize=(10, 5),
):
    """
    Generate a heatmap of the top DFA features filtered by p-value and log fold change.

    Parameters
    ----------
    p_value : float or int, optional
        Significance threshold; only features with ``p_val < p_value`` are
        kept. Default is 0.05.

    top_n : int, optional
        Number of top features selected per group based on the 'esm' score.
        Default is 5.

    scale : bool, optional
        Whether to apply Min–Max scaling to heatmap values across features.
        Default is False.

    clustering : str or None, optional
        Hierarchical clustering method applied to rows/columns of the
        heatmap. If None, clustering is disabled. Default is 'ward'.

    figsize : tuple, optional
        Size of the resulting matplotlib figure. Default is (10, 5).

    Notes
    -----
    - Only features with a positive log fold change ('log(FC)' > 0) are considered.
    - Heatmap values represent -log10(p_value) for visualization.
    - If ``scale=True``, values are normalized using Min–Max scaling.
    - The generated figure is displayed (when display mode is on) and stored
      in ``self.DFA_plot``.
    """

    # Significantly up-regulated features only.
    selected = self.DFA_results.copy()
    keep = (selected["log(FC)"] > 0) & (selected["p_val"] < p_value)
    selected = selected[keep]

    # Top `top_n` features per group, ranked by effect size ('esm').
    selected = (
        selected.sort_values(["valid_group", "esm"], ascending=[True, False])
        .groupby("valid_group", as_index=False)
        .head(top_n)
    )

    # Feature x group matrix of -log(p_val); missing combinations become 0.
    matrix = selected.pivot(
        index="feature", columns="valid_group", values="-log(p_val)"
    ).fillna(0)

    cbar_label = "-log10(p_value)"

    if scale:
        cbar_label = f"scaled({cbar_label})"
        matrix = pd.DataFrame(
            MinMaxScaler().fit_transform(matrix),
            index=matrix.index,
            columns=matrix.columns,
        )

    if clustering is not None:
        # Reorder rows and columns by hierarchical-clustering leaf order.
        row_order = leaves_list(linkage(matrix.values, method=clustering))
        col_order = leaves_list(linkage(matrix.values.T, method=clustering))
        matrix = matrix.iloc[row_order, col_order]

    fig = plt.figure(figsize=figsize)
    sns.heatmap(
        matrix,
        cmap="viridis",
        linewidths=0.5,
        linecolor="gray",
        cbar_kws={"label": cbar_label},
        fmt=".2f",
    )
    plt.ylabel("Feature")
    plt.xlabel("Cluster")
    plt.xticks(rotation=30, ha="right")

    plt.tight_layout()

    if cfg._DISPLAY_MODE:
        plt.show()

    self.DFA_plot = fig
Generate a heatmap of the top DFA features filtered by p-value and log fold change.
Parameters
p_value : float or int, optional Significance threshold used to filter features by their p-value. Only features with p_val < p_value are included. Default is 0.05.
top_n : int, optional Number of top features selected per group based on the 'esm' score. Default is 5.
scale : bool, optional Whether to apply Min–Max scaling to heatmap values across features. Default is False.
clustering : str or None, optional Hierarchical clustering method applied to rows/columns of the heatmap. If None, clustering is disabled. Default is 'ward'.
figsize : tuple, optional Size of the resulting matplotlib figure. Default is (10, 5).
Notes
- Only features with a positive log fold change ('log(FC)' > 0) are considered.
- Heatmap values represent -log10(p_value) for visualization.
- If ``scale=True``, values are normalized using Min–Max scaling.
- The generated figure is displayed and stored in ``self.DFA_plot``.
4500 def get_DFA_plot(self, show: bool = True): 4501 """ 4502 Retrieve the heatmap figure generated by `heatmap_DFA()`. 4503 4504 Parameters 4505 ---------- 4506 show : bool, optional 4507 Whether to display the stored heatmap figure. Default is True. 4508 4509 Returns 4510 ------- 4511 matplotlib.figure.Figure 4512 The figure object containing the DFA heatmap. 4513 """ 4514 4515 if self.DFA_plot is None: 4516 print("\nNo results to return! Please run the heatmap_DFA() method first.") 4517 else: 4518 if cfg._DISPLAY_MODE: 4519 if show is True: 4520 self.DFA_plot 4521 try: 4522 display(self.DFA_plot) 4523 except: 4524 None 4525 4526 return self.DFA_plot
Retrieve the heatmap figure generated by heatmap_DFA().
Parameters
show : bool, optional Whether to display the stored heatmap figure. Default is True.
Returns
matplotlib.figure.Figure The figure object containing the DFA heatmap.
4528 def print_avaiable_features(self): 4529 """ 4530 Print the available features (columns) in the current dataset. 4531 4532 This method lists all column names in `self.tmp_data` to help identify which features are available for analysis. 4533 4534 Example 4535 ------- 4536 >>> group_analysis.print_avaiable_features() 4537 """ 4538 4539 print("Avaiable features:") 4540 for cl in self.tmp_data.columns: 4541 print(cl)
Print the available features (columns) in the current dataset.
This method lists all column names in self.tmp_data to help identify which features are available for analysis.
Example
>>> group_analysis.print_avaiable_features()
def proportion_analysis(
    self,
    grouping_col: str = "sets",
    val_col: str = "nuclei_per_img",
    grouping_dict=None,
    omit=None,
):
    """
    Perform proportion analysis by comparing the distribution of values across groups.

    This method analyzes the distribution of values (e.g., nuclei counts)
    across different groups defined in the dataset. It can optionally merge
    categories, omit specific values, and produces both statistical results
    and a visualization.

    Parameters
    ----------
    grouping_col : str, optional
        Column to group by. Default is 'sets'.

    val_col : str, optional
        Column containing the values to analyze. Default is 'nuclei_per_img'.

    grouping_dict : dict or None, optional
        Dictionary mapping new group names to categories in `grouping_col`.
        If None, analysis is based on the original groups.

    omit : str, list, or None, optional
        Values to exclude from the analysis. Default is None.

    Attributes
    ----------
    proportion_stats : pd.DataFrame
        Chi-square test results for pairwise group comparisons
        (without the significance-label column).

    proportion_plot : matplotlib.figure.Figure
        Plot visualizing the proportions across groups.

    Example
    -------
    >>> group_analysis.proportion_analysis(
    ...     grouping_col='sets',
    ...     val_col='nuclei_per_img',
    ...     grouping_dict={'Group A': [1, 2], 'Group B': [3, 4]},
    ...     omit=5
    ... )
    """

    andata = self.tmp_data.copy()

    andata[grouping_col] = list(self.tmp_metadata[grouping_col])

    andata = andata[[grouping_col, val_col]]

    # Drop excluded values; `omit` may be a single value or a list.
    if omit is not None:
        if isinstance(omit, list):
            andata = andata[~andata[val_col].isin(omit)]
        else:
            andata = andata[andata[val_col] != omit]

    andata = andata.reset_index(drop=True)
    andata["index_col"] = andata.index

    # Optionally collapse original categories into user-defined super-groups.
    if isinstance(grouping_dict, dict):
        for new_group, members in grouping_dict.items():
            andata.loc[andata[grouping_col].isin(members), grouping_col] = new_group

    # Contingency table: counts of each value per group.
    df_pivot = andata.pivot_table(
        index=val_col,
        columns=grouping_col,
        values="index_col",
        aggfunc="count",
        fill_value=0,
    )

    # Reorder rows/columns by hierarchical-clustering leaf order (ward).
    Z_rows = linkage(df_pivot.values, method="ward")
    row_order = leaves_list(Z_rows)

    Z_cols = linkage(df_pivot.values.T, method="ward")
    col_order = leaves_list(Z_cols)

    df_pivot = df_pivot.iloc[row_order, col_order]

    # Compute the chi-square pairwise statistics ONCE (the original ran
    # chi_pairs() twice) and snapshot the result before the label column
    # is added, so proportion_stats stays free of the display-only column.
    chi_df = chi_pairs(df_pivot)
    self.proportion_stats = chi_df.copy()

    chi_df["Significance_Label"] = chi_df["p-value"].apply(get_significance_label)

    self.proportion_plot = prop_plot(df_pivot, chi_df)
Perform proportion analysis by comparing the distribution of values across groups.
This method analyzes the distribution of values (e.g., nuclei counts) across different groups defined in the dataset. It can optionally group categories, omit specific values, and produces both statistical results and a visualization.
Parameters
grouping_col : str, optional Column to group by. Default is 'sets'.
val_col : str, optional Column containing the values to analyze. Default is 'nuclei_per_img'.
grouping_dict : dict or None, optional
Dictionary mapping new group names to categories in grouping_col. If None, analysis is based on the original groups.
omit : str, list, or None, optional Values to exclude from the analysis. Default is None.
Attributes
proportion_stats : pd.DataFrame DataFrame containing chi-square test results for pairwise group comparisons.
proportion_plot : matplotlib.figure.Figure Plot visualizing the proportions across groups.
Example
>>> group_analysis.proportion_analysis(
... grouping_col='sets',
... val_col='nuclei_per_img',
... grouping_dict={'Group A': [1, 2], 'Group B': [3, 4]},
... omit=5
... )
4632 def get_proportion_plot(self, show: bool = True): 4633 """ 4634 Retrieve the proportion bar plot generated by the `proportion_analysis()` method. 4635 4636 Parameters 4637 ---------- 4638 show : bool, optional 4639 Whether to display the proportion bar plot. Default is True. 4640 4641 Returns 4642 ------- 4643 matplotlib.figure.Figure 4644 The figure object containing the proportion bar plot. 4645 """ 4646 4647 if self.proportion_plot is None: 4648 print( 4649 "\nNo results to return! Please run the proportion_analysis() method first." 4650 ) 4651 else: 4652 if cfg._DISPLAY_MODE: 4653 if show: 4654 self.proportion_plot 4655 try: 4656 display(self.proportion_plot) 4657 except: 4658 None 4659 4660 return self.proportion_plot
Retrieve the proportion bar plot generated by the proportion_analysis() method.
Parameters
show : bool, optional Whether to display the proportion bar plot. Default is True.
Returns
matplotlib.figure.Figure The figure object containing the proportion bar plot.
4662 def get_proportion_stats(self): 4663 """ 4664 Retrieve the proportion statistics computed by the `proportion_analysis()` method. 4665 4666 Returns 4667 ------- 4668 pd.DataFrame 4669 The proportion statistics stored in `self.proportion_stats`. 4670 """ 4671 4672 if None in self.proportion_stats: 4673 print( 4674 "\nNo results to return! Please run the proportion_analysis() method first." 4675 ) 4676 else: 4677 return self.proportion_stats
Retrieve the proportion statistics computed by the proportion_analysis() method.
Returns
pd.DataFrame
The proportion statistics stored in self.proportion_stats.