Source code for hsimars.hsi

"""
Open and manipulate hyperspectral images (HSI) from Mars Reconnaissance Orbiter.

This module provides the `HSIMars` class for working with hyperspectral imaging data from the CRISM instrument aboard the Mars Reconnaissance Orbiter. It supports loading, processing, and visualising HSI data and their annotations.

The module handles:
- Loading ENVI format hyperspectral images (.hdr and .img files)
- Processing HSI data (removing bad bands, cropping, normalisation)
- Loading and aligning ground truth annotations (.mat files)
- Visualising spectral data, false-colour images, and annotations
- Plotting spectra with optional convex hull removal
- Generating histograms for spectral bands

**Author**: Riccardo Finotello <riccardo.finotello@cea.fr>

**Maintainer**: Riccardo Finotello <riccardo.finotello@cea.fr>

**Contributors**:

    - Riccardo Finotello

Examples
--------
>>> from hsimars import HSIMars
>>> hsi = HSIMars(
...     hdr_path="path/to/image.hdr", annotations="path/to/annotations.mat"
... )
>>> img_data = hsi.get_img()
>>> print(f"Image shape: {img_data.shape}")
>>> hsi.plot_spectra(px=[100, 200], convex_hull=True, bands=True)
"""

from __future__ import annotations

from collections import namedtuple
from pathlib import Path
from typing import NamedTuple

import cv2
import matplotlib as mpl
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from numpy.typing import NDArray
from pysptools.spectro import convex_hull_removal
from scipy.interpolate import splev, splrep
from scipy.io import loadmat
from spectral.io import envi

# Configure matplotlib for consistent styling
plt.style.use("grayscale")
mpl.rc("font", size=16)


[docs] class HSIMars: """ Load and manipulate hyperspectral images from Mars Reconnaissance Orbiter. This class provides functionality for working with CRISM hyperspectral data: loading ENVI format images, processing spectral data, handling ground truth annotations, and creating visualisations. The class uses lazy loading for memory efficiency - data loads from disk only when first accessed, then caches for later use. Attributes ---------- hdr_path : Path Path to the ENVI header file (.hdr extension). annotations : Path or None Path to the ground truth annotations file (.mat format), if provided. Examples -------- >>> # Load HSI data without annotations >>> hsi = HSIMars(hdr_path="data/sample.hdr") >>> img_data = hsi.get_img() >>> print( ... f"Image dimensions: {img_data.height}x{img_data.width}, {img_data.channels} channels" ... ) >>> # Load HSI data with annotations >>> hsi = HSIMars( ... hdr_path="data/sample.hdr", annotations="data/labels.mat" ... ) >>> img_data, ann_data = hsi.data() >>> hsi.display() # Show image with overlaid annotations >>> # Plot spectrum for a specific pixel >>> hsi.plot_spectra(px=[100, 200], convex_hull=True, bands=True) Notes ----- The ENVI .img file containing the hyperspectral data must be in the same directory as the .hdr header file. """ def __init__( self, hdr_path: str | Path, annotations: str | Path | None = None, label_names_path: str | Path | None = None, ): """ Initialise the HSIMars object with paths to data files. Parameters ---------- hdr_path : str | Path Path to the ENVI header file (.hdr extension) with metadata about the hyperspectral image. The corresponding .img file with spectral data must be in the same directory. annotations : str | Path, optional Path to the ground truth annotations file (.mat format). If None, annotation methods return None. Default is None. label_names_path : str | Path, optional Path to the Excel file with label name mappings. If None, looks for ``data_description.xlsx`` in the same directory as the HDR file. Default is None. Raises ------ FileNotFoundError If the header file does not exist. FileNotFoundError If annotations path is provided but the file does not exist. FileNotFoundError If label_names_path is provided but the file does not exist. Examples -------- >>> hsi = HSIMars(hdr_path="path/to/image.hdr") >>> hsi_with_labels = HSIMars( ... hdr_path="path/to/image.hdr", annotations="path/to/labels.mat" ... ) >>> hsi_custom_labels = HSIMars( ... hdr_path="path/to/image.hdr", ... annotations="path/to/labels.mat", ... label_names_path="path/to/custom_labels.xlsx", ... ) Notes ----- The constructor validates file paths only - no data loads until a get_* method is called. This lazy loading reduces memory usage with large datasets. """ self.hdr_path: Path = Path(hdr_path) if not self.hdr_path.exists(): raise FileNotFoundError( f"The header file {self.hdr_path} could not be found in the filesystem." ) self.annotations: Path | None = None if annotations is not None: self.annotations = Path(annotations) if not self.annotations.exists(): raise FileNotFoundError( f"The annotation file {self.annotations} could not be found in the filesystem." ) self.label_names_path: Path | None = None if label_names_path is not None: self.label_names_path = Path(label_names_path) if not self.label_names_path.exists(): raise FileNotFoundError( f"The label names file {self.label_names_path} could not be found in the filesystem." ) # Cache large objects to avoid repeated disk I/O and processing # These are populated on first access via get_* methods self._raw: envi.BsqFile | None = None self._img: NamedTuple | None = None self._ann: NamedTuple | None = None self._false_colour_bands: NDArray | None = None
[docs] def get_raw(self) -> envi.BsqFile: """ Load the raw ENVI hyperspectral data file. This method opens the ENVI format file and returns a file object for accessing the raw spectral data. The data caches after the first call to avoid redundant disk I/O. Returns ------- envi.BsqFile ENVI file object for accessing the hyperspectral data. This object supports memory-mapped access for large files. Examples -------- >>> hsi = HSIMars(hdr_path="data/sample.hdr") >>> raw = hsi.get_raw() >>> print(raw.metadata["wavelength"]) # Access wavelength information Notes ----- The spectral data is in a .img file that must be in the same directory as the .hdr file. The ENVI library automatically finds and opens the .img file. """ if self._raw is None: self._raw = envi.open(self.hdr_path) return self._raw
[docs] def get_img(self) -> NamedTuple: """ Load and process the hyperspectral image data. This method loads the raw HSI data, performs preprocessing steps including bad band removal, cropping of invalid pixels, and normalization. The result is cached for efficient subsequent access. The preprocessing pipeline includes: 1. Loading wavelength information and identifying bad bands 2. Removing pixels with the ignore value (65535) 3. Cropping the image to remove rows/columns with no valid data 4. Removing any remaining bad channels 5. Converting to float32 format for numerical processing Returns ------- NamedTuple A named tuple (HSIMarsImageData) containing the following attributes: - **hsi** : ndarray of shape (height, width, channels) The processed hyperspectral image data as a 3D numpy array. Data type is float32. - **wavelength** : ndarray of shape (channels,) Array of wavelength values in nanometers corresponding to each spectral channel. - **shape** : tuple of (height, width, channels) Dimensions of the hyperspectral image. - **height** : int Number of pixel rows in the image. - **width** : int Number of pixel columns in the image. - **channels** : int Number of spectral bands/channels in the image. - **dtype** : str Data type of the HSI array ('float32'). Examples -------- >>> hsi = HSIMars(hdr_path="data/sample.hdr") >>> img_data = hsi.get_img() >>> print(f"Image shape: {img_data.shape}") >>> print( ... f"Wavelength range: {img_data.wavelength.min():.1f} - {img_data.wavelength.max():.1f} nm" ... ) >>> print(f"Data type: {img_data.dtype}") >>> # Access specific pixel spectrum >>> spectrum = img_data.hsi[100, 200, :] >>> print(f"Spectrum at (100, 200): {spectrum.shape[0]} bands") Notes ----- The method implements lazy evaluation - the image is only loaded and processed on the first call. Subsequent calls return the cached result. The bad band list (bbl) from the ENVI metadata is used to filter out unreliable spectral channels before further processing. """ if self._img is not None: return self._img # Get the raw data img = self.get_raw() # Recover the image and the metadata img_memmap = img.open_memmap() img_metadata = img.metadata # Extract wavelength information and ignore value for invalid pixels wl = np.array(img_metadata["wavelength"]).astype("float32") ignore_value = float(img_metadata["data ignore value"]) # Select the default bands for false-color visualization bands = np.array(img_metadata["default bands"]).astype(int) bands = wl[bands] # Filter out bad bands based on metadata bad band list (bbl) bbl = np.array(img_metadata["bbl"]).astype("bool") img_memmap = img_memmap[..., bbl] wl = wl[bbl] # Crop the image by removing rows/columns containing only ignore values # This reduces memory usage and eliminates invalid border regions mask = img_memmap != ignore_value mask_channels = mask.sum(axis=2) good_rows = mask_channels.sum(axis=1) > 0 good_cols = mask_channels.sum(axis=0) > 0 img_memmap = img_memmap[good_rows, :][:, good_cols] # Find and remove any remaining bad channels that still contain # ignore values mask = img_memmap == ignore_value idx = np.unique(np.argwhere(mask)[..., 2]) ch = np.ones((img_memmap.shape[2],), dtype="bool") ch[idx] = False img_memmap = img_memmap[..., ch] wl = wl[ch] # Update the default bands indices for false-color visualization self._false_colour_bands = np.searchsorted(wl, bands).astype(int) output = namedtuple( "HSIMarsImageData", [ "hsi", "wavelength", "shape", "height", "width", "channels", "dtype", ], ) self._img = output( hsi=img_memmap.astype("float32"), wavelength=wl, shape=img_memmap.shape, height=img_memmap.shape[0], width=img_memmap.shape[1], channels=img_memmap.shape[2], dtype="float32", ) return self._img
def _pad_annotations(self, mat: NDArray) -> NDArray: """ Pad annotation matrix to match the processed HSI dimensions. The annotation matrix may have different dimensions than the processed HSI data after cropping. This method centers the annotations within the target dimensions by adding symmetric padding. Parameters ---------- mat : NDArray The raw annotation matrix to be padded. Returns ------- NDArray The padded annotation matrix matching HSI dimensions. Notes ----- Padding is distributed symmetrically around the annotation data, with any odd remainder added to the bottom/right edges. """ # Get target shape from processed image img = self.get_img() H, W = img.height, img.width h, w = mat.shape # Calculate symmetric padding for height and width pad_top = (H - h) // 2 pad_bottom = H - h - pad_top pad_left = (W - w) // 2 pad_right = W - w - pad_left return np.pad(mat, ((pad_top, pad_bottom), (pad_left, pad_right))) def _load_label_names(self) -> dict[int, str]: """ Load label names from the data description Excel file. This method reads the data description Excel file and extracts the mapping between numerical class labels and their human-interpretable names for the current dataset. The method automatically identifies the dataset by extracting a two-letter prefix from the HDR filename (e.g., "HC", "NF", or "UP") and locates the corresponding section in the Excel file. It then parses the class ID and class name columns to build the label mapping dictionary. Returns ------- dict[int, str] A dictionary mapping numerical class labels (integers) to their corresponding human-readable class names (strings). For example: {1: 'Analcime', 2: 'Plagioclase', 3: 'Prehnite', ...} Returns an empty dictionary if: - The Excel file is not found - The dataset prefix is not found in the Excel file - The Excel file structure is invalid or cannot be parsed Notes ----- If ``label_names_path`` was provided during initialization, that file will be used. Otherwise, the method looks for a file named ``data_description.xlsx`` in the same directory as the HDR file. The file should contain sections for each dataset, where each section starts with a row containing the dataset name (e.g., "HC", "NF", "UP"), followed by a header row with columns "Class ID", "Class Name", and "Total", and then data rows containing the class ID and name pairs. This method is called internally by :meth:`get_annotations` to populate the ``label_names`` field of the ``HSIMarsAnnotationData`` named tuple. Examples -------- The expected Excel file structure for a dataset section: .. code-block:: text HC Class ID | Class Name | Total 1 | Analcime | 940 2 | Plagioclase | 1472 3 | Prehnite | 1560 ... Warnings -------- If the Excel file or dataset section cannot be found, this method returns an empty dictionary rather than raising an exception. This allows the annotation loading process to continue even when label names are unavailable, though the ``label_names`` field will be empty. """ # Determine which Excel file to use if self.label_names_path is not None: excel_path = self.label_names_path else: # Look for data_description.xlsx in same directory as HDR file excel_path = self.hdr_path.parent / "data_description.xlsx" # Return empty dict if Excel file doesn't exist if not excel_path.exists(): return {} # Extract dataset prefix from HDR filename (first 2 characters) dataset_name = self.hdr_path.stem[:2].upper() try: # Read Excel file without header to access raw cell data df = pd.read_excel(excel_path, header=None) # Find the row containing the dataset name identifier dataset_row_idx = None for idx, row in df.iterrows(): if row[0] == dataset_name: dataset_row_idx = idx break # Return empty dict if dataset not found in Excel if dataset_row_idx is None: return {} # Skip the header row (next row after dataset name) # and start reading class labels from the following row data_start_row = dataset_row_idx + 2 # Parse class ID and name pairs until hitting end marker label_map = {} for row_idx in range(data_start_row, len(df)): row = df.iloc[row_idx] # Stop at empty row or "Total" row (end of section) if pd.isna(row[0]): break # Parse and store class ID to name mapping try: class_id = int(row[0]) class_name = str(row[1]).strip() label_map[class_id] = class_name except (ValueError, TypeError): # Skip rows that don't contain valid ID/name pairs continue return label_map except Exception: # Return empty dict if any error occurs during parsing return {}
[docs] def get_annotations(self) -> NamedTuple | None: """ Load and process ground truth annotation data. Loads annotation labels from a MATLAB .mat file and aligns them with the processed HSI data dimensions. The result is cached for efficient subsequent access. Returns ------- NamedTuple or None If annotations were provided during initialization, returns a named tuple (HSIMarsAnnotationData) with the following attributes: - **labels** : ndarray of shape (height, width) 2D array containing class labels for each pixel. Values are unsigned integers representing different material classes. A value of 0 typically indicates unlabeled/background pixels. - **shape** : tuple of (height, width) Dimensions of the annotation matrix. - **height** : int Number of pixel rows (matches HSI height). - **width** : int Number of pixel columns (matches HSI width). - **dtype** : str Data type of the labels array ('uint8'). - **label_names** : dict[int, str] Dictionary mapping numerical class labels to human-readable class names. For example: {1: 'Analcime', 2: 'Plagioclase'}. Will be an empty dictionary if the label mapping file is not found or cannot be parsed. Returns None if no annotation file was provided during initialization. Examples -------- >>> hsi = HSIMars( ... hdr_path="data/sample.hdr", annotations="data/labels.mat" ... ) >>> ann_data = hsi.get_annotations() >>> if ann_data is not None: ... print(f"Annotation shape: {ann_data.shape}") ... unique_labels = np.unique(ann_data.labels) ... print(f"Number of classes: {len(unique_labels)}") Notes ----- The annotation matrix is automatically padded to match the dimensions of the processed HSI data. This ensures pixel-level alignment between spectral data and labels. The method implements lazy evaluation - annotations are only loaded on the first call. Subsequent calls return the cached result. """ if self._ann is not None: return self._ann if self.annotations is None: return None # Load the annotation matrix from MATLAB file ann = loadmat(self.annotations) # Extract the first numpy array found in the .mat file mat = None for v in ann.values(): if isinstance(v, np.ndarray): mat = v.astype("uint8") break if mat is None: raise ValueError( f"No valid annotation matrix found in {self.annotations}" ) # Pad the annotation matrix to match the processed image dimensions mat = self._pad_annotations(mat) # Load label names from the data description Excel file label_names = self._load_label_names() output = namedtuple( "HSIMarsAnnotationData", ["labels", "shape", "height", "width", "dtype", "label_names"], ) self._ann = output( labels=mat, shape=mat.shape, height=mat.shape[0], width=mat.shape[1], dtype="uint8", label_names=label_names, ) return self._ann
[docs] def data(self) -> tuple[NamedTuple, NamedTuple | None]: """ Load both hyperspectral image and annotation data. This convenience method loads both the HSI data and annotations (if available) in a single call, ensuring both are cached for subsequent operations. Returns ------- tuple[NamedTuple, NamedTuple or None] A tuple containing two elements: 1. **HSIMarsImageData** (NamedTuple): - **hsi** : ndarray The HSI data array of shape (height, width, channels). - **wavelength** : ndarray Array of wavelength values in nm. - **shape** : tuple Dimensions (height, width, channels). - **height** : int Number of pixel rows. - **width** : int Number of pixel columns. - **channels** : int Number of spectral bands. - **dtype** : str Data type ('float32'). 2. **HSIMarsAnnotationData** (NamedTuple or None): If annotations are available: - **labels** : ndarray Label array of shape (height, width). - **shape** : tuple Dimensions (height, width). - **height** : int Number of pixel rows. - **width** : int Number of pixel columns. - **dtype** : str Data type ('uint8'). - **label_names** : dict[int, str] Dictionary mapping numerical class labels to human-readable class names. Returns None if no annotations were provided. Examples -------- >>> hsi = HSIMars( ... hdr_path="data/sample.hdr", annotations="data/labels.mat" ... ) >>> img_data, ann_data = hsi.data() >>> print( ... f"Image: {img_data.shape}, Annotations: {ann_data.shape if ann_data else 'None'}" ... ) Notes ----- This method is equivalent to calling `get_img()` and `get_annotations()` separately, but provides a more convenient interface when both datasets are needed. """ return self.get_img(), self.get_annotations()
def _prepare_img(self, img: NDArray) -> NDArray: """ Prepare HSI data for visualization by creating a false-color RGB image. Selects three spectral bands from the full hyperspectral cube and normalizes them to 8-bit RGB format suitable for display. Parameters ---------- img : NDArray The full hyperspectral image array. Returns ------- NDArray 8-bit RGB image suitable for display with OpenCV. Notes ----- Uses the default bands specified in the ENVI header for false-color visualization. These typically correspond to visible and near-infrared wavelengths that provide good visual contrast. """ # Select the false-color bands for RGB visualization img = img[..., self._false_colour_bands] # Normalize to 8-bit range [0, 255] for display img = cv2.normalize( img, None, 0, 255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U ) return img def _prepare_ann(self, img: NDArray) -> NDArray: """ Prepare annotation data for visualization with color mapping. Converts label values to an 8-bit format, applies a colormap for visual distinction between classes, and preserves background pixels as black. Parameters ---------- img : NDArray The annotation label array. Returns ------- NDArray 8-bit RGB image with colormap applied, suitable for display. Notes ----- Pixels with label value 0 (background/unlabeled) are displayed as black (0, 0, 0). Other labels are mapped to colors using the TURBO colormap for maximum visual distinction. """ # Identify background pixels (label == 0) mask = img == 0 # Normalize labels to 8-bit range img = cv2.normalize( img, None, 0, 255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U ) # Apply TURBO colormap for visual distinction between classes img = cv2.applyColorMap(img, cv2.COLORMAP_TURBO) # Preserve background as black img[mask] = (0, 0, 0) return img def _cv2_imshow(self, title: str, mat: NDArray) -> None: """ Display an image in an OpenCV window with keyboard interaction. Creates a resizable window, displays the image, and waits for a keypress before closing the window. Parameters ---------- title : str Window title. mat : NDArray Image array to display. Notes ----- The window is created with WINDOW_NORMAL flag, making it resizable. Press any key to close the window. """ # Create a resizable named window cv2.namedWindow(title, flags=cv2.WINDOW_NORMAL) cv2.imshow(title, mat) cv2.waitKey(0) # Wait for any keypress cv2.destroyWindow(title)
[docs] def display_hsi(self) -> None: """ Display the hyperspectral image as a false-color RGB visualization. Opens an interactive OpenCV window showing the HSI data rendered using three representative spectral bands. The window can be resized and closed by pressing any key. Examples -------- >>> hsi = HSIMars(hdr_path="data/sample.hdr") >>> hsi.display_hsi() # Opens window, press any key to close Notes ----- The false-color bands are automatically selected from the ENVI header metadata, typically representing visible and near-infrared wavelengths for optimal visual interpretation. """ # Load and prepare the HSI for visualization img = self.get_img().hsi img = self._prepare_img(img) self._cv2_imshow(self.hdr_path.stem, img)
[docs] def display_annotations(self) -> None: """ Display the ground truth annotations with color-coded labels. Opens an interactive OpenCV window showing the annotation labels with a colormap applied for visual distinction between classes. Background pixels (label 0) are displayed in black. Raises ------ AttributeError If no annotations were provided during initialization. Examples -------- >>> hsi = HSIMars( ... hdr_path="data/sample.hdr", annotations="data/labels.mat" ... ) >>> hsi.display_annotations() # Opens window, press any key to close Notes ----- The TURBO colormap is applied to provide maximum visual distinction between different material classes in the annotations. """ # Load and prepare annotations for visualization ann_data = self.get_annotations() if ann_data is None: raise ValueError( "No annotations available. Provide annotations path during initialization." ) ann = ann_data.labels.astype("float32") ann = self._prepare_ann(ann) self._cv2_imshow(self.hdr_path.stem + " - annotations", ann)
[docs] def display(self) -> None: """ Display comprehensive visualization with HSI, annotations, and overlay. Opens an interactive OpenCV window showing: - Left panel: False-color HSI visualization - Middle panel: Color-coded annotations (if available) - Right panel: Semi-transparent overlay of annotations on HSI (if available) If no annotations are provided, displays only the HSI. Examples -------- >>> # With annotations >>> hsi = HSIMars( ... hdr_path="data/sample.hdr", annotations="data/labels.mat" ... ) >>> hsi.display() # Shows three-panel view >>> # Without annotations >>> hsi = HSIMars(hdr_path="data/sample.hdr") >>> hsi.display() # Shows only HSI Notes ----- The overlay uses 75% weight for the HSI and 25% weight for the annotations, providing a good balance between seeing spectral features and class boundaries. """ # Load and prepare the HSI img = self.get_img().hsi img = self._prepare_img(img) # If annotations are available, create a three-panel display if self.annotations is not None: ann_data = self.get_annotations() if ann_data is not None: ann = ann_data.labels.astype("float32") ann = self._prepare_ann(ann) # Create semi-transparent overlay (75% HSI, 25% annotations) sup = cv2.addWeighted(img, 0.75, ann, 0.25, 0.0) # Concatenate horizontally: HSI | Annotations | Overlay img = cv2.hconcat([img, ann, sup]) self._cv2_imshow(self.hdr_path.stem, img)
[docs] def plot_spectra( self, px: list[int, int] | list[list[int, int]] | NDArray, convex_hull: bool = False, bands: bool = False, output: str | Path | None = None, ) -> None: """ Plot spectral signature(s) for specified pixel location(s). Generates a line plot showing reflectance/intensity as a function of wavelength. For multiple pixels, plots the mean spectrum with standard deviation shading. Optionally applies convex hull removal to normalize continuum and highlights spectral band regions. Parameters ---------- px : list[int, int] | list[list[int, int]] | NDArray Pixel coordinates to extract spectra from. Can be: - Single pixel: [row, col] or [[row, col]] - Multiple pixels: [[row1, col1], [row2, col2], ...] or 2D array Coordinates are in (row, column) format, 0-indexed. convex_hull : bool, optional If True, applies convex hull removal to normalize the spectrum continuum. This technique is useful for analyzing absorption features by removing the overall spectral shape. Default is False. bands : bool, optional If True, overlays colored regions indicating spectral band types: - VIS (Visible): < 750 nm (green) - NIR (Near-Infrared): 750-1400 nm (red) - SWIR (Short-Wave Infrared): 1400-3000 nm (blue) - MWIR (Mid-Wave Infrared): > 3000 nm (magenta) Default is False. output : str | Path, optional Path to save the plot as an image file. If None (default), displays the plot interactively using matplotlib's show(). The directory will be created if it doesn't exist. Examples -------- >>> hsi = HSIMars(hdr_path="data/sample.hdr") >>> # Plot single pixel spectrum >>> hsi.plot_spectra(px=[100, 200]) >>> # Plot average spectrum of multiple pixels with standard deviation >>> pixels = [[100, 200], [101, 200], [100, 201], [101, 201]] >>> hsi.plot_spectra(px=pixels, convex_hull=True, bands=True) >>> # Save plot to file >>> hsi.plot_spectra(px=[100, 200], output="plots/spectrum.png") Notes ----- Convex hull removal is performed using the pysptools library. This technique divides the spectrum by its convex hull envelope, effectively normalizing the continuum and emphasizing absorption features. For multiple pixels, the standard deviation is shown as a shaded region around the mean spectrum, providing visual indication of spectral variability within the selected region. """ # Load the hyperspectral image data img = self.get_img() hsi = img.hsi wl = img.wavelength # Convert pixel coordinates to 2D array format for consistent handling px = np.atleast_2d(np.array(px)) # Extract spectra for the specified pixels spec = hsi[px[:, 0], px[:, 1]] # Calculate mean and standard deviation if spec.shape[0] == 1: # Single pixel: no standard deviation spec_mean = spec[0] spec_std = None else: # Multiple pixels: compute statistics spec_mean = spec.mean(axis=0) spec_std = spec.std(axis=0) # Apply convex hull removal for continuum normalization if requested if convex_hull: spec_mean, x_hull, y_hull = convex_hull_removal(spec_mean, wl) spec_mean = 1.0 - np.array(spec_mean) # Also normalize the standard deviation using the convex hull if spec_std is not None: # Interpolate hull values at all wavelengths interp = splrep(x_hull, y_hull, k=1) y_hull = splev(wl, interp, der=0) spec_std /= y_hull # Create the plot with appropriate size and layout _, ax = plt.subplots(figsize=(7, 5), layout="constrained") ax.grid(True, alpha=0.15, ls="dashed") # Overlay spectral band regions if requested if bands: # Position text labels at 75% of the y-axis range y_pos = ( 0.75 * (spec_mean.max() - spec_mean.min()) + spec_mean.min() ) # Visible band (VIS): < 750 nm ax.axvspan(wl.min(), 750, color="g", alpha=0.15) ax.text( wl.min() + (750 - wl.min()) / 2.0, y_pos, "VIS", color="g", rotation=90, va="bottom", ha="center", ) # Near-Infrared band (NIR): 750-1400 nm ax.axvspan(750, 1400, color="r", alpha=0.15) ax.text( 750 + (1400 - 750) / 2.0, y_pos, "NIR", color="r", rotation=90, va="bottom", ha="center", ) # Short-Wave Infrared band (SWIR): 1400-3000 nm ax.axvspan(1400, 3000, color="b", alpha=0.15) ax.text( 1400 + (3000 - 1400) / 2.0, y_pos, "SWIR", color="b", rotation=90, va="bottom", ha="center", ) # Mid-Wave Infrared band (MWIR): > 3000 nm ax.axvspan(3000, wl.max(), color="m", alpha=0.15) ax.text( 3000 + (wl.max() - 3000) / 2.0, y_pos, "MWIR", color="m", rotation=90, va="bottom", ha="center", ) # Plot the mean spectrum ax.plot(wl, spec_mean, "k-", label="spectrum") # Add standard deviation shading for multiple pixels if spec_std is not None: ax.fill_between( wl, (spec_mean - spec_std).clip(0, None), # Clip to non-negative spec_mean + spec_std, label="$\\pm \\sigma$", color="k", alpha=0.15, ) # Add legend when showing standard deviation ax.legend( loc="upper center", bbox_to_anchor=(0.5, -0.15), ncol=2, frameon=False, ) # Set axis labels ax.set_xlabel("wavelength (nm)") ax.set_ylabel("intensity (a.u.)") # Display or save the plot if output is None: plt.show() else: output = Path(output) output.parent.mkdir(parents=True, exist_ok=True) plt.savefig(str(output)) plt.close() # Close the figure to free memory
[docs] def plot_histogram( self, band: int | float, output: str | Path | None = None ) -> None: """ Plot the intensity distribution histogram for a specific spectral band. Generates a probability density histogram showing the distribution of pixel intensity values across the image for the selected wavelength band. Useful for analyzing the statistical properties of spectral features. Parameters ---------- band : int | float Spectral band selector. Can be: - **int**: Direct band index (0-based) in the spectral dimension. - **float**: Wavelength in nanometers. The closest available wavelength will be automatically selected. output : str | Path, optional Path to save the histogram plot as an image file. If None (default), displays the plot interactively using matplotlib's show(). The directory will be created if it doesn't exist. Examples -------- >>> hsi = HSIMars(hdr_path="data/sample.hdr") >>> # Plot histogram for band at index 100 >>> hsi.plot_histogram(band=100) >>> # Plot histogram for band nearest to 1500 nm wavelength >>> hsi.plot_histogram(band=1500.0) >>> # Save histogram to file >>> hsi.plot_histogram( ... band=1500.0, output="plots/histogram_1500nm.png" ... ) Notes ----- The histogram uses 100 bins and is normalized to show probability density rather than raw counts. This normalization facilitates comparison between different bands or images. The histogram includes all valid pixels in the image for the selected band, providing a global view of intensity distribution. """ # Load the hyperspectral image data img = self.get_img() hsi = img.hsi wl = img.wavelength # Select the appropriate band index if isinstance(band, int): # Direct index selection idx = band else: # Find closest wavelength to the requested value idx = np.argmin(np.abs(wl - band)) # Extract pixel values for the selected band and flatten to 1D hist = hsi[..., idx].ravel() actual_wl = wl[idx] # Create the histogram plot _, ax = plt.subplots(figsize=(7, 5), layout="constrained") ax.grid(True, alpha=0.15, ls="dashed") # Plot normalized histogram (probability density) ax.hist(hist, bins=100, density=True, histtype="step", align="mid") # Set axis labels with wavelength information ax.set_xlabel(f"pixel values (band: {actual_wl:.2f} nm)") ax.set_ylabel("density") # Display or save the plot if output is None: plt.show() else: output = Path(output) output.parent.mkdir(parents=True, exist_ok=True) plt.savefig(str(output)) plt.close() # Close the figure to free memory