"""
Open and manipulate hyperspectral images (HSI) from Mars Reconnaissance Orbiter.
This module provides the `HSIMars` class for working with hyperspectral imaging data from the CRISM instrument aboard the Mars Reconnaissance Orbiter. It supports loading, processing, and visualising HSI data and their annotations.
The module handles:
- Loading ENVI format hyperspectral images (.hdr and .img files)
- Processing HSI data (removing bad bands, cropping, normalisation)
- Loading and aligning ground truth annotations (.mat files)
- Visualising spectral data, false-colour images, and annotations
- Plotting spectra with optional convex hull removal
- Generating histograms for spectral bands
**Author**: Riccardo Finotello <riccardo.finotello@cea.fr>
**Maintainer**: Riccardo Finotello <riccardo.finotello@cea.fr>
**Contributors**:
- Riccardo Finotello
Examples
--------
>>> from hsimars import HSIMars
>>> hsi = HSIMars(
... hdr_path="path/to/image.hdr", annotations="path/to/annotations.mat"
... )
>>> img_data = hsi.get_img()
>>> print(f"Image shape: {img_data.shape}")
>>> hsi.plot_spectra(px=[100, 200], convex_hull=True, bands=True)
"""
from __future__ import annotations
from collections import namedtuple
from pathlib import Path
from typing import NamedTuple
import cv2
import matplotlib as mpl
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from numpy.typing import NDArray
from pysptools.spectro import convex_hull_removal
from scipy.interpolate import splev, splrep
from scipy.io import loadmat
from spectral.io import envi
# Configure matplotlib for consistent styling
plt.style.use("grayscale")
mpl.rc("font", size=16)
[docs]
class HSIMars:
"""
Load and manipulate hyperspectral images from Mars Reconnaissance Orbiter.
This class provides functionality for working with CRISM hyperspectral data: loading ENVI format images, processing spectral data, handling ground truth annotations, and creating visualisations.
The class uses lazy loading for memory efficiency - data loads from disk only when first accessed, then caches for later use.
Attributes
----------
hdr_path : Path
Path to the ENVI header file (.hdr extension).
annotations : Path or None
Path to the ground truth annotations file (.mat format), if provided.
Examples
--------
>>> # Load HSI data without annotations
>>> hsi = HSIMars(hdr_path="data/sample.hdr")
>>> img_data = hsi.get_img()
>>> print(
... f"Image dimensions: {img_data.height}x{img_data.width}, {img_data.channels} channels"
... )
>>> # Load HSI data with annotations
>>> hsi = HSIMars(
... hdr_path="data/sample.hdr", annotations="data/labels.mat"
... )
>>> img_data, ann_data = hsi.data()
>>> hsi.display() # Show image with overlaid annotations
>>> # Plot spectrum for a specific pixel
>>> hsi.plot_spectra(px=[100, 200], convex_hull=True, bands=True)
Notes
-----
The ENVI .img file containing the hyperspectral data must be in the same directory as the .hdr header file.
"""
def __init__(
self,
hdr_path: str | Path,
annotations: str | Path | None = None,
label_names_path: str | Path | None = None,
):
"""
Initialise the HSIMars object with paths to data files.
Parameters
----------
hdr_path : str | Path
Path to the ENVI header file (.hdr extension) with metadata about the hyperspectral image. The corresponding .img file with spectral data must be in the same directory.
annotations : str | Path, optional
Path to the ground truth annotations file (.mat format). If None, annotation methods return None. Default is None.
label_names_path : str | Path, optional
Path to the Excel file with label name mappings. If None, looks for ``data_description.xlsx`` in the same directory as the HDR file. Default is None.
Raises
------
FileNotFoundError
If the header file does not exist.
FileNotFoundError
If annotations path is provided but the file does not exist.
FileNotFoundError
If label_names_path is provided but the file does not exist.
Examples
--------
>>> hsi = HSIMars(hdr_path="path/to/image.hdr")
>>> hsi_with_labels = HSIMars(
... hdr_path="path/to/image.hdr", annotations="path/to/labels.mat"
... )
>>> hsi_custom_labels = HSIMars(
... hdr_path="path/to/image.hdr",
... annotations="path/to/labels.mat",
... label_names_path="path/to/custom_labels.xlsx",
... )
Notes
-----
The constructor validates file paths only - no data loads until a get_* method is called. This lazy loading reduces memory usage with large datasets.
"""
self.hdr_path: Path = Path(hdr_path)
if not self.hdr_path.exists():
raise FileNotFoundError(
f"The header file {self.hdr_path} could not be found in the filesystem."
)
self.annotations: Path | None = None
if annotations is not None:
self.annotations = Path(annotations)
if not self.annotations.exists():
raise FileNotFoundError(
f"The annotation file {self.annotations} could not be found in the filesystem."
)
self.label_names_path: Path | None = None
if label_names_path is not None:
self.label_names_path = Path(label_names_path)
if not self.label_names_path.exists():
raise FileNotFoundError(
f"The label names file {self.label_names_path} could not be found in the filesystem."
)
# Cache large objects to avoid repeated disk I/O and processing
# These are populated on first access via get_* methods
self._raw: envi.BsqFile | None = None
self._img: NamedTuple | None = None
self._ann: NamedTuple | None = None
self._false_colour_bands: NDArray | None = None
[docs]
def get_raw(self) -> envi.BsqFile:
"""
Load the raw ENVI hyperspectral data file.
This method opens the ENVI format file and returns a file object for accessing the raw spectral data. The data caches after the first call to avoid redundant disk I/O.
Returns
-------
envi.BsqFile
ENVI file object for accessing the hyperspectral data. This object supports memory-mapped access for large files.
Examples
--------
>>> hsi = HSIMars(hdr_path="data/sample.hdr")
>>> raw = hsi.get_raw()
>>> print(raw.metadata["wavelength"]) # Access wavelength information
Notes
-----
The spectral data is in a .img file that must be in the same directory as the .hdr file. The ENVI library automatically finds and opens the .img file.
"""
if self._raw is None:
self._raw = envi.open(self.hdr_path)
return self._raw
[docs]
def get_img(self) -> NamedTuple:
"""
Load and process the hyperspectral image data.
This method loads the raw HSI data, performs preprocessing steps including bad band removal, cropping of invalid pixels, and normalization. The result is cached for efficient subsequent access.
The preprocessing pipeline includes:
1. Loading wavelength information and identifying bad bands
2. Removing pixels with the ignore value (65535)
3. Cropping the image to remove rows/columns with no valid data
4. Removing any remaining bad channels
5. Converting to float32 format for numerical processing
Returns
-------
NamedTuple
A named tuple (HSIMarsImageData) containing the following attributes:
- **hsi** : ndarray of shape (height, width, channels)
The processed hyperspectral image data as a 3D numpy array. Data type is float32.
- **wavelength** : ndarray of shape (channels,)
Array of wavelength values in nanometers corresponding to each spectral channel.
- **shape** : tuple of (height, width, channels)
Dimensions of the hyperspectral image.
- **height** : int
Number of pixel rows in the image.
- **width** : int
Number of pixel columns in the image.
- **channels** : int
Number of spectral bands/channels in the image.
- **dtype** : str
Data type of the HSI array ('float32').
Examples
--------
>>> hsi = HSIMars(hdr_path="data/sample.hdr")
>>> img_data = hsi.get_img()
>>> print(f"Image shape: {img_data.shape}")
>>> print(
... f"Wavelength range: {img_data.wavelength.min():.1f} - {img_data.wavelength.max():.1f} nm"
... )
>>> print(f"Data type: {img_data.dtype}")
>>> # Access specific pixel spectrum
>>> spectrum = img_data.hsi[100, 200, :]
>>> print(f"Spectrum at (100, 200): {spectrum.shape[0]} bands")
Notes
-----
The method implements lazy evaluation - the image is only loaded and processed on the first call. Subsequent calls return the cached result.
The bad band list (bbl) from the ENVI metadata is used to filter out unreliable spectral channels before further processing.
"""
if self._img is not None:
return self._img
# Get the raw data
img = self.get_raw()
# Recover the image and the metadata
img_memmap = img.open_memmap()
img_metadata = img.metadata
# Extract wavelength information and ignore value for invalid pixels
wl = np.array(img_metadata["wavelength"]).astype("float32")
ignore_value = float(img_metadata["data ignore value"])
# Select the default bands for false-color visualization
bands = np.array(img_metadata["default bands"]).astype(int)
bands = wl[bands]
# Filter out bad bands based on metadata bad band list (bbl)
bbl = np.array(img_metadata["bbl"]).astype("bool")
img_memmap = img_memmap[..., bbl]
wl = wl[bbl]
# Crop the image by removing rows/columns containing only ignore values
# This reduces memory usage and eliminates invalid border regions
mask = img_memmap != ignore_value
mask_channels = mask.sum(axis=2)
good_rows = mask_channels.sum(axis=1) > 0
good_cols = mask_channels.sum(axis=0) > 0
img_memmap = img_memmap[good_rows, :][:, good_cols]
# Find and remove any remaining bad channels that still contain
# ignore values
mask = img_memmap == ignore_value
idx = np.unique(np.argwhere(mask)[..., 2])
ch = np.ones((img_memmap.shape[2],), dtype="bool")
ch[idx] = False
img_memmap = img_memmap[..., ch]
wl = wl[ch]
# Update the default bands indices for false-color visualization
self._false_colour_bands = np.searchsorted(wl, bands).astype(int)
output = namedtuple(
"HSIMarsImageData",
[
"hsi",
"wavelength",
"shape",
"height",
"width",
"channels",
"dtype",
],
)
self._img = output(
hsi=img_memmap.astype("float32"),
wavelength=wl,
shape=img_memmap.shape,
height=img_memmap.shape[0],
width=img_memmap.shape[1],
channels=img_memmap.shape[2],
dtype="float32",
)
return self._img
def _pad_annotations(self, mat: NDArray) -> NDArray:
"""
Pad annotation matrix to match the processed HSI dimensions.
The annotation matrix may have different dimensions than the processed HSI data after cropping. This method centers the annotations within the target dimensions by adding symmetric padding.
Parameters
----------
mat : NDArray
The raw annotation matrix to be padded.
Returns
-------
NDArray
The padded annotation matrix matching HSI dimensions.
Notes
-----
Padding is distributed symmetrically around the annotation data, with any odd remainder added to the bottom/right edges.
"""
# Get target shape from processed image
img = self.get_img()
H, W = img.height, img.width
h, w = mat.shape
# Calculate symmetric padding for height and width
pad_top = (H - h) // 2
pad_bottom = H - h - pad_top
pad_left = (W - w) // 2
pad_right = W - w - pad_left
return np.pad(mat, ((pad_top, pad_bottom), (pad_left, pad_right)))
def _load_label_names(self) -> dict[int, str]:
"""
Load label names from the data description Excel file.
This method reads the data description Excel file and extracts the mapping between numerical class labels and their human-interpretable names for the current dataset.
The method automatically identifies the dataset by extracting a two-letter prefix from the HDR filename (e.g., "HC", "NF", or "UP") and locates the corresponding section in the Excel file. It then parses the class ID and class name columns to build the label mapping dictionary.
Returns
-------
dict[int, str]
A dictionary mapping numerical class labels (integers) to their corresponding human-readable class names (strings). For example: {1: 'Analcime', 2: 'Plagioclase', 3: 'Prehnite', ...}
Returns an empty dictionary if:
- The Excel file is not found
- The dataset prefix is not found in the Excel file
- The Excel file structure is invalid or cannot be parsed
Notes
-----
If ``label_names_path`` was provided during initialization, that file will be used. Otherwise, the method looks for a file named ``data_description.xlsx`` in the same directory as the HDR file.
The file should contain sections for each dataset, where each section starts with a row containing the dataset name (e.g., "HC", "NF", "UP"), followed by a header row with columns "Class ID", "Class Name", and "Total", and then data rows containing the class ID and name pairs.
This method is called internally by :meth:`get_annotations` to populate the ``label_names`` field of the ``HSIMarsAnnotationData`` named tuple.
Examples
--------
The expected Excel file structure for a dataset section:
.. code-block:: text
HC
Class ID | Class Name | Total
1 | Analcime | 940
2 | Plagioclase | 1472
3 | Prehnite | 1560
...
Warnings
--------
If the Excel file or dataset section cannot be found, this method returns an empty dictionary rather than raising an exception. This allows the annotation loading process to continue even when label names are unavailable, though the ``label_names`` field will be empty.
"""
# Determine which Excel file to use
if self.label_names_path is not None:
excel_path = self.label_names_path
else:
# Look for data_description.xlsx in same directory as HDR file
excel_path = self.hdr_path.parent / "data_description.xlsx"
# Return empty dict if Excel file doesn't exist
if not excel_path.exists():
return {}
# Extract dataset prefix from HDR filename (first 2 characters)
dataset_name = self.hdr_path.stem[:2].upper()
try:
# Read Excel file without header to access raw cell data
df = pd.read_excel(excel_path, header=None)
# Find the row containing the dataset name identifier
dataset_row_idx = None
for idx, row in df.iterrows():
if row[0] == dataset_name:
dataset_row_idx = idx
break
# Return empty dict if dataset not found in Excel
if dataset_row_idx is None:
return {}
# Skip the header row (next row after dataset name)
# and start reading class labels from the following row
data_start_row = dataset_row_idx + 2
# Parse class ID and name pairs until hitting end marker
label_map = {}
for row_idx in range(data_start_row, len(df)):
row = df.iloc[row_idx]
# Stop at empty row or "Total" row (end of section)
if pd.isna(row[0]):
break
# Parse and store class ID to name mapping
try:
class_id = int(row[0])
class_name = str(row[1]).strip()
label_map[class_id] = class_name
except (ValueError, TypeError):
# Skip rows that don't contain valid ID/name pairs
continue
return label_map
except Exception:
# Return empty dict if any error occurs during parsing
return {}
[docs]
def get_annotations(self) -> NamedTuple | None:
"""
Load and process ground truth annotation data.
Loads annotation labels from a MATLAB .mat file and aligns them with the processed HSI data dimensions. The result is cached for efficient subsequent access.
Returns
-------
NamedTuple or None
If annotations were provided during initialization, returns a named tuple (HSIMarsAnnotationData) with the following attributes:
- **labels** : ndarray of shape (height, width)
2D array containing class labels for each pixel. Values are unsigned integers representing different material classes. A value of 0 typically indicates unlabeled/background pixels.
- **shape** : tuple of (height, width)
Dimensions of the annotation matrix.
- **height** : int
Number of pixel rows (matches HSI height).
- **width** : int
Number of pixel columns (matches HSI width).
- **dtype** : str
Data type of the labels array ('uint8').
- **label_names** : dict[int, str]
Dictionary mapping numerical class labels to human-readable class names. For example: {1: 'Analcime', 2: 'Plagioclase'}. Will be an empty dictionary if the label mapping file is not found or cannot be parsed.
Returns None if no annotation file was provided during initialization.
Examples
--------
>>> hsi = HSIMars(
... hdr_path="data/sample.hdr", annotations="data/labels.mat"
... )
>>> ann_data = hsi.get_annotations()
>>> if ann_data is not None:
... print(f"Annotation shape: {ann_data.shape}")
... unique_labels = np.unique(ann_data.labels)
... print(f"Number of classes: {len(unique_labels)}")
Notes
-----
The annotation matrix is automatically padded to match the dimensions of the processed HSI data. This ensures pixel-level alignment between spectral data and labels.
The method implements lazy evaluation - annotations are only loaded on the first call. Subsequent calls return the cached result.
"""
if self._ann is not None:
return self._ann
if self.annotations is None:
return None
# Load the annotation matrix from MATLAB file
ann = loadmat(self.annotations)
# Extract the first numpy array found in the .mat file
mat = None
for v in ann.values():
if isinstance(v, np.ndarray):
mat = v.astype("uint8")
break
if mat is None:
raise ValueError(
f"No valid annotation matrix found in {self.annotations}"
)
# Pad the annotation matrix to match the processed image dimensions
mat = self._pad_annotations(mat)
# Load label names from the data description Excel file
label_names = self._load_label_names()
output = namedtuple(
"HSIMarsAnnotationData",
["labels", "shape", "height", "width", "dtype", "label_names"],
)
self._ann = output(
labels=mat,
shape=mat.shape,
height=mat.shape[0],
width=mat.shape[1],
dtype="uint8",
label_names=label_names,
)
return self._ann
[docs]
def data(self) -> tuple[NamedTuple, NamedTuple | None]:
"""
Load both hyperspectral image and annotation data.
This convenience method loads both the HSI data and annotations (if available) in a single call, ensuring both are cached for subsequent operations.
Returns
-------
tuple[NamedTuple, NamedTuple or None]
A tuple containing two elements:
1. **HSIMarsImageData** (NamedTuple):
- **hsi** : ndarray
The HSI data array of shape (height, width, channels).
- **wavelength** : ndarray
Array of wavelength values in nm.
- **shape** : tuple
Dimensions (height, width, channels).
- **height** : int
Number of pixel rows.
- **width** : int
Number of pixel columns.
- **channels** : int
Number of spectral bands.
- **dtype** : str
Data type ('float32').
2. **HSIMarsAnnotationData** (NamedTuple or None):
If annotations are available:
- **labels** : ndarray
Label array of shape (height, width).
- **shape** : tuple
Dimensions (height, width).
- **height** : int
Number of pixel rows.
- **width** : int
Number of pixel columns.
- **dtype** : str
Data type ('uint8').
- **label_names** : dict[int, str]
Dictionary mapping numerical class labels to human-readable class names.
Returns None if no annotations were provided.
Examples
--------
>>> hsi = HSIMars(
... hdr_path="data/sample.hdr", annotations="data/labels.mat"
... )
>>> img_data, ann_data = hsi.data()
>>> print(
... f"Image: {img_data.shape}, Annotations: {ann_data.shape if ann_data else 'None'}"
... )
Notes
-----
This method is equivalent to calling `get_img()` and `get_annotations()` separately, but provides a more convenient interface when both datasets are needed.
"""
return self.get_img(), self.get_annotations()
def _prepare_img(self, img: NDArray) -> NDArray:
"""
Prepare HSI data for visualization by creating a false-color RGB image.
Selects three spectral bands from the full hyperspectral cube and normalizes them to 8-bit RGB format suitable for display.
Parameters
----------
img : NDArray
The full hyperspectral image array.
Returns
-------
NDArray
8-bit RGB image suitable for display with OpenCV.
Notes
-----
Uses the default bands specified in the ENVI header for false-color visualization. These typically correspond to visible and near-infrared wavelengths that provide good visual contrast.
"""
# Select the false-color bands for RGB visualization
img = img[..., self._false_colour_bands]
# Normalize to 8-bit range [0, 255] for display
img = cv2.normalize(
img, None, 0, 255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U
)
return img
def _prepare_ann(self, img: NDArray) -> NDArray:
"""
Prepare annotation data for visualization with color mapping.
Converts label values to an 8-bit format, applies a colormap for visual distinction between classes, and preserves background pixels as black.
Parameters
----------
img : NDArray
The annotation label array.
Returns
-------
NDArray
8-bit RGB image with colormap applied, suitable for display.
Notes
-----
Pixels with label value 0 (background/unlabeled) are displayed as black (0, 0, 0). Other labels are mapped to colors using the TURBO colormap for maximum visual distinction.
"""
# Identify background pixels (label == 0)
mask = img == 0
# Normalize labels to 8-bit range
img = cv2.normalize(
img, None, 0, 255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U
)
# Apply TURBO colormap for visual distinction between classes
img = cv2.applyColorMap(img, cv2.COLORMAP_TURBO)
# Preserve background as black
img[mask] = (0, 0, 0)
return img
def _cv2_imshow(self, title: str, mat: NDArray) -> None:
"""
Display an image in an OpenCV window with keyboard interaction.
Creates a resizable window, displays the image, and waits for a keypress before closing the window.
Parameters
----------
title : str
Window title.
mat : NDArray
Image array to display.
Notes
-----
The window is created with WINDOW_NORMAL flag, making it resizable. Press any key to close the window.
"""
# Create a resizable named window
cv2.namedWindow(title, flags=cv2.WINDOW_NORMAL)
cv2.imshow(title, mat)
cv2.waitKey(0) # Wait for any keypress
cv2.destroyWindow(title)
[docs]
def display_hsi(self) -> None:
"""
Display the hyperspectral image as a false-color RGB visualization.
Opens an interactive OpenCV window showing the HSI data rendered using three representative spectral bands. The window can be resized and closed by pressing any key.
Examples
--------
>>> hsi = HSIMars(hdr_path="data/sample.hdr")
>>> hsi.display_hsi() # Opens window, press any key to close
Notes
-----
The false-color bands are automatically selected from the ENVI header metadata, typically representing visible and near-infrared wavelengths for optimal visual interpretation.
"""
# Load and prepare the HSI for visualization
img = self.get_img().hsi
img = self._prepare_img(img)
self._cv2_imshow(self.hdr_path.stem, img)
[docs]
def display_annotations(self) -> None:
"""
Display the ground truth annotations with color-coded labels.
Opens an interactive OpenCV window showing the annotation labels with a colormap applied for visual distinction between classes. Background pixels (label 0) are displayed in black.
Raises
------
AttributeError
If no annotations were provided during initialization.
Examples
--------
>>> hsi = HSIMars(
... hdr_path="data/sample.hdr", annotations="data/labels.mat"
... )
>>> hsi.display_annotations() # Opens window, press any key to close
Notes
-----
The TURBO colormap is applied to provide maximum visual distinction between different material classes in the annotations.
"""
# Load and prepare annotations for visualization
ann_data = self.get_annotations()
if ann_data is None:
raise ValueError(
"No annotations available. Provide annotations path during initialization."
)
ann = ann_data.labels.astype("float32")
ann = self._prepare_ann(ann)
self._cv2_imshow(self.hdr_path.stem + " - annotations", ann)
[docs]
def display(self) -> None:
"""
Display comprehensive visualization with HSI, annotations, and overlay.
Opens an interactive OpenCV window showing:
- Left panel: False-color HSI visualization
- Middle panel: Color-coded annotations (if available)
- Right panel: Semi-transparent overlay of annotations on HSI (if available)
If no annotations are provided, displays only the HSI.
Examples
--------
>>> # With annotations
>>> hsi = HSIMars(
... hdr_path="data/sample.hdr", annotations="data/labels.mat"
... )
>>> hsi.display() # Shows three-panel view
>>> # Without annotations
>>> hsi = HSIMars(hdr_path="data/sample.hdr")
>>> hsi.display() # Shows only HSI
Notes
-----
The overlay uses 75% weight for the HSI and 25% weight for the annotations, providing a good balance between seeing spectral features and class boundaries.
"""
# Load and prepare the HSI
img = self.get_img().hsi
img = self._prepare_img(img)
# If annotations are available, create a three-panel display
if self.annotations is not None:
ann_data = self.get_annotations()
if ann_data is not None:
ann = ann_data.labels.astype("float32")
ann = self._prepare_ann(ann)
# Create semi-transparent overlay (75% HSI, 25% annotations)
sup = cv2.addWeighted(img, 0.75, ann, 0.25, 0.0)
# Concatenate horizontally: HSI | Annotations | Overlay
img = cv2.hconcat([img, ann, sup])
self._cv2_imshow(self.hdr_path.stem, img)
[docs]
def plot_spectra(
self,
px: list[int, int] | list[list[int, int]] | NDArray,
convex_hull: bool = False,
bands: bool = False,
output: str | Path | None = None,
) -> None:
"""
Plot spectral signature(s) for specified pixel location(s).
Generates a line plot showing reflectance/intensity as a function of wavelength. For multiple pixels, plots the mean spectrum with standard deviation shading. Optionally applies convex hull removal to normalize continuum and highlights spectral band regions.
Parameters
----------
px : list[int, int] | list[list[int, int]] | NDArray
Pixel coordinates to extract spectra from. Can be:
- Single pixel: [row, col] or [[row, col]]
- Multiple pixels: [[row1, col1], [row2, col2], ...] or 2D array
Coordinates are in (row, column) format, 0-indexed.
convex_hull : bool, optional
If True, applies convex hull removal to normalize the spectrum continuum. This technique is useful for analyzing absorption features by removing the overall spectral shape. Default is False.
bands : bool, optional
If True, overlays colored regions indicating spectral band types:
- VIS (Visible): < 750 nm (green)
- NIR (Near-Infrared): 750-1400 nm (red)
- SWIR (Short-Wave Infrared): 1400-3000 nm (blue)
- MWIR (Mid-Wave Infrared): > 3000 nm (magenta)
Default is False.
output : str | Path, optional
Path to save the plot as an image file. If None (default), displays the plot interactively using matplotlib's show(). The directory will be created if it doesn't exist.
Examples
--------
>>> hsi = HSIMars(hdr_path="data/sample.hdr")
>>> # Plot single pixel spectrum
>>> hsi.plot_spectra(px=[100, 200])
>>> # Plot average spectrum of multiple pixels with standard deviation
>>> pixels = [[100, 200], [101, 200], [100, 201], [101, 201]]
>>> hsi.plot_spectra(px=pixels, convex_hull=True, bands=True)
>>> # Save plot to file
>>> hsi.plot_spectra(px=[100, 200], output="plots/spectrum.png")
Notes
-----
Convex hull removal is performed using the pysptools library. This technique divides the spectrum by its convex hull envelope, effectively normalizing the continuum and emphasizing absorption features.
For multiple pixels, the standard deviation is shown as a shaded region around the mean spectrum, providing visual indication of spectral variability within the selected region.
"""
# Load the hyperspectral image data
img = self.get_img()
hsi = img.hsi
wl = img.wavelength
# Convert pixel coordinates to 2D array format for consistent handling
px = np.atleast_2d(np.array(px))
# Extract spectra for the specified pixels
spec = hsi[px[:, 0], px[:, 1]]
# Calculate mean and standard deviation
if spec.shape[0] == 1:
# Single pixel: no standard deviation
spec_mean = spec[0]
spec_std = None
else:
# Multiple pixels: compute statistics
spec_mean = spec.mean(axis=0)
spec_std = spec.std(axis=0)
# Apply convex hull removal for continuum normalization if requested
if convex_hull:
spec_mean, x_hull, y_hull = convex_hull_removal(spec_mean, wl)
spec_mean = 1.0 - np.array(spec_mean)
# Also normalize the standard deviation using the convex hull
if spec_std is not None:
# Interpolate hull values at all wavelengths
interp = splrep(x_hull, y_hull, k=1)
y_hull = splev(wl, interp, der=0)
spec_std /= y_hull
# Create the plot with appropriate size and layout
_, ax = plt.subplots(figsize=(7, 5), layout="constrained")
ax.grid(True, alpha=0.15, ls="dashed")
# Overlay spectral band regions if requested
if bands:
# Position text labels at 75% of the y-axis range
y_pos = (
0.75 * (spec_mean.max() - spec_mean.min()) + spec_mean.min()
)
# Visible band (VIS): < 750 nm
ax.axvspan(wl.min(), 750, color="g", alpha=0.15)
ax.text(
wl.min() + (750 - wl.min()) / 2.0,
y_pos,
"VIS",
color="g",
rotation=90,
va="bottom",
ha="center",
)
# Near-Infrared band (NIR): 750-1400 nm
ax.axvspan(750, 1400, color="r", alpha=0.15)
ax.text(
750 + (1400 - 750) / 2.0,
y_pos,
"NIR",
color="r",
rotation=90,
va="bottom",
ha="center",
)
# Short-Wave Infrared band (SWIR): 1400-3000 nm
ax.axvspan(1400, 3000, color="b", alpha=0.15)
ax.text(
1400 + (3000 - 1400) / 2.0,
y_pos,
"SWIR",
color="b",
rotation=90,
va="bottom",
ha="center",
)
# Mid-Wave Infrared band (MWIR): > 3000 nm
ax.axvspan(3000, wl.max(), color="m", alpha=0.15)
ax.text(
3000 + (wl.max() - 3000) / 2.0,
y_pos,
"MWIR",
color="m",
rotation=90,
va="bottom",
ha="center",
)
# Plot the mean spectrum
ax.plot(wl, spec_mean, "k-", label="spectrum")
# Add standard deviation shading for multiple pixels
if spec_std is not None:
ax.fill_between(
wl,
(spec_mean - spec_std).clip(0, None), # Clip to non-negative
spec_mean + spec_std,
label="$\\pm \\sigma$",
color="k",
alpha=0.15,
)
# Add legend when showing standard deviation
ax.legend(
loc="upper center",
bbox_to_anchor=(0.5, -0.15),
ncol=2,
frameon=False,
)
# Set axis labels
ax.set_xlabel("wavelength (nm)")
ax.set_ylabel("intensity (a.u.)")
# Display or save the plot
if output is None:
plt.show()
else:
output = Path(output)
output.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(str(output))
plt.close() # Close the figure to free memory
[docs]
def plot_histogram(
self, band: int | float, output: str | Path | None = None
) -> None:
"""
Plot the intensity distribution histogram for a specific spectral band.
Generates a probability density histogram showing the distribution of pixel intensity values across the image for the selected wavelength band. Useful for analyzing the statistical properties of spectral features.
Parameters
----------
band : int | float
Spectral band selector. Can be:
- **int**: Direct band index (0-based) in the spectral dimension.
- **float**: Wavelength in nanometers. The closest available wavelength will be automatically selected.
output : str | Path, optional
Path to save the histogram plot as an image file. If None (default), displays the plot interactively using matplotlib's show(). The directory will be created if it doesn't exist.
Examples
--------
>>> hsi = HSIMars(hdr_path="data/sample.hdr")
>>> # Plot histogram for band at index 100
>>> hsi.plot_histogram(band=100)
>>> # Plot histogram for band nearest to 1500 nm wavelength
>>> hsi.plot_histogram(band=1500.0)
>>> # Save histogram to file
>>> hsi.plot_histogram(
... band=1500.0, output="plots/histogram_1500nm.png"
... )
Notes
-----
The histogram uses 100 bins and is normalized to show probability density rather than raw counts. This normalization facilitates comparison between different bands or images.
The histogram includes all valid pixels in the image for the selected band, providing a global view of intensity distribution.
"""
# Load the hyperspectral image data
img = self.get_img()
hsi = img.hsi
wl = img.wavelength
# Select the appropriate band index
if isinstance(band, int):
# Direct index selection
idx = band
else:
# Find closest wavelength to the requested value
idx = np.argmin(np.abs(wl - band))
# Extract pixel values for the selected band and flatten to 1D
hist = hsi[..., idx].ravel()
actual_wl = wl[idx]
# Create the histogram plot
_, ax = plt.subplots(figsize=(7, 5), layout="constrained")
ax.grid(True, alpha=0.15, ls="dashed")
# Plot normalized histogram (probability density)
ax.hist(hist, bins=100, density=True, histtype="step", align="mid")
# Set axis labels with wavelength information
ax.set_xlabel(f"pixel values (band: {actual_wl:.2f} nm)")
ax.set_ylabel("density")
# Display or save the plot
if output is None:
plt.show()
else:
output = Path(output)
output.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(str(output))
plt.close() # Close the figure to free memory