Spaces:
Sleeping
Sleeping
| import cv2 | |
| import numpy as np | |
| import torch | |
| from torchvision import transforms | |
class FFTProcessor:
    """Convert images into normalized log-magnitude FFT feature maps.

    Attributes:
        size: Side length, in pixels, of the square grid the grayscale image
            is resized to before the FFT. The output has this spatial size.
    """

    def __init__(self, size=224):
        self.size = size

    def process_image(self, image):
        """Process a single image into its FFT representation.

        The image is reduced to one channel, resized to ``size x size``,
        transformed with a 2-D FFT, shifted so the zero-frequency term sits
        at the centre, converted to a log-magnitude spectrum, and min-max
        normalized per image.

        Args:
            image: numpy array of shape (H, W, 3) in RGB format. Arrays that
                are already single-channel, shaped (H, W) or (H, W, 1), are
                also accepted and used directly as the grayscale image.

        Returns:
            torch float32 tensor of shape (1, size, size) with values in
            [0, 1]. A spectrum with no dynamic range (for example an
            all-black image) yields an all-zero tensor.
        """
        # 1. Reduce to a single channel. cv2.cvtColor rejects input that is
        #    already single-channel, so pass those through untouched; every
        #    other layout is handed to OpenCV exactly as before.
        if image.ndim == 2:
            gray = image
        elif image.ndim == 3 and image.shape[2] == 1:
            gray = np.ascontiguousarray(image[:, :, 0])
        else:
            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

        # 2. Resize BEFORE the FFT so every image shares the same frequency
        #    resolution. Resizing to the size the image already has is a
        #    plain copy, so it is skipped.
        target_shape = (self.size, self.size)
        if gray.shape[:2] != target_shape:
            gray = cv2.resize(gray, target_shape)

        # 3-4. 2-D FFT, then move the zero-frequency component to the centre.
        shifted = np.fft.fftshift(np.fft.fft2(gray))

        # 5. Log-magnitude spectrum; the epsilon avoids log(0) on empty bins.
        magnitude_spectrum = 20 * np.log(np.abs(shifted) + 1e-8)

        # 6. Per-image min-max normalization to [0, 1]. A flat spectrum has
        #    no range to scale by, so it maps to all zeros instead of
        #    dividing by (almost) zero.
        m_min = np.min(magnitude_spectrum)
        m_max = np.max(magnitude_spectrum)
        if m_max - m_min > 1e-8:
            norm_spectrum = (magnitude_spectrum - m_min) / (m_max - m_min)
        else:
            norm_spectrum = np.zeros_like(magnitude_spectrum)

        # 7. To float32 tensor with a leading channel axis: (1, size, size).
        return torch.from_numpy(norm_spectrum).float().unsqueeze(0)