Commit ·
afe9cc0
1
Parent(s): b9d1035
Enhance FlareDownloadProcessor to support time span mode for 1-minute cadence downloads, adding new arguments and methods for data existence checks. Update command-line arguments for improved usability. Refactor evaluation configurations and model training settings, including adjustments to checkpoint paths and model parameters for better performance.
Browse files- download/flare_download_processor.py +225 -48
- forecasting/inference/checkpoint_list.yaml +13 -16
- forecasting/inference/evaluation.py +15 -11
- forecasting/inference/evaluation_config.yaml +7 -2
- forecasting/inference/inference_stereo.yaml +81 -0
- forecasting/inference/inference_template.yaml +3 -3
- forecasting/models/vit_patch_model.py +3 -0
- forecasting/models/vit_patch_model_uncertainty.py +529 -0
- forecasting/training/callback.py +6 -1
- forecasting/training/config5.yaml +2 -2
- forecasting/training/config6.yaml +1 -1
- forecasting/training/train.py +5 -0
- forecasting/training/vituncertainty.yaml +75 -0
download/flare_download_processor.py
CHANGED
|
@@ -11,15 +11,42 @@ import flare_event_downloader as fed
|
|
| 11 |
import sxr_downloader as sxr
|
| 12 |
|
| 13 |
class FlareDownloadProcessor:
|
| 14 |
-
def __init__(self, FlareEventDownloader, SDODownloader, SXRDownloader, flaring_data=True):
|
| 15 |
"""
|
| 16 |
Initialize the FlareDownloadProcessor.
|
| 17 |
This class is responsible for processing AIA flare downloads.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
"""
|
| 19 |
self.FlareEventDownloader = FlareEventDownloader
|
| 20 |
self.SDODownloader = SDODownloader
|
| 21 |
self.SXRDownloader = SXRDownloader
|
| 22 |
self.flaring_data = flaring_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
def retry_download_with_backoff(self, download_func, *args, max_retries=5, base_delay=60, **kwargs):
|
| 25 |
"""
|
|
@@ -56,10 +83,18 @@ class FlareDownloadProcessor:
|
|
| 56 |
# Re-raise non-HTTP errors immediately
|
| 57 |
raise
|
| 58 |
|
| 59 |
-
def process_download(self, time_before_start=timedelta(minutes=
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
[os.makedirs(os.path.join(self.SDODownloader.ds_path, str(c)), exist_ok=True) for c in
|
| 64 |
[94, 131, 171, 193, 211, 304]]
|
| 65 |
|
|
@@ -71,6 +106,93 @@ class FlareDownloadProcessor:
|
|
| 71 |
completed_dates = set(line.strip() for line in f)
|
| 72 |
print(f"Resuming from {len(completed_dates)} previously completed downloads")
|
| 73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
if self.flaring_data == True:
|
| 75 |
print("Processing flare events...")
|
| 76 |
if fl_events.empty:
|
|
@@ -88,8 +210,13 @@ class FlareDownloadProcessor:
|
|
| 88 |
range((end_time - start_time) // timedelta(minutes=1))]:
|
| 89 |
# Only download if we haven't processed this date yet
|
| 90 |
if d.isoformat() not in processed_dates:
|
| 91 |
-
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
logging.info(f"Processed flare event {i + 1}/{len(fl_events)}: {event['event_starttime']} to {event['event_endtime']}")
|
| 94 |
elif self.flaring_data == False:
|
| 95 |
print("Processing non-flare events...")
|
|
@@ -136,29 +263,39 @@ class FlareDownloadProcessor:
|
|
| 136 |
for j, d in enumerate(batch):
|
| 137 |
# Only download if we haven't processed this date yet
|
| 138 |
if d.isoformat() not in processed_dates and d.isoformat() not in completed_dates:
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
processed_dates.add(d.isoformat())
|
| 143 |
completed_dates.add(d.isoformat())
|
| 144 |
|
| 145 |
# Update progress file
|
| 146 |
with open(progress_file, 'a') as f:
|
| 147 |
f.write(f"{d.isoformat()}\n")
|
| 148 |
-
|
| 149 |
-
print(f" ✓ Successfully downloaded {d}")
|
| 150 |
-
|
| 151 |
-
# Add small delay between individual downloads
|
| 152 |
-
if j < len(batch) - 1:
|
| 153 |
-
time.sleep(.2)
|
| 154 |
-
|
| 155 |
-
except Exception as e:
|
| 156 |
-
print(f" ✗ Failed to download data for {d}: {e}")
|
| 157 |
-
# If it's a connection error, wait longer before retrying
|
| 158 |
-
if "Connection refused" in str(e) or "timeout" in str(e).lower():
|
| 159 |
-
print(f" Waiting 10 seconds before continuing...")
|
| 160 |
-
time.sleep(5)
|
| 161 |
-
continue
|
| 162 |
elif d.isoformat() in completed_dates:
|
| 163 |
print(f" ⏭ Skipping {d} (already completed)")
|
| 164 |
processed_dates.add(d.isoformat())
|
|
@@ -174,17 +311,25 @@ class FlareDownloadProcessor:
|
|
| 174 |
if __name__ == '__main__':
|
| 175 |
parser = argparse.ArgumentParser(description='Download flare events and associated SDO data.')
|
| 176 |
parser.add_argument('--start_date', type=str, default='2023-6-15',
|
| 177 |
-
help='Start date for downloading
|
| 178 |
parser.add_argument('--end_date', type=str, default='2023-07-15',
|
| 179 |
-
help='End date for downloading
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
parser.add_argument('--chunk_size', type=int, default=2000,
|
| 181 |
help='Number of days per chunk for processing (default: 180)')
|
| 182 |
parser.add_argument('--download_dir', type=str, default='/mnt/data',
|
| 183 |
help='Directory to save downloaded data (default: /mnt/data)')
|
|
|
|
|
|
|
| 184 |
parser.add_argument('--flaring_data', dest='flaring_data', action='store_true',
|
| 185 |
help='Download flaring data (default)')
|
| 186 |
parser.add_argument('--non_flaring_data', dest='flaring_data', action='store_false',
|
| 187 |
help='Download non-flaring data')
|
|
|
|
|
|
|
| 188 |
parser.set_defaults(flaring_data=True)
|
| 189 |
args = parser.parse_args()
|
| 190 |
|
|
@@ -193,29 +338,61 @@ if __name__ == '__main__':
|
|
| 193 |
end_date = args.end_date
|
| 194 |
chunk_size = args.chunk_size
|
| 195 |
flaring_data = args.flaring_data
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
|
| 221 |
-
|
|
|
|
| 11 |
import sxr_downloader as sxr
|
| 12 |
|
| 13 |
class FlareDownloadProcessor:
|
| 14 |
+
def __init__(self, FlareEventDownloader, SDODownloader, SXRDownloader, flaring_data=True, time_span_mode=False):
|
| 15 |
"""
|
| 16 |
Initialize the FlareDownloadProcessor.
|
| 17 |
This class is responsible for processing AIA flare downloads.
|
| 18 |
+
|
| 19 |
+
Args:
|
| 20 |
+
FlareEventDownloader: Downloader for flare events
|
| 21 |
+
SDODownloader: Downloader for SDO data
|
| 22 |
+
SXRDownloader: Downloader for SXR data
|
| 23 |
+
flaring_data: Whether to download flaring data (legacy mode)
|
| 24 |
+
time_span_mode: Whether to use time span mode for 1-minute cadence downloads
|
| 25 |
"""
|
| 26 |
self.FlareEventDownloader = FlareEventDownloader
|
| 27 |
self.SDODownloader = SDODownloader
|
| 28 |
self.SXRDownloader = SXRDownloader
|
| 29 |
self.flaring_data = flaring_data
|
| 30 |
+
self.time_span_mode = time_span_mode
|
| 31 |
+
|
| 32 |
+
def check_existing_data(self, date):
|
| 33 |
+
"""
|
| 34 |
+
Check if data already exists for the given date in all required wavelengths.
|
| 35 |
+
|
| 36 |
+
Args:
|
| 37 |
+
date (datetime): The date to check for existing data
|
| 38 |
+
|
| 39 |
+
Returns:
|
| 40 |
+
bool: True if data exists for all wavelengths, False otherwise
|
| 41 |
+
"""
|
| 42 |
+
wavelengths = ['94', '131', '171', '193', '211', '304']
|
| 43 |
+
date_str = date.strftime('%Y-%m-%dT%H:%M:%S')
|
| 44 |
+
|
| 45 |
+
for wl in wavelengths:
|
| 46 |
+
file_path = os.path.join(self.SDODownloader.ds_path, wl, f"{date_str}.fits")
|
| 47 |
+
if not os.path.exists(file_path):
|
| 48 |
+
return False
|
| 49 |
+
return True
|
| 50 |
|
| 51 |
def retry_download_with_backoff(self, download_func, *args, max_retries=5, base_delay=60, **kwargs):
|
| 52 |
"""
|
|
|
|
| 83 |
# Re-raise non-HTTP errors immediately
|
| 84 |
raise
|
| 85 |
|
| 86 |
+
def process_download(self, time_before_start=timedelta(minutes=120), time_after_end=timedelta(minutes=120),
|
| 87 |
+
start_time=None, end_time=None):
|
| 88 |
+
"""
|
| 89 |
+
Process downloads either in flare mode or time span mode.
|
| 90 |
+
|
| 91 |
+
Args:
|
| 92 |
+
time_before_start: Time before flare start to download (legacy mode)
|
| 93 |
+
time_after_end: Time after flare end to download (legacy mode)
|
| 94 |
+
start_time: Start time for time span mode (datetime object)
|
| 95 |
+
end_time: End time for time span mode (datetime object)
|
| 96 |
+
"""
|
| 97 |
+
# Create directories for SDO data
|
| 98 |
[os.makedirs(os.path.join(self.SDODownloader.ds_path, str(c)), exist_ok=True) for c in
|
| 99 |
[94, 131, 171, 193, 211, 304]]
|
| 100 |
|
|
|
|
| 106 |
completed_dates = set(line.strip() for line in f)
|
| 107 |
print(f"Resuming from {len(completed_dates)} previously completed downloads")
|
| 108 |
|
| 109 |
+
if self.time_span_mode:
|
| 110 |
+
# Time span mode - download 1-minute cadence data for specified time range
|
| 111 |
+
if start_time is None or end_time is None:
|
| 112 |
+
raise ValueError("start_time and end_time must be provided for time span mode")
|
| 113 |
+
|
| 114 |
+
print(f"Processing time span mode: {start_time} to {end_time}")
|
| 115 |
+
print("Downloading 1-minute cadence data...")
|
| 116 |
+
|
| 117 |
+
# Download SXR data for the entire time span
|
| 118 |
+
self.retry_download_with_backoff(self.SXRDownloader.download_and_save_goes_data,
|
| 119 |
+
start_time.strftime('%Y-%m-%d'),
|
| 120 |
+
end_time.strftime('%Y-%m-%d'), max_workers=os.cpu_count()-1)
|
| 121 |
+
|
| 122 |
+
# Generate 1-minute intervals for the time span
|
| 123 |
+
processed_dates = set()
|
| 124 |
+
current_time = start_time
|
| 125 |
+
total_minutes = int((end_time - start_time).total_seconds() / 60)
|
| 126 |
+
|
| 127 |
+
print(f"Total time span: {total_minutes} minutes")
|
| 128 |
+
|
| 129 |
+
# Process in batches to avoid overwhelming the server
|
| 130 |
+
batch_size = 100 # Process 100 minutes at a time
|
| 131 |
+
batch_count = 0
|
| 132 |
+
|
| 133 |
+
while current_time < end_time:
|
| 134 |
+
batch_count += 1
|
| 135 |
+
batch_end = min(current_time + timedelta(minutes=batch_size), end_time)
|
| 136 |
+
batch_dates = []
|
| 137 |
+
|
| 138 |
+
# Generate dates for this batch
|
| 139 |
+
temp_time = current_time
|
| 140 |
+
while temp_time < batch_end:
|
| 141 |
+
if temp_time.isoformat() not in completed_dates:
|
| 142 |
+
# Check if data already exists in the download directory
|
| 143 |
+
if not self.check_existing_data(temp_time):
|
| 144 |
+
batch_dates.append(temp_time)
|
| 145 |
+
else:
|
| 146 |
+
print(f" ⏭ Data already exists for {temp_time}, skipping download")
|
| 147 |
+
completed_dates.add(temp_time.isoformat())
|
| 148 |
+
# Update progress file
|
| 149 |
+
with open(progress_file, 'a') as f:
|
| 150 |
+
f.write(f"{temp_time.isoformat()}\n")
|
| 151 |
+
temp_time += timedelta(minutes=1)
|
| 152 |
+
|
| 153 |
+
if batch_dates:
|
| 154 |
+
print(f"Processing batch {batch_count}: {len(batch_dates)} minutes from {current_time} to {batch_end}")
|
| 155 |
+
|
| 156 |
+
for i, d in enumerate(batch_dates):
|
| 157 |
+
try:
|
| 158 |
+
print(f" Downloading data for {d} ({i+1}/{len(batch_dates)})")
|
| 159 |
+
self.retry_download_with_backoff(self.SDODownloader.downloadDate, d)
|
| 160 |
+
processed_dates.add(d.isoformat())
|
| 161 |
+
completed_dates.add(d.isoformat())
|
| 162 |
+
|
| 163 |
+
# Update progress file
|
| 164 |
+
with open(progress_file, 'a') as f:
|
| 165 |
+
f.write(f"{d.isoformat()}\n")
|
| 166 |
+
|
| 167 |
+
print(f" ✓ Successfully downloaded {d}")
|
| 168 |
+
|
| 169 |
+
# Small delay between downloads
|
| 170 |
+
if i < len(batch_dates) - 1:
|
| 171 |
+
time.sleep(0.01)
|
| 172 |
+
|
| 173 |
+
except Exception as e:
|
| 174 |
+
print(f" ✗ Failed to download data for {d}: {e}")
|
| 175 |
+
if "Connection refused" in str(e) or "timeout" in str(e).lower():
|
| 176 |
+
print(f" Waiting 5 seconds before continuing...")
|
| 177 |
+
time.sleep(5)
|
| 178 |
+
continue
|
| 179 |
+
else:
|
| 180 |
+
print(f" ⏭ Skipping batch {batch_count} (all dates already completed)")
|
| 181 |
+
|
| 182 |
+
# Delay between batches
|
| 183 |
+
if batch_end < end_time:
|
| 184 |
+
print("Waiting 5 seconds before next batch...")
|
| 185 |
+
time.sleep(5)
|
| 186 |
+
|
| 187 |
+
current_time = batch_end
|
| 188 |
+
|
| 189 |
+
print(f"Time span processing completed. Downloaded {len(processed_dates)} data points.")
|
| 190 |
+
return
|
| 191 |
+
|
| 192 |
+
# Legacy flare mode
|
| 193 |
+
fl_events = self.FlareEventDownloader.download_events()
|
| 194 |
+
print(fl_events)
|
| 195 |
+
|
| 196 |
if self.flaring_data == True:
|
| 197 |
print("Processing flare events...")
|
| 198 |
if fl_events.empty:
|
|
|
|
| 210 |
range((end_time - start_time) // timedelta(minutes=1))]:
|
| 211 |
# Only download if we haven't processed this date yet
|
| 212 |
if d.isoformat() not in processed_dates:
|
| 213 |
+
# Check if data already exists in the download directory
|
| 214 |
+
if not self.check_existing_data(d):
|
| 215 |
+
self.retry_download_with_backoff(self.SDODownloader.downloadDate, d)
|
| 216 |
+
processed_dates.add(d.isoformat())
|
| 217 |
+
else:
|
| 218 |
+
print(f" ⏭ Data already exists for {d}, skipping download")
|
| 219 |
+
processed_dates.add(d.isoformat())
|
| 220 |
logging.info(f"Processed flare event {i + 1}/{len(fl_events)}: {event['event_starttime']} to {event['event_endtime']}")
|
| 221 |
elif self.flaring_data == False:
|
| 222 |
print("Processing non-flare events...")
|
|
|
|
| 263 |
for j, d in enumerate(batch):
|
| 264 |
# Only download if we haven't processed this date yet
|
| 265 |
if d.isoformat() not in processed_dates and d.isoformat() not in completed_dates:
|
| 266 |
+
# Check if data already exists in the download directory
|
| 267 |
+
if not self.check_existing_data(d):
|
| 268 |
+
try:
|
| 269 |
+
print(f" Downloading data for {d} ({j+1}/{len(batch)})")
|
| 270 |
+
self.retry_download_with_backoff(self.SDODownloader.downloadDate, d)
|
| 271 |
+
processed_dates.add(d.isoformat())
|
| 272 |
+
completed_dates.add(d.isoformat())
|
| 273 |
+
|
| 274 |
+
# Update progress file
|
| 275 |
+
with open(progress_file, 'a') as f:
|
| 276 |
+
f.write(f"{d.isoformat()}\n")
|
| 277 |
+
|
| 278 |
+
print(f" ✓ Successfully downloaded {d}")
|
| 279 |
+
|
| 280 |
+
# Add small delay between individual downloads
|
| 281 |
+
if j < len(batch) - 1:
|
| 282 |
+
time.sleep(.2)
|
| 283 |
+
|
| 284 |
+
except Exception as e:
|
| 285 |
+
print(f" ✗ Failed to download data for {d}: {e}")
|
| 286 |
+
# If it's a connection error, wait longer before retrying
|
| 287 |
+
if "Connection refused" in str(e) or "timeout" in str(e).lower():
|
| 288 |
+
print(f" Waiting 10 seconds before continuing...")
|
| 289 |
+
time.sleep(5)
|
| 290 |
+
continue
|
| 291 |
+
else:
|
| 292 |
+
print(f" ⏭ Data already exists for {d}, skipping download")
|
| 293 |
processed_dates.add(d.isoformat())
|
| 294 |
completed_dates.add(d.isoformat())
|
| 295 |
|
| 296 |
# Update progress file
|
| 297 |
with open(progress_file, 'a') as f:
|
| 298 |
f.write(f"{d.isoformat()}\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
elif d.isoformat() in completed_dates:
|
| 300 |
print(f" ⏭ Skipping {d} (already completed)")
|
| 301 |
processed_dates.add(d.isoformat())
|
|
|
|
| 311 |
if __name__ == '__main__':
|
| 312 |
parser = argparse.ArgumentParser(description='Download flare events and associated SDO data.')
|
| 313 |
parser.add_argument('--start_date', type=str, default='2023-6-15',
|
| 314 |
+
help='Start date for downloading data (YYYY-MM-DD)')
|
| 315 |
parser.add_argument('--end_date', type=str, default='2023-07-15',
|
| 316 |
+
help='End date for downloading data (YYYY-MM-DD)')
|
| 317 |
+
parser.add_argument('--start_time', type=str, default=None,
|
| 318 |
+
help='Start time for time span mode (YYYY-MM-DD HH:MM:SS)')
|
| 319 |
+
parser.add_argument('--end_time', type=str, default=None,
|
| 320 |
+
help='End time for time span mode (YYYY-MM-DD HH:MM:SS)')
|
| 321 |
parser.add_argument('--chunk_size', type=int, default=2000,
|
| 322 |
help='Number of days per chunk for processing (default: 180)')
|
| 323 |
parser.add_argument('--download_dir', type=str, default='/mnt/data',
|
| 324 |
help='Directory to save downloaded data (default: /mnt/data)')
|
| 325 |
+
parser.add_argument('--time_span_mode', action='store_true',
|
| 326 |
+
help='Use time span mode for 1-minute cadence downloads')
|
| 327 |
parser.add_argument('--flaring_data', dest='flaring_data', action='store_true',
|
| 328 |
help='Download flaring data (default)')
|
| 329 |
parser.add_argument('--non_flaring_data', dest='flaring_data', action='store_false',
|
| 330 |
help='Download non-flaring data')
|
| 331 |
+
parser.add_argument('--email', type=str, default='ggoodwin5@gsu.edu',
|
| 332 |
+
help='Email for SDO data download')
|
| 333 |
parser.set_defaults(flaring_data=True)
|
| 334 |
args = parser.parse_args()
|
| 335 |
|
|
|
|
| 338 |
end_date = args.end_date
|
| 339 |
chunk_size = args.chunk_size
|
| 340 |
flaring_data = args.flaring_data
|
| 341 |
+
time_span_mode = args.time_span_mode
|
| 342 |
+
email = args.email
|
| 343 |
+
if time_span_mode:
|
| 344 |
+
# Time span mode - use precise start and end times
|
| 345 |
+
if args.start_time is None or args.end_time is None:
|
| 346 |
+
print("Error: --start_time and --end_time must be provided for time span mode")
|
| 347 |
+
print("Example: --start_time '2023-06-15 00:00:00' --end_time '2023-06-15 23:59:59'")
|
| 348 |
+
exit(1)
|
| 349 |
+
|
| 350 |
+
try:
|
| 351 |
+
start_time = datetime.strptime(args.start_time, "%Y-%m-%d %H:%M:%S")
|
| 352 |
+
end_time = datetime.strptime(args.end_time, "%Y-%m-%d %H:%M:%S")
|
| 353 |
+
except ValueError:
|
| 354 |
+
print("Error: Invalid time format. Use YYYY-MM-DD HH:MM:SS")
|
| 355 |
+
exit(1)
|
| 356 |
+
|
| 357 |
+
print(f"Time span mode: {start_time} to {end_time}")
|
| 358 |
+
|
| 359 |
+
# Initialize downloaders
|
| 360 |
+
sxr_downloader = sxr.SXRDownloader(f"{download_dir}/GOES-timespan",
|
| 361 |
+
f"{download_dir}/GOES-timespan/combined")
|
| 362 |
+
sdo_downloader = sdo.SDODownloader(f"{download_dir}/SDO-AIA-timespan", email)
|
| 363 |
+
|
| 364 |
+
# Create a dummy flare event downloader (not used in time span mode)
|
| 365 |
+
flare_event = None
|
| 366 |
+
|
| 367 |
+
processor = FlareDownloadProcessor(flare_event, sdo_downloader, sxr_downloader,
|
| 368 |
+
flaring_data=flaring_data, time_span_mode=True)
|
| 369 |
+
processor.process_download(start_time=start_time, end_time=end_time)
|
| 370 |
+
|
| 371 |
+
else:
|
| 372 |
+
# Legacy flare mode
|
| 373 |
+
# Parse start and end dates
|
| 374 |
+
start = datetime.strptime(start_date, "%Y-%m-%d")
|
| 375 |
+
end = datetime.strptime(end_date, "%Y-%m-%d")
|
| 376 |
|
| 377 |
+
# Process in chunks
|
| 378 |
+
current_start = start
|
| 379 |
+
while current_start < end:
|
| 380 |
+
current_end = min(current_start + timedelta(days=chunk_size), end)
|
| 381 |
+
print(f"Processing chunk: {current_start.strftime('%Y-%m-%d')} to {current_end.strftime('%Y-%m-%d')}")
|
| 382 |
|
| 383 |
+
sxr_downloader = sxr.SXRDownloader(f"{download_dir}/GOES-flaring",
|
| 384 |
+
f"{download_dir}/GOES-flaring/combined")
|
| 385 |
+
flare_event = fed.FlareEventDownloader(
|
| 386 |
+
current_start.strftime("%Y-%m-%d"),
|
| 387 |
+
current_end.strftime("%Y-%m-%d"),
|
| 388 |
+
event_type="FL",
|
| 389 |
+
GOESCls="M1.0",
|
| 390 |
+
directory=f"{download_dir}/SDO-AIA-flaring/FlareEvents"
|
| 391 |
+
)
|
| 392 |
+
sdo_downloader = sdo.SDODownloader(f"{download_dir}/SDO-AIA-flaring", email)
|
| 393 |
|
| 394 |
+
processor = FlareDownloadProcessor(flare_event, sdo_downloader, sxr_downloader,
|
| 395 |
+
flaring_data=flaring_data, time_span_mode=False)
|
| 396 |
+
processor.process_download()
|
| 397 |
|
| 398 |
+
current_start = current_end
|
forecasting/inference/checkpoint_list.yaml
CHANGED
|
@@ -2,22 +2,19 @@
|
|
| 2 |
# This file contains a list of model checkpoints to evaluate
|
| 3 |
|
| 4 |
checkpoints:
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
# - name: "
|
| 10 |
-
# checkpoint_path: "/mnt/data/COMBINED/new-checkpoint/vit-patch-model-2d-embeddings-reduced-sensitivity-changed-base-weights-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
# - name: "
|
| 16 |
-
# checkpoint_path: "/mnt/data/COMBINED/new-checkpoint/vit-patch-model-2d-embeddings-reduced-sensitivity-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
# - name: "rs-epoch50"
|
| 20 |
-
# checkpoint_path: "/mnt/data/COMBINED/new-checkpoint/vit-patch-model-2d-embeddings-reduced-sensitivity-epoch=50-val_total_loss=0.0382.ckpt"
|
| 21 |
|
| 22 |
# Add more checkpoints as needed
|
| 23 |
# Each checkpoint should have:
|
|
|
|
| 2 |
# This file contains a list of model checkpoints to evaluate
|
| 3 |
|
| 4 |
checkpoints:
|
| 5 |
+
# - name: "claude-final"
|
| 6 |
+
# checkpoint_path: "/mnt/data/COMBINED/new-checkpoint/vit-patch-model-2d-embeddings-claude-suggested-weights-final-20250921_225446.pth"
|
| 7 |
+
# - name: "rs-final"
|
| 8 |
+
# checkpoint_path: "/mnt/data/COMBINED/new-checkpoint/vit-patch-model-2d-embeddings-reduced-sensitivity-final-20250921_185953.pth"
|
| 9 |
+
# - name: "baseweights-final"
|
| 10 |
+
# checkpoint_path: "/mnt/data/COMBINED/new-checkpoint/vit-patch-model-2d-embeddings-reduced-sensitivity-changed-base-weights-final-20250921_223323.pth"
|
| 11 |
+
- name: "claude-mse"
|
| 12 |
+
checkpoint_path: "/mnt/data/COMBINED/new-checkpoint/vit-mse-claude-epoch=62-val_total_loss=0.1904.ckpt"
|
| 13 |
+
- name: "baseweights-mse"
|
| 14 |
+
checkpoint_path: /mnt/data/COMBINED/new-checkpoint/vit-mse-base-weights-epoch=62-val_total_loss=0.2893.ckpt"
|
| 15 |
+
# - name: "stereo-final"
|
| 16 |
+
# checkpoint_path: "/mnt/data/COMBINED/new-checkpoint/vit-patch-model-2d-embeddings-reduced-sensitivity-STEREO-final-20250921_183739.pth"
|
| 17 |
+
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
# Add more checkpoints as needed
|
| 20 |
# Each checkpoint should have:
|
forecasting/inference/evaluation.py
CHANGED
|
@@ -344,7 +344,7 @@ class SolarFlareEvaluator:
|
|
| 344 |
'B': (1e-7, 1e-6, "#FFAAA5"),
|
| 345 |
'C': (1e-6, 1e-5, "#FFAAA5"),
|
| 346 |
'M': (1e-5, 1e-4, "#FFAAA5"),
|
| 347 |
-
'X': (1e-4, 1e-
|
| 348 |
}
|
| 349 |
|
| 350 |
for class_name, (min_flux, max_flux, color) in flare_classes_mae.items():
|
|
@@ -646,10 +646,10 @@ class SolarFlareEvaluator:
|
|
| 646 |
# Create figure with transparent background
|
| 647 |
fig = plt.figure(figsize=(10, 5))
|
| 648 |
fig.patch.set_alpha(0.0) # Transparent background
|
| 649 |
-
gs_left = fig.add_gridspec(1, 1, left=0.0, right=0.35, width_ratios=[1], hspace=0, wspace=0.
|
| 650 |
|
| 651 |
# Right gridspec for SXR plot (column 3) with more padding
|
| 652 |
-
gs_right = fig.add_gridspec(2, 1, left=0.45, right=1, hspace=0)
|
| 653 |
|
| 654 |
wavs = ['94', '131', '171', '193', '211', '304']
|
| 655 |
att_max = np.percentile(attention_data, 100)
|
|
@@ -675,9 +675,9 @@ class SolarFlareEvaluator:
|
|
| 675 |
# Plot SXR data with uncertainty bands
|
| 676 |
sxr_ax = fig.add_subplot(gs_right[:, 0])
|
| 677 |
|
| 678 |
-
# Set SXR plot background to
|
| 679 |
-
sxr_ax.set_facecolor('#FFEEE6') # Light background for SXR plot
|
| 680 |
-
sxr_ax.patch.set_alpha(1.0) # Make
|
| 681 |
|
| 682 |
if sxr_window is not None and not sxr_window.empty:
|
| 683 |
# Plot ground truth (no uncertainty)
|
|
@@ -787,8 +787,10 @@ class SolarFlareEvaluator:
|
|
| 787 |
text.set_fontfamily('Barlow')
|
| 788 |
|
| 789 |
sxr_ax.grid(True, alpha=0.3, color='black')
|
| 790 |
-
sxr_ax.tick_params(axis='x', rotation=15, labelsize=12, colors='white'
|
| 791 |
-
|
|
|
|
|
|
|
| 792 |
|
| 793 |
# Set tick labels to Barlow font and white color
|
| 794 |
for label in sxr_ax.get_xticklabels():
|
|
@@ -797,8 +799,10 @@ class SolarFlareEvaluator:
|
|
| 797 |
for label in sxr_ax.get_yticklabels():
|
| 798 |
label.set_fontfamily('Barlow')
|
| 799 |
label.set_color('white')
|
| 800 |
-
|
| 801 |
-
#
|
|
|
|
|
|
|
| 802 |
try:
|
| 803 |
sxr_ax.set_yscale('log')
|
| 804 |
except:
|
|
@@ -814,7 +818,7 @@ class SolarFlareEvaluator:
|
|
| 814 |
|
| 815 |
#plt.suptitle(f'Timestamp: {timestamp}', fontsize=14)
|
| 816 |
#plt.tight_layout()
|
| 817 |
-
plt.savefig(save_path, dpi=500, facecolor='none',
|
| 818 |
plt.close()
|
| 819 |
|
| 820 |
print(f"Worker {os.getpid()}: Completed {timestamp}")
|
|
|
|
| 344 |
'B': (1e-7, 1e-6, "#FFAAA5"),
|
| 345 |
'C': (1e-6, 1e-5, "#FFAAA5"),
|
| 346 |
'M': (1e-5, 1e-4, "#FFAAA5"),
|
| 347 |
+
'X': (1e-4, 1e-2, "#FFAAA5")
|
| 348 |
}
|
| 349 |
|
| 350 |
for class_name, (min_flux, max_flux, color) in flare_classes_mae.items():
|
|
|
|
| 646 |
# Create figure with transparent background
|
| 647 |
fig = plt.figure(figsize=(10, 5))
|
| 648 |
fig.patch.set_alpha(0.0) # Transparent background
|
| 649 |
+
gs_left = fig.add_gridspec(1, 1, left=0.0, right=0.35, width_ratios=[1], hspace=0, wspace=0.1)
|
| 650 |
|
| 651 |
# Right gridspec for SXR plot (column 3) with more padding
|
| 652 |
+
gs_right = fig.add_gridspec(2, 1, left=0.45, right=1, hspace=0.1)
|
| 653 |
|
| 654 |
wavs = ['94', '131', '171', '193', '211', '304']
|
| 655 |
att_max = np.percentile(attention_data, 100)
|
|
|
|
| 675 |
# Plot SXR data with uncertainty bands
|
| 676 |
sxr_ax = fig.add_subplot(gs_right[:, 0])
|
| 677 |
|
| 678 |
+
# Set SXR plot background to have light background inside plot area
|
| 679 |
+
sxr_ax.set_facecolor('#FFEEE6') # Light background for SXR plot area
|
| 680 |
+
sxr_ax.patch.set_alpha(1.0) # Make axes patch opaque
|
| 681 |
|
| 682 |
if sxr_window is not None and not sxr_window.empty:
|
| 683 |
# Plot ground truth (no uncertainty)
|
|
|
|
| 787 |
text.set_fontfamily('Barlow')
|
| 788 |
|
| 789 |
sxr_ax.grid(True, alpha=0.3, color='black')
|
| 790 |
+
sxr_ax.tick_params(axis='x', rotation=15, labelsize=12, colors='white',
|
| 791 |
+
)
|
| 792 |
+
sxr_ax.tick_params(axis='y', labelsize=12, colors='white',
|
| 793 |
+
)
|
| 794 |
|
| 795 |
# Set tick labels to Barlow font and white color
|
| 796 |
for label in sxr_ax.get_xticklabels():
|
|
|
|
| 799 |
for label in sxr_ax.get_yticklabels():
|
| 800 |
label.set_fontfamily('Barlow')
|
| 801 |
label.set_color('white')
|
| 802 |
+
|
| 803 |
+
# Set graph border (spines) to white
|
| 804 |
+
for spine in sxr_ax.spines.values():
|
| 805 |
+
spine.set_color('white')
|
| 806 |
try:
|
| 807 |
sxr_ax.set_yscale('log')
|
| 808 |
except:
|
|
|
|
| 818 |
|
| 819 |
#plt.suptitle(f'Timestamp: {timestamp}', fontsize=14)
|
| 820 |
#plt.tight_layout()
|
| 821 |
+
plt.savefig(save_path, dpi=500, facecolor='none',bbox_inches='tight')
|
| 822 |
plt.close()
|
| 823 |
|
| 824 |
print(f"Worker {os.getpid()}: Completed {timestamp}")
|
forecasting/inference/evaluation_config.yaml
CHANGED
|
@@ -21,9 +21,14 @@ evaluation:
|
|
| 21 |
# Examples: 1e-6 (C-class and above), 1e-5 (M-class and above), 1e-4 (X-class only)
|
| 22 |
|
| 23 |
# Time range for evaluation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
time_range:
|
| 25 |
-
start_time: "
|
| 26 |
-
end_time: "
|
| 27 |
interval_minutes: 1
|
| 28 |
|
| 29 |
# Plotting parameters
|
|
|
|
| 21 |
# Examples: 1e-6 (C-class and above), 1e-5 (M-class and above), 1e-4 (X-class only)
|
| 22 |
|
| 23 |
# Time range for evaluation
|
| 24 |
+
# time_range:
|
| 25 |
+
# start_time: "2023-08-05T20:30:00"
|
| 26 |
+
# end_time: "2023-08-05T23:56:00"
|
| 27 |
+
# interval_minutes: 1
|
| 28 |
+
|
| 29 |
time_range:
|
| 30 |
+
start_time: "2014-08-01T00:00:00"
|
| 31 |
+
end_time: "2014-08-31T23:59:00"
|
| 32 |
interval_minutes: 1
|
| 33 |
|
| 34 |
# Plotting parameters
|
forecasting/inference/inference_stereo.yaml
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Base inference configuration template
|
| 2 |
+
# This will be used as a template for each checkpoint evaluation
|
| 3 |
+
|
| 4 |
+
# Base directories
|
| 5 |
+
base_data_dir: "/mnt/data/COMBINED"
|
| 6 |
+
output_path: "PLACEHOLDER_OUTPUT_PATH" # Will be replaced by batch script
|
| 7 |
+
weight_path: "PLACEHOLDER_WEIGHT_PATH" # Will be replaced by batch script
|
| 8 |
+
|
| 9 |
+
# Dataset configuration
|
| 10 |
+
SolO: "false"
|
| 11 |
+
Stereo: "false"
|
| 12 |
+
|
| 13 |
+
# Model configuration
|
| 14 |
+
model: "vit" # Options: "vit", "hybrid", "vitpatch", "fusion"
|
| 15 |
+
wavelengths: [171, 193, 211, 304] # AIA wavelengths in Angstroms
|
| 16 |
+
|
| 17 |
+
# MC Dropout configuration
|
| 18 |
+
mc:
|
| 19 |
+
active: "false"
|
| 20 |
+
runs: 5
|
| 21 |
+
|
| 22 |
+
# SolO data configuration (if using SolO dataset)
|
| 23 |
+
SolO_data:
|
| 24 |
+
solo_img_dir: "/mnt/data/ML-Ready_clean/SolO/SolO/ML-Ready-SolO"
|
| 25 |
+
sxr_dir: "${base_data_dir}/SXR"
|
| 26 |
+
sxr_norm_path: "${base_data_dir}/SolO/SXR/normalized_sxr.npy"
|
| 27 |
+
|
| 28 |
+
# Stereo data configuration (if using Stereo dataset)
|
| 29 |
+
Stereo_data:
|
| 30 |
+
stereo_img_dir: "/mnt/data/ML-Ready-mixed/STEREO_processed"
|
| 31 |
+
sxr_dir: "/mnt/data/ML-Ready-mixed/ML-Ready-mixed/SXR"
|
| 32 |
+
sxr_norm_path: "/mnt/data/ML-READY/SXR/normalized_sxr.npy"
|
| 33 |
+
|
| 34 |
+
# Model parameters
|
| 35 |
+
model_params:
|
| 36 |
+
input_size: 512
|
| 37 |
+
patch_size: 16
|
| 38 |
+
batch_size: 16
|
| 39 |
+
no_weights: false # Set to true to skip saving attention weights
|
| 40 |
+
|
| 41 |
+
# Model architecture parameters (should match training config)
|
| 42 |
+
vit_custom:
|
| 43 |
+
embed_dim: 512
|
| 44 |
+
num_channels: 4
|
| 45 |
+
num_classes: 1
|
| 46 |
+
patch_size: 16
|
| 47 |
+
num_patches: 1024
|
| 48 |
+
hidden_dim: 512
|
| 49 |
+
num_heads: 8
|
| 50 |
+
num_layers: 6
|
| 51 |
+
dropout: 0.1
|
| 52 |
+
|
| 53 |
+
# Data paths
|
| 54 |
+
data:
|
| 55 |
+
aia_dir: "${base_data_dir}/AIA-SPLIT/"
|
| 56 |
+
sxr_dir: "${base_data_dir}/SXR-SPLIT/"
|
| 57 |
+
sxr_norm_path: "${base_data_dir}/SXR-SPLIT/normalized_sxr.npy"
|
| 58 |
+
checkpoint_path: "PLACEHOLDER_CHECKPOINT_PATH" # Will be replaced by batch script
|
| 59 |
+
|
| 60 |
+
# MEGSAI parameters (should match training config)
|
| 61 |
+
megsai:
|
| 62 |
+
cnn_model: "updated"
|
| 63 |
+
cnn_dp: 0.2
|
| 64 |
+
weight_decay: 1e-5
|
| 65 |
+
cosine_restart_T0: 50
|
| 66 |
+
cosine_restart_Tmult: 2
|
| 67 |
+
cosine_eta_min: 1e-7
|
| 68 |
+
|
| 69 |
+
# Fusion parameters (if using fusion model)
|
| 70 |
+
fusion:
|
| 71 |
+
scalar_branch: "hybrid"
|
| 72 |
+
lr: 0.0001
|
| 73 |
+
lambda_vit_to_target: 0.3
|
| 74 |
+
lambda_scalar_to_target: 0.1
|
| 75 |
+
learnable_gate: true
|
| 76 |
+
gate_init_bias: 5.0
|
| 77 |
+
scalar_kwargs:
|
| 78 |
+
d_input: 6
|
| 79 |
+
d_output: 1
|
| 80 |
+
cnn_model: "updated"
|
| 81 |
+
cnn_dp: 0.75
|
forecasting/inference/inference_template.yaml
CHANGED
|
@@ -44,10 +44,10 @@ vit_custom:
|
|
| 44 |
num_channels: 6
|
| 45 |
num_classes: 1
|
| 46 |
patch_size: 16
|
| 47 |
-
num_patches:
|
| 48 |
hidden_dim: 512
|
| 49 |
-
num_heads:
|
| 50 |
-
num_layers:
|
| 51 |
dropout: 0.1
|
| 52 |
|
| 53 |
# Data paths
|
|
|
|
| 44 |
num_channels: 6
|
| 45 |
num_classes: 1
|
| 46 |
patch_size: 16
|
| 47 |
+
num_patches: 1024
|
| 48 |
hidden_dim: 512
|
| 49 |
+
num_heads: 8
|
| 50 |
+
num_layers: 6
|
| 51 |
dropout: 0.1
|
| 52 |
|
| 53 |
# Data paths
|
forecasting/models/vit_patch_model.py
CHANGED
|
@@ -84,6 +84,7 @@ class ViT(pl.LightningModule):
|
|
| 84 |
|
| 85 |
#Also calculate huber loss for logging
|
| 86 |
huber_loss = F.huber_loss(norm_preds_squeezed, sxr, delta=.3)
|
|
|
|
| 87 |
|
| 88 |
|
| 89 |
# Log adaptation info
|
|
@@ -349,6 +350,7 @@ class SXRRegressionDynamicLoss:
|
|
| 349 |
|
| 350 |
def calculate_loss(self, preds_norm, sxr_norm, sxr_un):
|
| 351 |
base_loss = F.huber_loss(preds_norm, sxr_norm, delta=.3, reduction='none')
|
|
|
|
| 352 |
weights = self._get_adaptive_weights(sxr_un)
|
| 353 |
self._update_tracking(sxr_un, sxr_norm, preds_norm)
|
| 354 |
weighted_loss = base_loss * weights
|
|
@@ -462,6 +464,7 @@ class SXRRegressionDynamicLoss:
|
|
| 462 |
|
| 463 |
#Huber loss
|
| 464 |
error = F.huber_loss(preds_norm, sxr_norm, delta=.3, reduction='none')
|
|
|
|
| 465 |
error = error.detach().cpu().numpy()
|
| 466 |
|
| 467 |
|
|
|
|
| 84 |
|
| 85 |
#Also calculate huber loss for logging
|
| 86 |
huber_loss = F.huber_loss(norm_preds_squeezed, sxr, delta=.3)
|
| 87 |
+
#huber_loss = F.mse_loss(norm_preds_squeezed, sxr)
|
| 88 |
|
| 89 |
|
| 90 |
# Log adaptation info
|
|
|
|
| 350 |
|
| 351 |
def calculate_loss(self, preds_norm, sxr_norm, sxr_un):
|
| 352 |
base_loss = F.huber_loss(preds_norm, sxr_norm, delta=.3, reduction='none')
|
| 353 |
+
#base_loss = F.mse_loss(preds_norm, sxr_norm, reduction='none')
|
| 354 |
weights = self._get_adaptive_weights(sxr_un)
|
| 355 |
self._update_tracking(sxr_un, sxr_norm, preds_norm)
|
| 356 |
weighted_loss = base_loss * weights
|
|
|
|
| 464 |
|
| 465 |
#Huber loss
|
| 466 |
error = F.huber_loss(preds_norm, sxr_norm, delta=.3, reduction='none')
|
| 467 |
+
#error = F.mse_loss(preds_norm, sxr_norm, reduction='none')
|
| 468 |
error = error.detach().cpu().numpy()
|
| 469 |
|
| 470 |
|
forecasting/models/vit_patch_model_uncertainty.py
ADDED
|
@@ -0,0 +1,529 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections import deque
|
| 2 |
+
|
| 3 |
+
import math
|
| 4 |
+
import numpy as np
|
| 5 |
+
import torch
|
| 6 |
+
import torch.nn as nn
|
| 7 |
+
import torch.nn.functional as F
|
| 8 |
+
import torch.optim as optim
|
| 9 |
+
import torch.utils.data as data
|
| 10 |
+
import torchvision
|
| 11 |
+
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
|
| 12 |
+
from torchvision import transforms
|
| 13 |
+
import pytorch_lightning as pl
|
| 14 |
+
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
|
| 15 |
+
|
| 16 |
+
#norm = np.load("/mnt/data/ML-Ready_clean/mixed_data/SXR/normalized_sxr.npy")
|
| 17 |
+
|
| 18 |
+
def normalize_sxr(unnormalized_values, sxr_norm):
    """Map raw SXR flux values into normalized (z-scored) log10 space.

    Args:
        unnormalized_values: Tensor of raw flux values (non-negative).
        sxr_norm: Two-element tensor holding (mean, std) of the log10 flux.

    Returns:
        Tensor of normalized log10 flux values.
    """
    mean = float(sxr_norm[0].item())
    std = float(sxr_norm[1].item())
    # The small epsilon keeps log10 finite when the flux is exactly zero.
    return (torch.log10(unnormalized_values + 1e-8) - mean) / std
|
| 23 |
+
|
| 24 |
+
def unnormalize_sxr(normalized_values, sxr_norm):
    """Invert normalize_sxr: map z-scored log10 flux back to raw flux."""
    mean = float(sxr_norm[0].item())
    std = float(sxr_norm[1].item())
    # Undo the z-score, exponentiate, and remove the epsilon added by normalize_sxr.
    return 10 ** (normalized_values * std + mean) - 1e-8
|
| 26 |
+
|
| 27 |
+
class ViTUncertainty(pl.LightningModule):
    """Lightning wrapper around a patch-wise VisionTransformer that regresses
    global SXR flux and an auxiliary per-sample uncertainty estimate, trained
    with the adaptive class-weighted SXRRegressionDynamicLoss.
    """

    def __init__(self, model_kwargs, sxr_norm, base_weights=None):
        """
        Args:
            model_kwargs: VisionTransformer kwargs plus 'lr'; 'lr' and
                'num_classes' are stripped before model construction since
                VisionTransformer does not accept them.
            sxr_norm: (mean, std) of log10 SXR flux used for (un)normalization.
            base_weights: optional dict of per-class loss weights
                ('quiet'/'c_class'/'m_class'/'x_class'); when None,
                SXRRegressionDynamicLoss falls back to its own defaults.
        """
        super().__init__()
        self.model_kwargs = model_kwargs
        self.lr = model_kwargs['lr']
        self.save_hyperparameters()
        # Remove kwargs that VisionTransformer.__init__ does not accept.
        filtered_kwargs = dict(model_kwargs)
        filtered_kwargs.pop('lr', None)
        filtered_kwargs.pop('num_classes', None)
        self.model = VisionTransformer(**filtered_kwargs)
        #Set the base weights based on the number of samples in each class within training data
        self.base_weights = base_weights
        self.adaptive_loss = SXRRegressionDynamicLoss(window_size=15000, base_weights=self.base_weights)
        self.sxr_norm = sxr_norm


    def forward(self, x, return_attention=True):
        # Returns (global_flux_raw, attention_weights, patch_flux_raw, total_error)
        # when return_attention=True, else (global_flux_raw, patch_flux_raw, total_error).
        return self.model(x, self.sxr_norm, return_attention=return_attention)

    def forward_for_callback(self, x, return_attention=True):
        """Forward method compatible with AttentionMapCallback"""
        global_flux_raw, attention_weights, patch_flux_raw, patch_error = self.forward(x, return_attention=return_attention)
        return global_flux_raw, attention_weights

    def configure_optimizers(self):
        """Set up AdamW with a cosine warm-restart schedule stepped per epoch."""
        # Use AdamW with weight decay for better regularization
        optimizer = torch.optim.AdamW(
            self.parameters(),
            lr=self.lr,
            weight_decay=0.00001,
        )

        scheduler = CosineAnnealingWarmRestarts(
            optimizer,
            T_0=50,       # First restart after 50 epochs
            T_mult=2,     # Double the cycle length after each restart
            eta_min=1e-7  # Minimum learning rate
        )

        return {
            'optimizer': optimizer,
            'lr_scheduler': {
                'scheduler': scheduler,
                'interval': 'epoch',
                'frequency': 1,
                'name': 'learning_rate'
            }
        }

    # M/X Class Flare Detection Optimized Weights

    def _calculate_loss(self, batch, mode="train"):
        """Shared loss computation + logging for train/val/test steps.

        Args:
            batch: (imgs, sxr) pair; sxr targets are assumed to arrive in
                normalized log10 space (they are unnormalized below for
                class bucketing) -- confirm against the dataloader.
            mode: "train", "val" or "test"; controls which metrics are logged.

        Returns:
            Combined weighted regression + uncertainty loss.
        """
        imgs, sxr = batch
        # Model returns raw (unnormalized) flux predictions and an error estimate.
        raw_preds, raw_patch_contributions, raw_error = self.model(imgs,self.sxr_norm)
        raw_preds_squeezed = torch.squeeze(raw_preds)
        sxr_un = unnormalize_sxr(sxr, self.sxr_norm)

        norm_preds_squeezed = normalize_sxr(raw_preds_squeezed, self.sxr_norm)
        raw_error_squeezed = torch.squeeze(raw_error)
        # Use adaptive rare event loss
        loss, error_loss, weights = self.adaptive_loss.calculate_loss(
            norm_preds_squeezed, sxr, sxr_un, raw_error_squeezed
        )

        #Also calculate huber loss for logging
        huber_loss = F.huber_loss(norm_preds_squeezed, sxr, delta=.3)

        # Log adaptation info
        if mode == "train":
            # Always log learning rate (every step)
            current_lr = self.trainer.optimizers[0].param_groups[0]['lr']
            self.log('learning_rate', current_lr, on_step=True, on_epoch=False,
                     prog_bar=True, logger=True, sync_dist=True)

            self.log("train_total_loss", loss, on_step=True, on_epoch=True,
                     prog_bar=True, logger=True, sync_dist=True)
            self.log("train_huber_loss", huber_loss, on_step=True, on_epoch=True,
                     prog_bar=True, logger=True, sync_dist=True)
            self.log("train_error_loss", error_loss, on_step=True, on_epoch=True,
                     prog_bar=True, logger=True, sync_dist=True)
            # Detailed diagnostics only every 200 steps
            if self.global_step % 200 == 0:
                multipliers = self.adaptive_loss.get_current_multipliers()
                for key, value in multipliers.items():
                    self.log(f"adaptive/{key}", value, on_step=True, on_epoch=False)

                self.log("adaptive/avg_weight", weights.mean(), on_step=True, on_epoch=False)
                self.log("adaptive/max_weight", weights.max(), on_step=True, on_epoch=False)

        if mode == "val":
            # Validation: typically only log epoch aggregates
            multipliers = self.adaptive_loss.get_current_multipliers()
            for key, value in multipliers.items():
                self.log(f"val/adaptive/{key}", value, on_step=False, on_epoch=True)
            self.log("val_total_loss", loss, on_step=False, on_epoch=True, prog_bar=True, logger=True, sync_dist=True)
            self.log("val_huber_loss", huber_loss, on_step=False, on_epoch=True, prog_bar=True, logger=True, sync_dist=True)
            self.log("val_error_loss", error_loss, on_step=False, on_epoch=True, prog_bar=True, logger=True, sync_dist=True)

        return loss

    def training_step(self, batch, batch_idx):
        return self._calculate_loss(batch, mode="train")

    def validation_step(self, batch, batch_idx):
        self._calculate_loss(batch, mode="val")

    def test_step(self, batch, batch_idx):
        # NOTE(review): mode "test" logs nothing in _calculate_loss -- confirm intended.
        self._calculate_loss(batch, mode="test")

    def apply_wavelength_dropout(self, x, dropout_prob=0.3):
        """Randomly zero out some wavelengths during training"""
        if self.training and torch.rand(1).item() < dropout_prob:
            # x shape: [B, H, W, num_channels]
            num_keep = torch.randint(1, self.model_kwargs['num_channels'], (1,)).item()
            keep_indices = torch.randperm(self.model_kwargs['num_channels'])[:num_keep]

            mask = torch.zeros(self.model_kwargs['num_channels'], device=x.device)
            mask[keep_indices] = 1.0

            x = x * mask.view(1, 1, 1, -1)
        return x
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
class VisionTransformer(nn.Module):
    def __init__(
        self,
        embed_dim,
        hidden_dim,
        num_channels,
        num_heads,
        num_layers,
        patch_size,
        num_patches,
        dropout
    ):
        """Vision Transformer that outputs flux contributions per patch.

        Args:
            embed_dim: Dimensionality of the input feature vectors to the Transformer
            hidden_dim: Dimensionality of the hidden layer in the feed-forward networks
                within the Transformer
            num_channels: Number of channels of the input (3 for RGB)
            num_heads: Number of heads to use in the Multi-Head Attention block
            num_layers: Number of layers to use in the Transformer
            patch_size: Number of pixels that the patches have per dimension
            num_patches: Maximum number of patches an image can have
                (assumed to be a perfect square for the 2D positional grid)
            dropout: Amount of dropout to apply in the feed-forward network and
                on the input encoding
        """
        super().__init__()

        self.patch_size = patch_size

        # Layers/Networks
        self.input_layer = nn.Linear(num_channels * (patch_size ** 2), embed_dim)

        self.transformer_blocks = nn.ModuleList([
            AttentionBlock(embed_dim, hidden_dim, num_heads, dropout=dropout)
            for _ in range(num_layers)
        ])

        # Two heads per patch embedding: flux prediction and error (uncertainty) prediction.
        self.mlp_head = nn.Sequential(nn.LayerNorm(embed_dim), nn.Linear(embed_dim, 1))
        self.error_head = nn.Sequential(nn.LayerNorm(embed_dim), nn.Linear(embed_dim, 1))
        self.dropout = nn.Dropout(dropout)

        # Parameters/Embeddings
        # NOTE(review): cls_token and 1D pos_embedding are unused by forward()
        # (the CLS path is commented out there) -- kept for checkpoint compatibility?
        self.cls_token = nn.Parameter(torch.randn(1, 1, embed_dim))
        self.pos_embedding = nn.Parameter(torch.randn(1, 1 + num_patches, embed_dim))
        self.grid_h = int(math.sqrt(num_patches))
        self.grid_w = int(math.sqrt(num_patches))
        self.pos_embedding_2d = nn.Parameter(torch.randn(1, self.grid_h, self.grid_w, embed_dim))


    def forward(self, x, sxr_norm, return_attention=False):
        """Predict per-patch and global raw SXR flux plus a total error estimate.

        Args:
            x: Image batch of shape [B, H, W, C] (channels-last).
            sxr_norm: (mean, std) of log10 flux used to map patch logits to raw flux.
            return_attention: If True, also return per-layer attention weights.

        Returns:
            (global_flux_raw, attention_weights, patch_flux_raw, total_error)
            when return_attention=True, otherwise
            (global_flux_raw, patch_flux_raw, total_error).
        """
        # Preprocess input
        x = img_to_patch(x, self.patch_size)
        B, T, _ = x.shape
        x = self.input_layer(x)

        # Add CLS token and positional encoding
        #cls_token = self.cls_token.repeat(B, 1, 1)
        #x = torch.cat([cls_token, x], dim=1)
        #x = x + self.pos_embedding[:, : T + 1]
        x = self._add_2d_positional_encoding(x)

        # Apply Transformer blocks
        x = self.dropout(x)
        x = x.transpose(0, 1)  # [T, B, embed_dim]

        attention_weights = []
        for block in self.transformer_blocks:
            if return_attention:
                x, attn_weights = block(x, return_attention=True)
                attention_weights.append(attn_weights)
            else:
                x = block(x)
        #Extract patch logits and total error
        patch_embeddings = x.transpose(0, 1)  # [B, num_patches, embed_dim]
        patch_logits = self.mlp_head(patch_embeddings).squeeze(-1)  # normalized log predictions [B, num_patches]
        patch_error = self.error_head(patch_embeddings).squeeze(-1)  # [B, num_patches]

        # --- Convert to raw SXR ---
        mean, std = sxr_norm  # in log10 space
        patch_flux_raw = torch.clamp(10 ** (patch_logits * std + mean)- 1e-8, min=1e-15, max=1)
        patch_error_raw = torch.clamp(10 ** (patch_error * std + mean)- 1e-8, min=1e-30, max=1)

        # Sum over patches for raw global flux
        global_flux_raw = patch_flux_raw.sum(dim=1, keepdim=True)
        #Calculate total error as sqrt of sum of squares of patch errors
        total_error = torch.sqrt(patch_error_raw.pow(2).sum(dim=1, keepdim=True))

        # Ensure global flux is never zero (add small epsilon if needed)
        global_flux_raw = torch.clamp(global_flux_raw, min=1e-15)

        if return_attention:
            return global_flux_raw, attention_weights, patch_flux_raw, total_error
        else:
            return global_flux_raw, patch_flux_raw, total_error

    def _add_2d_positional_encoding(self, x):
        """Add learned 2D positional encoding to patch embeddings"""
        B, T, embed_dim = x.shape
        num_patches = T  # No CLS token in the sequence

        # Reshape patches to 2D grid: [B, grid_h, grid_w, embed_dim]
        patch_embeddings = x.reshape(B, self.grid_h, self.grid_w, embed_dim)

        # Add learned 2D positional encoding
        # Broadcasting: [B, grid_h, grid_w, embed_dim] + [1, grid_h, grid_w, embed_dim]
        patch_embeddings = patch_embeddings + self.pos_embedding_2d

        # Reshape back to sequence format: [B, num_patches, embed_dim]
        patch_embeddings = patch_embeddings.reshape(B, num_patches, embed_dim)

        return patch_embeddings
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
class AttentionBlock(nn.Module):
    def __init__(self, embed_dim, hidden_dim, num_heads, dropout=0.0):
        """Pre-norm Transformer encoder block (self-attention + MLP, residuals).

        Args:
            embed_dim: Dimensionality of input and attention feature vectors
            hidden_dim: Dimensionality of hidden layer in feed-forward network
                (usually 2-4x larger than embed_dim)
            num_heads: Number of heads to use in the Multi-Head Attention block
            dropout: Amount of dropout to apply in the feed-forward network
        """
        super().__init__()

        self.layer_norm_1 = nn.LayerNorm(embed_dim)
        self.attn = nn.MultiheadAttention(embed_dim, num_heads, batch_first=False)
        self.layer_norm_2 = nn.LayerNorm(embed_dim)
        self.linear = nn.Sequential(
            nn.Linear(embed_dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, embed_dim),
            nn.Dropout(dropout),
        )

    def forward(self, x, return_attention=False):
        """Apply self-attention and the feed-forward network with residuals.

        Args:
            x: Tensor of shape [T, B, embed_dim] (sequence-first).
            return_attention: If True, also return per-head attention weights.

        Returns:
            Updated tensor of the same shape, plus attention weights when requested.
        """
        inp_x = self.layer_norm_1(x)

        if return_attention:
            # average_attn_weights=False preserves per-head attention maps.
            attn_output, attn_weights = self.attn(inp_x, inp_x, inp_x, average_attn_weights=False)
            x = x + attn_output
            x = x + self.linear(self.layer_norm_2(x))
            return x, attn_weights
        else:
            # need_weights=False skips materializing the attention matrix we
            # would otherwise discard; outputs are identical and the fast
            # (fused) attention path can be used.
            attn_output, _ = self.attn(inp_x, inp_x, inp_x, need_weights=False)
            x = x + attn_output
            x = x + self.linear(self.layer_norm_2(x))
            return x
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
def img_to_patch(x, patch_size, flatten_channels=True):
    """Split a channels-last image batch into a sequence of square patches.

    Args:
        x: Tensor representing the image of shape [B, H, W, C]
        patch_size: Number of pixels per dimension of the patches (integer)
        flatten_channels: If True, the patches will be returned in a flattened
            format as a feature vector instead of a image grid.

    Returns:
        Tensor of shape [B, H'*W', C*p_H*p_W] when flatten_channels is True,
        otherwise [B, H'*W', C, p_H, p_W].
    """
    imgs = x.permute(0, 3, 1, 2)  # channels-last -> [B, C, H, W]
    batch, channels, height, width = imgs.shape
    grid_h = height // patch_size
    grid_w = width // patch_size
    patches = imgs.reshape(batch, channels, grid_h, patch_size, grid_w, patch_size)
    patches = patches.permute(0, 2, 4, 1, 3, 5)  # [B, H', W', C, p_H, p_W]
    patches = patches.flatten(1, 2)              # [B, H'*W', C, p_H, p_W]
    if flatten_channels:
        patches = patches.flatten(2, 4)          # [B, H'*W', C*p_H*p_W]
    return patches
|
| 330 |
+
|
| 331 |
+
class SXRRegressionDynamicLoss:
    """Adaptive, class-weighted regression loss with an auxiliary uncertainty term.

    Samples are bucketed by raw flux into quiet / C / M / X classes using
    standard GOES thresholds. Each class's weight is a configured base weight
    scaled by a performance multiplier derived from a rolling window of recent
    per-class Huber errors, so under-performing classes get up-weighted.
    """

    def __init__(self, window_size=15000, base_weights=None):
        # GOES flux class thresholds (W/m^2): quiet < C < M < X
        self.c_threshold = 1e-6
        self.m_threshold = 1e-5
        self.x_threshold = 1e-4

        # Rolling per-class error histories (one mean error per batch appended)
        self.window_size = window_size
        self.quiet_errors = deque(maxlen=window_size)
        self.c_errors = deque(maxlen=window_size)
        self.m_errors = deque(maxlen=window_size)
        self.x_errors = deque(maxlen=window_size)

        #Calculate the base weights based on the number of samples in each class within training data
        if base_weights is None:
            self.base_weights = self._get_base_weights()
        else:
            self.base_weights = base_weights

    def _get_base_weights(self):
        # Fallback base weights when no class-frequency-derived weights are provided.
        return {
            'quiet': 1.5,    # Increase from current value
            'c_class': 1.0,  # Keep as baseline
            'm_class': 8.0,  # Maintain M-class focus
            'x_class': 20.0  # Maintain X-class focus
        }

    def calculate_loss(self, preds_norm, sxr_norm, sxr_un, raw_error):
        """Compute the weighted regression loss plus the uncertainty-head loss.

        Args:
            preds_norm: Normalized flux predictions.
            sxr_norm: Normalized flux targets.
            sxr_un: Unnormalized (raw) flux targets, used for class bucketing.
            raw_error: Model's predicted error (uncertainty head output).

        Returns:
            (total_loss, scaled_error_loss, per-sample weights). Note the
            returned error_loss is already scaled by error_weight.
        """
        base_loss = F.huber_loss(preds_norm, sxr_norm, delta=.3, reduction='none')
        # Train the uncertainty head to predict the absolute residual
        # (default Huber delta=1.0 here, unlike the regression term's 0.3).
        error = abs(sxr_norm - preds_norm)
        error_loss = F.huber_loss(raw_error, error, reduction='none')

        weights = self._get_adaptive_weights(sxr_un)
        self._update_tracking(sxr_un, sxr_norm, preds_norm)
        weighted_loss = base_loss * weights
        error_weight = .2
        error_loss = error_weight * error_loss.mean()
        loss = weighted_loss.mean() + error_loss
        return loss, error_loss, weights

    def _get_adaptive_weights(self, sxr_un):
        """Per-sample weights: base class weight x rolling-performance multiplier,
        normalized so the batch-mean weight is ~1.0."""
        device = sxr_un.device

        # Get continuous multipliers per class with custom params
        quiet_mult = self._get_performance_multiplier(
            self.quiet_errors, max_multiplier=1.5, min_multiplier=0.6, sensitivity=0.05, sxrclass='quiet'
        )
        c_mult = self._get_performance_multiplier(
            self.c_errors, max_multiplier=2, min_multiplier=0.7, sensitivity=0.08, sxrclass='c_class'
        )
        m_mult = self._get_performance_multiplier(
            self.m_errors, max_multiplier=5.0, min_multiplier=0.8, sensitivity=0.1, sxrclass='m_class'
        )
        x_mult = self._get_performance_multiplier(
            self.x_errors, max_multiplier=8.0, min_multiplier=0.8, sensitivity=0.12, sxrclass='x_class'
        )

        quiet_weight = self.base_weights['quiet'] * quiet_mult
        c_weight = self.base_weights['c_class'] * c_mult
        m_weight = self.base_weights['m_class'] * m_mult
        x_weight = self.base_weights['x_class'] * x_mult

        # Assign each sample its class weight by raw-flux threshold.
        weights = torch.ones_like(sxr_un, device=device)
        weights = torch.where(sxr_un < self.c_threshold, quiet_weight, weights)
        weights = torch.where((sxr_un >= self.c_threshold) & (sxr_un < self.m_threshold), c_weight, weights)
        weights = torch.where((sxr_un >= self.m_threshold) & (sxr_un < self.x_threshold), m_weight, weights)
        weights = torch.where(sxr_un >= self.x_threshold, x_weight, weights)

        # Normalize so mean weight ~1.0 (optional, helps stability)
        mean_weight = torch.mean(weights)
        weights = weights / (mean_weight)

        # Save for logging
        self.current_multipliers = {
            'quiet_mult': quiet_mult,
            'c_mult': c_mult,
            'm_mult': m_mult,
            'x_mult': x_mult,
            'quiet_weight': quiet_weight,
            'c_weight': c_weight,
            'm_weight': m_weight,
            'x_weight': x_weight
        }

        return weights

    def _get_performance_multiplier(self, error_history, max_multiplier=10.0, min_multiplier=0.5, sensitivity=3.0, sxrclass='quiet'):
        """Class-dependent performance multiplier.

        Returns exp(sensitivity * (recent/overall - 1)) clipped to
        [min_multiplier, max_multiplier]: >1 when recent error exceeds the
        long-run mean, <1 when recent performance is better. Returns 1.0
        until enough history has accumulated for the class.
        """
        class_params = {
            'quiet': {'min_samples': 2500, 'recent_window': 800},
            'c_class': {'min_samples': 2500, 'recent_window': 800},
            'm_class': {'min_samples': 1500, 'recent_window': 500},
            'x_class': {'min_samples': 1000, 'recent_window': 300}
        }

        if len(error_history) < class_params[sxrclass]['min_samples']:
            return 1.0

        recent_window = class_params[sxrclass]['recent_window']
        recent = np.mean(list(error_history)[-recent_window:])
        overall = np.mean(list(error_history))

        # NOTE(review): no guard against overall == 0 before dividing --
        # Huber errors are non-negative so a uniformly-zero window would
        # raise/produce inf; confirm this cannot occur in practice.
        ratio = recent / overall
        multiplier = np.exp(sensitivity * (ratio - 1))
        return np.clip(multiplier, min_multiplier, max_multiplier)

    def _update_tracking(self, sxr_un, sxr_norm, preds_norm):
        """Append this batch's mean per-class Huber error to the rolling histories."""
        sxr_un_np = sxr_un.detach().cpu().numpy()

        #Huber loss
        error = F.huber_loss(preds_norm, sxr_norm, delta=.3, reduction='none')
        error = error.detach().cpu().numpy()

        quiet_mask = sxr_un_np < self.c_threshold
        if quiet_mask.sum() > 0:
            self.quiet_errors.append(float(np.mean(error[quiet_mask])))

        c_mask = (sxr_un_np >= self.c_threshold) & (sxr_un_np < self.m_threshold)
        if c_mask.sum() > 0:
            self.c_errors.append(float(np.mean(error[c_mask])))

        m_mask = (sxr_un_np >= self.m_threshold) & (sxr_un_np < self.x_threshold)
        if m_mask.sum() > 0:
            self.m_errors.append(float(np.mean(error[m_mask])))

        x_mask = sxr_un_np >= self.x_threshold
        if x_mask.sum() > 0:
            self.x_errors.append(float(np.mean(error[x_mask])))


    def get_current_multipliers(self):
        """Get current performance multipliers for logging"""
        # NOTE(review): the sensitivities used here (0.2/0.3/0.8/1.0) differ
        # from those used in _get_adaptive_weights (0.05/0.08/0.1/0.12), so
        # logged multipliers do not match the ones applied to the loss --
        # confirm whether this divergence is intentional.
        return {
            'quiet_mult': self._get_performance_multiplier(
                self.quiet_errors, max_multiplier=1.5, min_multiplier=0.6, sensitivity=0.2, sxrclass='quiet'
            ),
            'c_mult': self._get_performance_multiplier(
                self.c_errors, max_multiplier=2, min_multiplier=0.7, sensitivity=0.3, sxrclass='c_class'
            ),
            'm_mult': self._get_performance_multiplier(
                self.m_errors, max_multiplier=5.0, min_multiplier=0.8, sensitivity=0.8, sxrclass='m_class'
            ),
            'x_mult': self._get_performance_multiplier(
                self.x_errors, max_multiplier=8.0, min_multiplier=0.8, sensitivity=1.0, sxrclass='x_class'
            ),
            'quiet_count': len(self.quiet_errors),
            'c_count': len(self.c_errors),
            'm_count': len(self.m_errors),
            'x_count': len(self.x_errors),
            'quiet_error': np.mean(self.quiet_errors) if self.quiet_errors else 0.0,
            'c_error': np.mean(self.c_errors) if self.c_errors else 0.0,
            'm_error': np.mean(self.m_errors) if self.m_errors else 0.0,
            'x_error': np.mean(self.x_errors) if self.x_errors else 0.0,
            'quiet_weight': getattr(self, 'current_multipliers', {}).get('quiet_weight', 0.0),
            'c_weight': getattr(self, 'current_multipliers', {}).get('c_weight', 0.0),
            'm_weight': getattr(self, 'current_multipliers', {}).get('m_weight', 0.0),
            'x_weight': getattr(self, 'current_multipliers', {}).get('x_weight', 0.0)
        }
|
forecasting/training/callback.py
CHANGED
|
@@ -132,7 +132,12 @@ class AttentionMapCallback(Callback):
|
|
| 132 |
except:
|
| 133 |
# For ViT patch model, we need to call the model's forward method directly
|
| 134 |
if hasattr(pl_module, 'model') and hasattr(pl_module.model, 'forward'):
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
else:
|
| 137 |
outputs, attention_weights = pl_module.forward_for_callback(imgs, return_attention=True)
|
| 138 |
|
|
|
|
| 132 |
except:
|
| 133 |
# For ViT patch model, we need to call the model's forward method directly
|
| 134 |
if hasattr(pl_module, 'model') and hasattr(pl_module.model, 'forward'):
|
| 135 |
+
try:
|
| 136 |
+
print("Using model's forward method")
|
| 137 |
+
outputs, attention_weights, _ = pl_module.model(imgs, pl_module.sxr_norm, return_attention=True)
|
| 138 |
+
except:
|
| 139 |
+
print("Using model's forward method failed")
|
| 140 |
+
outputs, attention_weights = pl_module.forward_for_callback(imgs, return_attention=True)
|
| 141 |
else:
|
| 142 |
outputs, attention_weights = pl_module.forward_for_callback(imgs, return_attention=True)
|
| 143 |
|
forecasting/training/config5.yaml
CHANGED
|
@@ -12,7 +12,7 @@ batch_size: 64
|
|
| 12 |
epochs: 250
|
| 13 |
oversample: false
|
| 14 |
balance_strategy: "upsample_minority"
|
| 15 |
-
calculate_base_weights:
|
| 16 |
|
| 17 |
megsai:
|
| 18 |
architecture: "cnn"
|
|
@@ -71,5 +71,5 @@ wandb:
|
|
| 71 |
- aia
|
| 72 |
- sxr
|
| 73 |
- regression
|
| 74 |
-
wb_name: vit-
|
| 75 |
notes: Regression from AIA images (6 channels) to GOES SXR flux
|
|
|
|
| 12 |
epochs: 250
|
| 13 |
oversample: false
|
| 14 |
balance_strategy: "upsample_minority"
|
| 15 |
+
calculate_base_weights: true # Whether to calculate class-based weights for loss function
|
| 16 |
|
| 17 |
megsai:
|
| 18 |
architecture: "cnn"
|
|
|
|
| 71 |
- aia
|
| 72 |
- sxr
|
| 73 |
- regression
|
| 74 |
+
wb_name: vit-mse-base-weights
|
| 75 |
notes: Regression from AIA images (6 channels) to GOES SXR flux
|
forecasting/training/config6.yaml
CHANGED
|
@@ -71,5 +71,5 @@ wandb:
|
|
| 71 |
- aia
|
| 72 |
- sxr
|
| 73 |
- regression
|
| 74 |
-
wb_name: vit-
|
| 75 |
notes: Regression from AIA images (6 channels) to GOES SXR flux
|
|
|
|
| 71 |
- aia
|
| 72 |
- sxr
|
| 73 |
- regression
|
| 74 |
+
wb_name: vit-mse-claude
|
| 75 |
notes: Regression from AIA images (6 channels) to GOES SXR flux
|
forecasting/training/train.py
CHANGED
|
@@ -24,6 +24,7 @@ from forecasting.data_loaders.SDOAIA_dataloader import AIA_GOESDataModule
|
|
| 24 |
from forecasting.models.vision_transformer_custom import ViT
|
| 25 |
from forecasting.models.linear_and_hybrid import LinearIrradianceModel, HybridIrradianceModel
|
| 26 |
from forecasting.models.vit_patch_model import ViT as ViTPatch
|
|
|
|
| 27 |
from forecasting.models import FusionViTHybrid
|
| 28 |
from callback import ImagePredictionLogger_SXR, AttentionMapCallback
|
| 29 |
from pytorch_lightning.callbacks import Callback
|
|
@@ -344,6 +345,10 @@ elif config_data['selected_model'] == 'ViTPatch':
|
|
| 344 |
base_weights = get_base_weights(data_loader, sxr_norm) if config_data.get('calculate_base_weights', True) else None
|
| 345 |
model = ViTPatch(model_kwargs=config_data['vit_custom'], sxr_norm = sxr_norm, base_weights=base_weights)
|
| 346 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
elif config_data['selected_model'] == 'FusionViTHybrid':
|
| 348 |
# Expect a 'fusion' section in YAML
|
| 349 |
fusion_cfg = config_data.get('fusion', {})
|
|
|
|
| 24 |
from forecasting.models.vision_transformer_custom import ViT
|
| 25 |
from forecasting.models.linear_and_hybrid import LinearIrradianceModel, HybridIrradianceModel
|
| 26 |
from forecasting.models.vit_patch_model import ViT as ViTPatch
|
| 27 |
+
from forecasting.models.vit_patch_model_uncertainty import ViTUncertainty
|
| 28 |
from forecasting.models import FusionViTHybrid
|
| 29 |
from callback import ImagePredictionLogger_SXR, AttentionMapCallback
|
| 30 |
from pytorch_lightning.callbacks import Callback
|
|
|
|
| 345 |
base_weights = get_base_weights(data_loader, sxr_norm) if config_data.get('calculate_base_weights', True) else None
|
| 346 |
model = ViTPatch(model_kwargs=config_data['vit_custom'], sxr_norm = sxr_norm, base_weights=base_weights)
|
| 347 |
|
| 348 |
+
elif config_data['selected_model'] == 'ViTUncertainty':
|
| 349 |
+
base_weights = get_base_weights(data_loader, sxr_norm) if config_data.get('calculate_base_weights', True) else None
|
| 350 |
+
model = ViTUncertainty(model_kwargs=config_data['vit_custom'], sxr_norm = sxr_norm, base_weights=base_weights)
|
| 351 |
+
|
| 352 |
elif config_data['selected_model'] == 'FusionViTHybrid':
|
| 353 |
# Expect a 'fusion' section in YAML
|
| 354 |
fusion_cfg = config_data.get('fusion', {})
|
forecasting/training/vituncertainty.yaml
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
#Base directories - change these to switch datasets
|
| 3 |
+
base_data_dir: "/mnt/data/COMBINED" # Change this line for different datasets
|
| 4 |
+
base_checkpoint_dir: "/mnt/data/COMBINED" # Change this line for different datasets
|
| 5 |
+
wavelengths: [94, 131, 171, 193, 211, 304] # AIA wavelengths in Angstroms
|
| 6 |
+
|
| 7 |
+
# GPU configuration
|
| 8 |
+
gpu_id: 0 # GPU device ID to use (0, 1, 2, etc.) or -1 for CPU only
|
| 9 |
+
# Model configuration
|
| 10 |
+
selected_model: "ViTUncertainty" # Options: "hybrid", "vit", "fusion", "vitpatch"
|
| 11 |
+
batch_size: 64
|
| 12 |
+
epochs: 250
|
| 13 |
+
oversample: false
|
| 14 |
+
balance_strategy: "upsample_minority"
|
| 15 |
+
calculate_base_weights: false # Whether to calculate class-based weights for loss function
|
| 16 |
+
|
| 17 |
+
megsai:
|
| 18 |
+
architecture: "cnn"
|
| 19 |
+
seed: 42
|
| 20 |
+
lr: 0.0001
|
| 21 |
+
cnn_model: "updated"
|
| 22 |
+
cnn_dp: 0.2
|
| 23 |
+
weight_decay: 1e-5
|
| 24 |
+
cosine_restart_T0: 50
|
| 25 |
+
cosine_restart_Tmult: 2
|
| 26 |
+
cosine_eta_min: 1e-7
|
| 27 |
+
|
| 28 |
+
vit_custom:
|
| 29 |
+
embed_dim: 512
|
| 30 |
+
num_channels: 6
|
| 31 |
+
num_classes: 2
|
| 32 |
+
patch_size: 16
|
| 33 |
+
num_patches: 1024
|
| 34 |
+
hidden_dim: 512
|
| 35 |
+
num_heads: 8
|
| 36 |
+
num_layers: 6
|
| 37 |
+
dropout: 0.1
|
| 38 |
+
lr: 0.0001
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
fusion:
|
| 42 |
+
scalar_branch: "hybrid" # or "linear"
|
| 43 |
+
lr: 0.0001
|
| 44 |
+
lambda_vit_to_target: 0.3
|
| 45 |
+
lambda_scalar_to_target: 0.1
|
| 46 |
+
learnable_gate: true
|
| 47 |
+
gate_init_bias: 5.0
|
| 48 |
+
scalar_kwargs:
|
| 49 |
+
d_input: 6
|
| 50 |
+
d_output: 1
|
| 51 |
+
cnn_model: "updated"
|
| 52 |
+
cnn_dp: 0.75
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
# Data paths (automatically constructed from base directories)
|
| 56 |
+
data:
|
| 57 |
+
aia_dir:
|
| 58 |
+
"${base_data_dir}/AIA-SPLIT"
|
| 59 |
+
sxr_dir:
|
| 60 |
+
"${base_data_dir}/SXR-SPLIT"
|
| 61 |
+
sxr_norm_path:
|
| 62 |
+
"${base_data_dir}/SXR-SPLIT/normalized_sxr.npy"
|
| 63 |
+
checkpoints_dir:
|
| 64 |
+
"${base_checkpoint_dir}/new-checkpoint/"
|
| 65 |
+
|
| 66 |
+
wandb:
|
| 67 |
+
entity: jayantbiradar619-university-of-arizona # Use your exact W&B username
|
| 68 |
+
project: Model Testing
|
| 69 |
+
job_type: training
|
| 70 |
+
tags:
|
| 71 |
+
- aia
|
| 72 |
+
- sxr
|
| 73 |
+
- regression
|
| 74 |
+
wb_name: vit-uncertainty-claude
|
| 75 |
+
notes: Regression from AIA images (6 channels) to GOES SXR flux
|