Spaces:

Shree2604
/

BioStack

Sleeping

App Files Files Community

Shree2604 committed on Feb 24

Commit

361b4d2

verified ·

1 Parent(s): f0f2c4a

Update server.py

Browse files

Files changed (1) hide show

server.py +402 -341

server.py CHANGED Viewed

@@ -2,6 +2,7 @@ import io
 import torch
 import torch.nn as nn
 import timm
 import traceback
 import os
 from PIL import Image
@@ -12,7 +13,7 @@ from transformers import T5ForConditionalGeneration, T5Tokenizer
 from huggingface_hub import hf_hub_download
 # ─────────────────────────────────────────────────────────────────────────────
-# CONFIGURATION - Matching Colab Notebook Exactly
 # ─────────────────────────────────────────────────────────────────────────────
 CONFIG = {
     'coatnet_model': 'coatnet_1_rw_224',
@@ -20,8 +21,6 @@ CONFIG = {
     'img_emb_dim': 768,
     'train_last_stages': 2,
     'image_size': 224,
-    'max_length': 100,
-    'num_beams': 4,
 }
 # ─────────────────────────────────────────────────────────────────────────────
@@ -31,17 +30,18 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"🖥️  Using device: {device}")
 # ─────────────────────────────────────────────────────────────────────────────
-# LOAD TOKENIZER - Matching Colab
 # ─────────────────────────────────────────────────────────────────────────────
 print("\n" + "="*80)
-print("LOADING TOKENIZER")
 print("="*80)
 tokenizer = T5Tokenizer.from_pretrained(CONFIG['t5_model'])
 print(f"✓ Loaded tokenizer: {CONFIG['t5_model']}")
-# ─────────────────────────────────────────────────────────────────────────────
-# IMAGE TRANSFORM - Matching Colab Exactly
-# ─────────────────────────────────────────────────────────────────────────────
 transform = transforms.Compose([
     transforms.Resize((CONFIG['image_size'], CONFIG['image_size'])),
     transforms.ToTensor(),
@@ -52,429 +52,490 @@ transform = transforms.Compose([
 ])
 print(f"✓ Image transform defined (size: {CONFIG['image_size']}x{CONFIG['image_size']})")
 # ─────────────────────────────────────────────────────────────────────────────
-# ARCHITECTURE 1: CoAtNetEncoder - Exactly from Colab SECTION 6
 # ─────────────────────────────────────────────────────────────────────────────
 class CoAtNetEncoder(nn.Module):
-    def __init__(self, model_name="coatnet_1_rw_224", pretrained=True, train_last_stages=2):
         super().__init__()
-        self.encoder = timm.create_model(
-            model_name,
-            pretrained=pretrained,
-            num_classes=0,
-            global_pool="avg"
-        )
-        # Freeze all parameters
-        for p in self.encoder.parameters():
-            p.requires_grad = False
-        # Unfreeze last stages
-        if hasattr(self.encoder, "stages") and train_last_stages is not None:
-            stages = self.encoder.stages
-            for stage in stages[-train_last_stages:]:
-                for p in stage.parameters():
-                    p.requires_grad = True
     def forward(self, x):
-        return self.encoder(x)
 # ─────────────────────────────────────────────────────────────────────────────
-# ARCHITECTURE 2: VisionT5Model - Exactly from Colab SECTION 6
 # ─────────────────────────────────────────────────────────────────────────────
-class VisionT5Model(nn.Module):
     def __init__(self, img_encoder, txt_model_name="t5-small", img_emb_dim=768):
         super().__init__()
-        # Vision encoder (CoAtNet)
         self.img_encoder = img_encoder
-        # Text decoder (T5)
         self.t5 = T5ForConditionalGeneration.from_pretrained(txt_model_name)
-        # Projection layer to match image features with T5 d_model
         self.proj = nn.Linear(img_emb_dim, self.t5.config.d_model)
-        # Freeze shared T5 embeddings for faster and stable training
         for p in self.t5.shared.parameters():
             p.requires_grad = False
-    def forward(self, pixel_values, input_ids, attention_mask, labels=None):
-        # Extract image features
-        img_feats = self.img_encoder(pixel_values)
-        # Project image features to T5 embedding space
-        img_feats = self.proj(img_feats)
-        # Add sequence dimension
-        encoder_hidden_states = img_feats.unsqueeze(1)
-        # Run T5 encoder using image embeddings
-        encoder_outputs = self.t5.encoder(
-            inputs_embeds=encoder_hidden_states
-        )
-        # Run T5 decoder and compute loss
-        outputs = self.t5(
-            encoder_outputs=encoder_outputs,
-            attention_mask=torch.ones(
-                encoder_hidden_states.size()[:2], device=device
-            ),
-            input_ids=input_ids,
-            labels=labels,
-        )
-        return outputs
-    def generate_reports(self, pixel_values, max_length=100, num_beams=4):
-        """
-        Generate reports - EXACTLY matching Colab SECTION 6
-        """
-        # Extract and project image features
-        img_feats = self.img_encoder(pixel_values)
-        img_feats = self.proj(img_feats)
-        encoder_hidden_states = img_feats.unsqueeze(1)
-        # Encode image features
-        encoder_outputs = self.t5.encoder(
-            inputs_embeds=encoder_hidden_states
-        )
-        # Generate report using beam search - EXACT parameters from Colab
-        generated_ids = self.t5.generate(
-            encoder_outputs=encoder_outputs,
-            attention_mask=torch.ones(
-                encoder_hidden_states.size()[:2], device=device
-            ),
-            max_length=max_length,
-            num_beams=num_beams,
-            early_stopping=True
-        )
-        return generated_ids
-print("✓ Model architecture classes defined")
-# ─────────────────────────────────────────────────────────────────────────────
-# MODEL LOADING FUNCTION - Exactly from Colab SECTION 8
-# ─────────────────────────────────────────────────────────────────────────────
-def load_model_from_checkpoint(checkpoint_path: str, model_name: str, config: dict):
-    """
-    Load VisionT5Model from checkpoint - EXACT implementation from Colab
-    """
-    print(f"\nLoading {model_name} model...")
-    print(f"  Checkpoint: {checkpoint_path}")
-    try:
-        # Create image encoder
-        print(f"  Creating CoAtNet encoder: {config['coatnet_model']}")
-        img_encoder = CoAtNetEncoder(
-            model_name=config['coatnet_model'],
-            pretrained=False,  # Weights will come from checkpoint
-            train_last_stages=config['train_last_stages']
-        )
-        # Create full model
-        print(f"  Creating VisionT5 model with T5: {config['t5_model']}")
-        model = VisionT5Model(
-            img_encoder=img_encoder,
-            txt_model_name=config['t5_model'],
-            img_emb_dim=config['img_emb_dim']
-        )
-        # Load checkpoint
-        print(f"  Loading checkpoint weights...")
-        checkpoint = torch.load(checkpoint_path, map_location=device)
-        # Handle different checkpoint formats
-        if isinstance(checkpoint, dict):
-            if 'model_state_dict' in checkpoint:
-                state_dict = checkpoint['model_state_dict']
-                print(f"  Found 'model_state_dict' in checkpoint")
-            elif 'state_dict' in checkpoint:
-                state_dict = checkpoint['state_dict']
-                print(f"  Found 'state_dict' in checkpoint")
-            elif 'model' in checkpoint:
-                state_dict = checkpoint['model']
-                print(f"  Found 'model' in checkpoint")
-            else:
-                # Assume checkpoint is the state dict
-                state_dict = checkpoint
-                print(f"  Using checkpoint as state_dict directly")
-            # Print additional checkpoint info if available
-            if 'epoch' in checkpoint:
-                print(f"  Checkpoint epoch: {checkpoint['epoch']}")
-            if 'loss' in checkpoint:
-                print(f"  Checkpoint loss: {checkpoint['loss']:.4f}")
-        else:
-            state_dict = checkpoint
-            print(f"  Checkpoint is a state_dict")
-        # Load state dict
-        missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
-        if missing_keys:
-            print(f"  ⚠️ Missing keys: {len(missing_keys)}")
-            if len(missing_keys) <= 5:
-                for key in missing_keys:
-                    print(f"    - {key}")
-        if unexpected_keys:
-            print(f"  ⚠️ Unexpected keys: {len(unexpected_keys)}")
-            if len(unexpected_keys) <= 5:
-                for key in unexpected_keys:
-                    print(f"    - {key}")
-        # Move to device and set to eval mode
-        model = model.to(device)
-        model.eval()
-        print(f"✓ {model_name} model loaded successfully!")
-        return model
-    except Exception as e:
-        print(f"❌ Error loading {model_name} model: {str(e)}")
-        import traceback
-        traceback.print_exc()
-        raise
 # ─────────────────────────────────────────────────────────────────────────────
-# INFERENCE FUNCTION - Exactly from Colab SECTION 9
 # ─────────────────────────────────────────────────────────────────────────────
-def generate_report(
-    image_path: str,
-    model: VisionT5Model,
-    config: dict
-) -> str:
     """
-    Generate medical report from X-ray image - EXACT implementation from Colab
     """
-    try:
-        # Preprocess image
-        image = Image.open(image_path).convert('RGB')
-        pixel_values = transform(image).unsqueeze(0).to(device)
-        # Generate report - using EXACT parameters from Colab
-        with torch.no_grad():
-            generated_ids = model.generate_reports(
-                pixel_values,
-                max_length=config['max_length'],
-                num_beams=config['num_beams']
             )
-        # Decode
-        report = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
-        return report.strip()
-    except Exception as e:
-        print(f"Error generating report for {image_path}: {str(e)}")
-        return ""
 # ─────────────────────────────────────────────────────────────────────────────
-# LOAD MODELS FROM HUGGINGFACE
 # ─────────────────────────────────────────────────────────────────────────────
-print("\n" + "="*80)
-print("LOADING MODELS FROM HUGGINGFACE")
-print("="*80)
-# Download model files from Hugging Face
-try:
-    SFT_MODEL_PATH = hf_hub_download(
-        repo_id="vinaykumarhs2020/RLHF_radiology_model",
-        filename="best_model.pt"
-    )
-    PPO_MODEL_PATH = hf_hub_download(
-        repo_id="vinaykumarhs2020/RLHF_radiology_model",
-        filename="rlhf_model.pt"
     )
-    print(f"✓ Downloaded SFT model: {SFT_MODEL_PATH}")
-    print(f"✓ Downloaded PPO model: {PPO_MODEL_PATH}")
-except Exception as e:
-    print(f"❌ Error downloading models: {e}")
-    # Fallback to local paths if downloads fail
-    SFT_MODEL_PATH = "/content/best_model.pt"
-    PPO_MODEL_PATH = "/content/rlhf_model.pt"
-    print(f"⚠️ Using local paths instead")
-# Load both models
-print("\n" + "="*80)
-print("LOADING MODELS")
-print("="*80)
-sft_model = load_model_from_checkpoint(
-    SFT_MODEL_PATH,
-    "SFT",
-    CONFIG
-)
-ppo_model = load_model_from_checkpoint(
-    PPO_MODEL_PATH,
-    "PPO",
-    CONFIG
-)
-print("\n✓ Both models loaded successfully!")
 # ─────────────────────────────────────────────────────────────────────────────
 # FASTAPI APP
 # ─────────────────────────────────────────────────────────────────────────────
-app = FastAPI(title="Medical Report Generation - Matching Colab")
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],
     allow_methods=["*"],
     allow_headers=["*"],
 )
-def preprocess_bytes(file_bytes: bytes) -> torch.Tensor:
-    """Preprocess image bytes for inference"""
-    img = Image.open(io.BytesIO(file_bytes)).convert("RGB")
-    return transform(img).unsqueeze(0).to(device)
 @app.get("/health")
 def health():
-    return {
-        "status": "ok",
-        "device": str(device),
-        "models_loaded": True,
-        "config": CONFIG
-    }
 @app.post("/sft")
 async def sft_inference(file: UploadFile = File(...)):
-    """
-    SFT model inference - EXACTLY matching Colab behavior
-    """
     try:
-        # Preprocess image
-        tensor = preprocess_bytes(await file.read())
-        # Generate report using EXACT Colab parameters
-        with torch.no_grad():
-            generated_ids = sft_model.generate_reports(
-                tensor,
-                max_length=CONFIG['max_length'],
-                num_beams=CONFIG['num_beams']
-            )
-        # Decode - EXACTLY as Colab does
-        report = tokenizer.decode(generated_ids[0], skip_special_tokens=True).strip()
         print(f"[SFT] Generated: {report}")
-        # Return FULL report without truncation
-        return {"report": report, "model": "SFT", "config_used": CONFIG}
     except Exception as e:
         traceback.print_exc()
-        return {"report": f"ERROR: {str(e)}", "model": "SFT"}
-@app.post("/ppo")
-async def ppo_inference(file: UploadFile = File(...)):
-    """
-    PPO model inference - EXACTLY matching Colab behavior
-    """
     try:
-        # Preprocess image
-        tensor = preprocess_bytes(await file.read())
-        # Generate report using EXACT Colab parameters
         with torch.no_grad():
-            generated_ids = ppo_model.generate_reports(
-                tensor,
-                max_length=CONFIG['max_length'],
-                num_beams=CONFIG['num_beams']
-            )
-        # Decode - EXACTLY as Colab does
-        report = tokenizer.decode(generated_ids[0], skip_special_tokens=True).strip()
-        print(f"[PPO] Generated: {report}")
-        # Return FULL report without truncation
-        return {"report": report, "model": "PPO", "config_used": CONFIG}
     except Exception as e:
         traceback.print_exc()
-        return {"report": f"ERROR: {str(e)}", "model": "PPO"}
-@app.post("/compare")
-async def compare_models(file: UploadFile = File(...)):
-    """
-    Generate reports from both models for comparison
-    """
     try:
-        file_bytes = await file.read()
-        tensor = preprocess_bytes(file_bytes)
-        # SFT Generation
-        with torch.no_grad():
-            sft_ids = sft_model.generate_reports(
-                tensor,
-                max_length=CONFIG['max_length'],
-                num_beams=CONFIG['num_beams']
-            )
-        sft_report = tokenizer.decode(sft_ids[0], skip_special_tokens=True).strip()
-        # PPO Generation
-        with torch.no_grad():
-            ppo_ids = ppo_model.generate_reports(
-                tensor,
-                max_length=CONFIG['max_length'],
-                num_beams=CONFIG['num_beams']
-            )
-        ppo_report = tokenizer.decode(ppo_ids[0], skip_special_tokens=True).strip()
-        print(f"[COMPARE] SFT: {sft_report}")
-        print(f"[COMPARE] PPO: {ppo_report}")
-        return {
-            "sft_report": sft_report,
-            "ppo_report": ppo_report,
-            "config_used": CONFIG
-        }
     except Exception as e:
         traceback.print_exc()
-        return {
-            "sft_report": f"ERROR: {str(e)}",
-            "ppo_report": f"ERROR: {str(e)}"
-        }
-@app.get("/debug_config")
-def debug_config():
-    """Debug endpoint to check configuration"""
-    return {
-        "config": CONFIG,
-        "device": str(device),
-        "tokenizer": CONFIG['t5_model'],
-        "image_size": CONFIG['image_size'],
-        "max_length": CONFIG['max_length'],
-        "num_beams": CONFIG['num_beams'],
-        "models_loaded": {
-            "sft": sft_model is not None,
-            "ppo": ppo_model is not None
-        }
-    }
 # ─────────────────────────────────────────────────────────────────────────────
-# STATIC FILE SERVING
 # ─────────────────────────────────────────────────────────────────────────────
 from fastapi.staticfiles import StaticFiles
 if os.path.exists("build"):
     app.mount("/", StaticFiles(directory="build", html=True), name="static")
     print("✅ React app mounted at /")

 import torch
 import torch.nn as nn
 import timm
+import pickle
 import traceback
 import os
 from PIL import Image
 from huggingface_hub import hf_hub_download
 # ─────────────────────────────────────────────────────────────────────────────
+# CONFIGURATION
 # ─────────────────────────────────────────────────────────────────────────────
 CONFIG = {
     'coatnet_model': 'coatnet_1_rw_224',
     'img_emb_dim': 768,
     'train_last_stages': 2,
     'image_size': 224,
 }
 # ─────────────────────────────────────────────────────────────────────────────
 print(f"🖥️  Using device: {device}")
 # ─────────────────────────────────────────────────────────────────────────────
+# SECTION 7: Load Tokenizer and Image Transform
 # ─────────────────────────────────────────────────────────────────────────────
 print("\n" + "="*80)
+print("LOADING TOKENIZER AND IMAGE TRANSFORM")
 print("="*80)
+# Load tokenizer
 tokenizer = T5Tokenizer.from_pretrained(CONFIG['t5_model'])
 print(f"✓ Loaded tokenizer: {CONFIG['t5_model']}")
+# Define image transform
 transform = transforms.Compose([
     transforms.Resize((CONFIG['image_size'], CONFIG['image_size'])),
     transforms.ToTensor(),
 ])
 print(f"✓ Image transform defined (size: {CONFIG['image_size']}x{CONFIG['image_size']})")
+def preprocess_image(image_path: str) -> torch.Tensor:
+    """Load an image from disk and run it through the module-level ``transform``.
+
+    The file at ``image_path`` is opened with PIL and converted to RGB first.
+    The transformed result is returned as-is: no batch dimension is added and
+    it is not moved to ``device`` here.
+    """
+    image = Image.open(image_path).convert('RGB')
+    return transform(image)
 # ─────────────────────────────────────────────────────────────────────────────
+# ARCHITECTURE 1 — CoAtNet Encoder  (shared by all three models)
+# Matches BOTH notebooks exactly.
 # ─────────────────────────────────────────────────────────────────────────────
 class CoAtNetEncoder(nn.Module):
+    """Image encoder wrapping a timm backbone.
+
+    ``forward`` returns the backbone's ``forward_features`` output, averaged
+    over dims 2 and 3 when that output is 4-D. ``feature_dim`` (the size of the
+    last dimension of that result) is measured once at construction.
+    """
+    def __init__(self, model_name=None, pretrained=False, train_last_stages=None):
         super().__init__()
+        # Use CONFIG defaults if not specified
+        # NOTE(review): `or` also replaces falsy values, so train_last_stages=0
+        # falls back to the CONFIG default rather than freezing every stage.
+        model_name = model_name or CONFIG['coatnet_model']
+        train_last_stages = train_last_stages or CONFIG['train_last_stages']
+        # pretrained=False at inference time — weights come from .pt file
+        self.backbone = timm.create_model(model_name, pretrained=pretrained)
+        # Freeze every parameter, then re-enable gradients for those whose name
+        # contains "stages.<i>" for i in [5 - train_last_stages, 5).
+        # NOTE(review): this assumes stage indices run up to 4 — confirm against
+        # the parameter names of the timm model actually used.
+        for name, param in self.backbone.named_parameters():
+            param.requires_grad = False
+            for i in range(5 - train_last_stages, 5):
+                if f"stages.{i}" in name:
+                    param.requires_grad = True
+                    break
+        # Detect feature_dim dynamically (same as RM/PPO notebook Cell 4)
+        # One forward pass on a random 1x3x224x224 input; the 224 is hard-coded
+        # here rather than read from CONFIG['image_size'].
+        with torch.no_grad():
+            dummy = torch.randn(1, 3, 224, 224)
+            features = self.backbone.forward_features(dummy)
+            if len(features.shape) == 4:
+                features = features.mean(dim=[2, 3])
+            self.feature_dim = features.shape[-1]
+        print(f"   CoAtNetEncoder feature_dim = {self.feature_dim}")
     def forward(self, x):
+        """Return backbone features for ``x``.
+
+        A 4-D backbone output is averaged over dims 2 and 3 (presumably the
+        spatial axes of a (B, C, H, W) map — confirm); other ranks pass through.
+        """
+        features = self.backbone.forward_features(x)
+        if len(features.shape) == 4:
+            features = features.mean(dim=[2, 3])
+        return features
 # ─────────────────────────────────────────────────────────────────────────────
+# ARCHITECTURE 2 — SFT VisionT5Model
+# BUG FIX: Uses self.t5 and self.proj — exactly matching best_model.pt keys
+# from SFT notebook Cell 33. Do NOT rename these to txt_model/img_proj.
 # ─────────────────────────────────────────────────────────────────────────────
+class SFTVisionT5Model(nn.Module):
+    """Image-to-text model: image encoder -> linear projection -> T5.
+
+    Attribute names ``img_encoder``, ``t5`` and ``proj`` are load-bearing: they
+    determine the state_dict keys this module can load.
+    """
     def __init__(self, img_encoder, txt_model_name="t5-small", img_emb_dim=768):
         super().__init__()
         self.img_encoder = img_encoder
+        # ← self.t5  (NOT self.txt_model — must match saved keys)
         self.t5 = T5ForConditionalGeneration.from_pretrained(txt_model_name)
+        # ← self.proj (NOT self.img_proj — must match saved keys)
         self.proj = nn.Linear(img_emb_dim, self.t5.config.d_model)
+        # Keep the shared T5 embedding parameters frozen.
         for p in self.t5.shared.parameters():
             p.requires_grad = False
+    def generate_reports(self, pixel_values, max_length=100):
+        """Generate one decoded report string per input image.
+
+        The projected image feature is fed to the T5 encoder as a length-1
+        sequence, decoding uses 4-beam search, and the output ids are decoded
+        with the module-level ``tokenizer``. Returns a list of strings.
+
+        Side effect: calls ``self.eval()``, so the module stays in eval mode
+        after the call.
+        """
+        self.eval()
+        with torch.no_grad():
+            # Extract + project image features
+            img_feats = self.img_encoder(pixel_values)          # [B, feature_dim]
+            img_feats = self.proj(img_feats)                    # [B, d_model]
+            encoder_hidden_states = img_feats.unsqueeze(1)     # [B, 1, d_model]
+            # Encode
+            encoder_outputs = self.t5.encoder(
+                inputs_embeds=encoder_hidden_states
+            )
+            # All-ones mask over the [B, 1] encoder sequence, created on the
+            # same device as the input rather than the global `device`.
+            attn = torch.ones(
+                encoder_hidden_states.size()[:2], device=pixel_values.device
+            )
+            # BUG FIX 3: repetition_penalty + no_repeat_ngram_size breaks
+            # the "Projection: Projection: Projection:" loop
+            generated_ids = self.t5.generate(
+                encoder_outputs=encoder_outputs,
+                attention_mask=attn,
+                max_length=max_length,
+                num_beams=4,
+                early_stopping=True,
+                no_repeat_ngram_size=3,
+                repetition_penalty=1.3,
+            )
+        reports = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
+        # Strip any leading "Projection: X." prefix that leaked from training data
+        # Everything up to and including the first "." is dropped; a report that
+        # starts with "projection:" but contains no "." is kept unchanged.
+        cleaned = []
+        for r in reports:
+            if r.lower().startswith("projection:"):
+                # Remove the first "Projection: X." segment
+                parts = r.split(".", 1)
+                r = parts[1].strip() if len(parts) > 1 else r
+            cleaned.append(r)
+        return cleaned
+# ─────────────────────────────────────────────────────────────────────────────
+# ARCHITECTURE 3 — PPO VisionT5Model
+# Uses self.txt_model and self.img_proj — matching RM/PPO notebook Cell 4.
+# ─────────────────────────────────────────────────────────────────────────────
+class PPOVisionT5Model(nn.Module):
+    """Image-to-text model with the same layout as the SFT model but different
+    attribute names (``txt_model``, ``img_proj``), which determine the
+    state_dict keys it can load.
+    """
+    def __init__(self, img_encoder, txt_model_name="t5-small", img_emb_dim=768):
+        super().__init__()
+        self.img_encoder = img_encoder
+        # ← self.txt_model (matches PPO notebook Cell 4)
+        self.txt_model = T5ForConditionalGeneration.from_pretrained(txt_model_name)
+        # ← self.img_proj (matches PPO notebook Cell 4)
+        self.img_proj = nn.Linear(img_emb_dim, self.txt_model.config.d_model)
+    def generate_reports(self, images, max_length=128):
+        """Generate one decoded report string per input image.
+
+        The projected image feature is fed to the T5 encoder as a length-1
+        sequence with an all-ones attention mask, decoding uses 4-beam search,
+        and the ids are decoded with the module-level ``tokenizer``. Returns a
+        list of strings. Note the default ``max_length`` here is 128.
+
+        Side effect: calls ``self.eval()``, so the module stays in eval mode
+        after the call.
+        """
+        self.eval()
+        with torch.no_grad():
+            img_features = self.img_encoder(images)             # [B, feature_dim]
+            img_emb = self.img_proj(img_features).unsqueeze(1) # [B, 1, d_model]
+            batch_size = images.size(0)
+            img_attn = torch.ones(batch_size, 1, device=images.device)
+            encoder_outputs = self.txt_model.encoder(
+                inputs_embeds=img_emb,
+                attention_mask=img_attn
+            )
+            # BUG FIX 3: same repetition guards as SFT
+            generated = self.txt_model.generate(
+                encoder_outputs=encoder_outputs,
+                attention_mask=img_attn,
+                max_length=max_length,
+                num_beams=4,
+                early_stopping=True,
+                no_repeat_ngram_size=3,
+                repetition_penalty=1.3,
+            )
+        reports = tokenizer.batch_decode(generated, skip_special_tokens=True)
+        # Strip any leading "Projection: X." prefix that leaked from training data
+        # Everything up to and including the first "." is dropped; a report that
+        # starts with "projection:" but contains no "." is kept unchanged.
+        cleaned = []
+        for r in reports:
+            if r.lower().startswith("projection:"):
+                # Remove the first "Projection: X." segment
+                parts = r.split(".", 1)
+                r = parts[1].strip() if len(parts) > 1 else r
+            cleaned.append(r)
+        return cleaned
+# ─────────────────────────────────────────────────────────────────────────────
+# ARCHITECTURE 4 — Reward Model
+# Matches RM/PPO notebook Cell 5 exactly.
+# ─────────────────────────────────────────────────────────────────────────────
+class RewardModel(nn.Module):
+    """Scores an (image, report text) pair with a single scalar per example.
+
+    Image features and mean-pooled T5 encoder states are each projected to 512
+    dimensions, concatenated, and passed through a small MLP head.
+    """
+    def __init__(self, img_encoder, txt_model_name="t5-small"):
+        super().__init__()
+        self.img_encoder = img_encoder
+        # Only the encoder half of T5 is kept.
+        self.txt_encoder = T5ForConditionalGeneration.from_pretrained(txt_model_name).encoder
+        # Requires the encoder to expose `feature_dim` (CoAtNetEncoder sets it).
+        img_dim = img_encoder.feature_dim
+        txt_dim = self.txt_encoder.config.d_model
+        self.img_proj = nn.Linear(img_dim, 512)
+        self.txt_proj = nn.Linear(txt_dim, 512)
+        # Head input is 1024 = 512 (image) + 512 (text) after concatenation.
+        self.reward_head = nn.Sequential(
+            nn.Linear(1024, 512), nn.ReLU(), nn.Dropout(0.1),
+            nn.Linear(512, 256),  nn.ReLU(), nn.Dropout(0.1),
+            nn.Linear(256, 1)
+        )
+    def forward(self, images, input_ids, attention_mask):
+        """Return the reward head output with its trailing size-1 dim squeezed."""
+        img_features = self.img_encoder(images)
+        img_emb = self.img_proj(img_features)
+        txt_outputs = self.txt_encoder(input_ids=input_ids, attention_mask=attention_mask)
+        # NOTE(review): plain mean over dim 1 (presumably the token axis); the
+        # attention_mask is not applied here, so padded positions contribute
+        # to the average.
+        txt_emb = txt_outputs.last_hidden_state.mean(dim=1)
+        txt_emb = self.txt_proj(txt_emb)
+        combined = torch.cat([img_emb, txt_emb], dim=1)
+        return self.reward_head(combined).squeeze(-1)
 # ─────────────────────────────────────────────────────────────────────────────
+# MODEL LOADER — handles both .pt (state_dict) and .pkl (full model)
+# Prints a key-match diagnostic so you can see exactly what loaded.
 # ─────────────────────────────────────────────────────────────────────────────
+def remap_keys(raw_sd: dict, label: str) -> dict:
     """
+    Remap state_dict keys to match current model attribute names.
+    Known mismatches discovered from diagnostic output:
+      SFT notebook used:
+        img_encoder.encoder.*   →  we use  img_encoder.backbone.*
+        t5.*                    →  we use  t5.*  (already correct for SFTVisionT5Model)
+        proj.*                  →  we use  proj.* (already correct for SFTVisionT5Model)
+      PPO/RM notebooks used:
+        img_encoder.backbone.*  →  already correct ✅
+        txt_model.*             →  already correct ✅
+        img_proj.*              →  already correct ✅
     """
+    # Returns a new dict; `raw_sd` itself is not modified.
+    # NOTE(review): `label` is accepted but never used in this function.
+    remapped = {}
+    changed = 0  # number of keys whose name was rewritten
+    for k, v in raw_sd.items():
+        new_k = k
+        # SFT encoder used self.encoder, our CoAtNetEncoder uses self.backbone
+        if "img_encoder.encoder." in new_k:
+            new_k = new_k.replace("img_encoder.encoder.", "img_encoder.backbone.")
+            changed += 1
+        remapped[new_k] = v
+    if changed:
+        print(f"   🔧 Remapped {changed} keys: img_encoder.encoder.* → img_encoder.backbone.*")
+    return remapped
+def load_model(path: str, model_obj: nn.Module, label: str) -> nn.Module:
+    """Load weights from ``path`` and return a model on ``device``.
+
+    * ``.pkl`` path: the file is unpickled and the resulting object is returned
+      (moved to ``device``); ``model_obj`` is ignored in that case.
+    * any other path: the file is read with ``torch.load``, its keys are passed
+      through ``remap_keys`` and loaded into ``model_obj`` with ``strict=False``.
+      Mismatched keys are only printed, never raised.
+
+    ``label`` is used for log output. ``eval()`` is not called here.
+    """
+    print(f"\n📂 Loading {label} from: {path}")
+    if path.endswith(".pkl"):
+        # SECURITY NOTE(review): pickle.load executes arbitrary code embedded in
+        # the file. Only use this branch for files from a fully trusted source.
+        with open(path, "rb") as f:
+            loaded = pickle.load(f)
+        print(f"   ✅ Loaded full pickle object: {type(loaded)}")
+        return loaded.to(device)
+    # .pt state_dict
+    # NOTE(review): the loaded object is used directly as a flat state_dict
+    # (`.keys()` below). A checkpoint wrapped in e.g. {'model_state_dict': ...}
+    # is not unwrapped, and with strict=False it would load nothing and only
+    # show up in the missing/unexpected key report.
+    # NOTE(review): torch.load may also unpickle arbitrary objects depending on
+    # the installed torch version's `weights_only` default — confirm.
+    raw_sd = torch.load(path, map_location=device)
+    # Print first 5 saved keys for diagnosis
+    saved_keys = list(raw_sd.keys())
+    print(f"   Saved keys (first 5): {saved_keys[:5]}")
+    model_keys = list(model_obj.state_dict().keys())
+    print(f"   Model keys (first 5): {model_keys[:5]}")
+    # Remap any mismatched key prefixes
+    raw_sd = remap_keys(raw_sd, label)
+    result = model_obj.load_state_dict(raw_sd, strict=False)
+    # Ignore known-safe missing keys:
+    #   head.fc.*            - classification head, intentionally removed (num_classes=0)
+    #   num_batches_tracked  - BatchNorm counter, not a learned weight
+    SAFE_MISSING = ("num_batches_tracked", "head.fc.")
+    missing    = [k for k in result.missing_keys    if not any(s in k for s in SAFE_MISSING)]
+    unexpected = [k for k in result.unexpected_keys if "num_batches_tracked" not in k]
+    if missing:
+        print(f"   Missing keys: {missing[:5]}{'...' if len(missing)>5 else ''}")
+        print(f"   WARNING: {len(missing)} missing keys - weights NOT loaded for those layers!")
+    if unexpected:
+        print(f"   Unexpected keys: {unexpected[:5]}{'...' if len(unexpected)>5 else ''}")
+    if not missing and not unexpected:
+        print(f"   OK: All keys matched perfectly!")
+    return model_obj.to(device)
+# ─────────────────────────────────────────────────────────────────────────────
+# LOAD ALL THREE MODELS FROM HUGGING FACE HUB
+# Models are downloaded from Shree2604/BioStack repository
+# ─────────────────────────────────────────────────────────────────────────────
+def download_model_from_hf(model_filename: str, local_path: str = "models/") -> str:
+    """Return a local path to ``model_filename``, downloading it if necessary.
+
+    ``local_path`` is created if missing. When the file is already present
+    there, that path is returned without any network access. Otherwise the
+    file is fetched from the "Shree2604/BioStack" repo into ``local_path`` and
+    the path reported by ``hf_hub_download`` is returned. Download errors are
+    printed and re-raised.
+    """
+    os.makedirs(local_path, exist_ok=True)
+    full_path = os.path.join(local_path, model_filename)
+    if not os.path.exists(full_path):
+        print(f" Downloading {model_filename} from Hugging Face Hub...")
+        try:
+            downloaded_path = hf_hub_download(
+                repo_id="Shree2604/BioStack",
+                filename=model_filename,
+                local_dir=local_path,
+                local_dir_use_symlinks=False
             )
+            print(f" Downloaded {model_filename}")
+            return downloaded_path
+        except Exception as e:
+            print(f" Failed to download {model_filename}: {e}")
+            raise
+    else:
+        print(f" Using local {model_filename}")
+        return full_path
+# Everything below runs at import time: the three checkpoints are fetched (if
+# not already under models/) and loaded before the FastAPI app is created.
+print("\n" + "="*60)
+print("  LOADING MODELS FROM HUGGING FACE HUB")
+print("="*60)
+# Download models from Hugging Face
+SFT_MODEL_PATH = download_model_from_hf("best_model.pt")
+REWARD_MODEL_PATH = download_model_from_hf("reward_model.pt")
+PPO_MODEL_PATH = download_model_from_hf("rlhf_model.pt")
+# Each model gets its own CoAtNetEncoder instance; encoders are not shared.
+# SFT
+_sft_enc  = CoAtNetEncoder(pretrained=False)
+sft_model = load_model(SFT_MODEL_PATH, SFTVisionT5Model(_sft_enc), "SFT Model")
+sft_model.eval()
+# Reward
+_rm_enc      = CoAtNetEncoder(pretrained=False)
+reward_model = load_model(REWARD_MODEL_PATH, RewardModel(_rm_enc), "Reward Model")
+reward_model.eval()
+# PPO
+_ppo_enc  = CoAtNetEncoder(pretrained=False)
+ppo_model = load_model(PPO_MODEL_PATH, PPOVisionT5Model(_ppo_enc), "PPO Model")
+ppo_model.eval()
+print("\n All models loaded and ready!\n" + "="*60 + "\n")
+# ─────────────────────────────────────────────────────────────────────────────
+# IMAGE PREPROCESSING
+# Matches BOTH notebooks: RGB, 224×224, ImageNet normalisation
+# ─────────────────────────────────────────────────────────────────────────────
+# NOTE: this rebinds the module-level `transform` that was already defined
+# further up; functions that look up `transform` at call time (such as
+# preprocess_image) will use this definition once the module has loaded.
+transform = transforms.Compose([
+    transforms.Resize((224, 224)),
+    transforms.ToTensor(),
+    transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                         std=[0.229, 0.224, 0.225])
+])
+def preprocess(file_bytes: bytes) -> torch.Tensor:
+    """Decode raw image bytes, convert to RGB, apply ``transform``, add a
+    leading batch dimension and move the tensor to ``device``."""
+    img = Image.open(io.BytesIO(file_bytes)).convert("RGB")
+    return transform(img).unsqueeze(0).to(device)   # [1, 3, 224, 224]
 # ─────────────────────────────────────────────────────────────────────────────
+# REWARD FEEDBACK GENERATOR
 # ─────────────────────────────────────────────────────────────────────────────
+# Terms looked for (as lowercase substrings) when describing a report.
+KEY_MEDICAL_TERMS = [
+    'lung', 'heart', 'normal', 'clear', 'opacity', 'infiltrate',
+    'cardiomegaly', 'pleural', 'pulmonary', 'chest', 'thorax',
+    'pneumonia', 'edema', 'effusion', 'consolidation'
+]
+def reward_feedback(report: str, score: float) -> str:
+    """Build a one-line, human-readable summary for a scored report.
+
+    The string contains the reward ``score`` plus three heuristic percentages
+    derived only from the report text: share of KEY_MEDICAL_TERMS present,
+    word count relative to 100 (capped at 100%), and a length-based structure
+    value (100% for 50-150 words, otherwise 50%).
+    """
+    rl = report.lower()
+    # Substring match, so a term also counts when embedded in a longer word.
+    present = [t for t in KEY_MEDICAL_TERMS if t in rl]
+    # NOTE(review): `missing`, `length_q` and `radiological_score` are computed
+    # but do not appear in the returned string.
+    missing  = [t for t in KEY_MEDICAL_TERMS if t not in rl]
+    words    = len(report.split())
+    length_q = "good" if 50 <= words <= 150 else ("too short" if words < 50 else "too long")
+    # Quality factor assessments based on the score and analysis
+    terminology_score = len(present) / len(KEY_MEDICAL_TERMS)
+    completeness_score = min(1.0, words / 100.0)  # Rough estimate based on length
+    structure_score = 1.0 if 50 <= words <= 150 else 0.5  # Good structure if proper length
+    radiological_score = score  # The overall score represents alignment
+    return (
+        f"Reward Score: {score:.2f} | "
+        f"Quality Factors - "
+        f"Medical Terminology: {terminology_score:.1%} | "
+        f"Clinical Completeness: {completeness_score:.1%} | "
+        f"Report Structure: {structure_score:.1%}"
     )
 # ─────────────────────────────────────────────────────────────────────────────
 # FASTAPI APP
 # ─────────────────────────────────────────────────────────────────────────────
+app = FastAPI(title="RLHF Medical Demo")  # application object; the route decorators below register on it
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"],  # Allow all origins for Hugging Face Spaces
     allow_methods=["*"],  # any HTTP method is accepted cross-origin
     allow_headers=["*"],  # any request header is accepted cross-origin
 )
 @app.get("/health")
 def health():
+    """Liveness probe: report an "ok" status and the active torch device."""
+    payload = {"status": "ok", "device": str(device)}
+    return payload
 @app.post("/sft")
 async def sft_inference(file: UploadFile = File(...)):
+    """Generate a report for an uploaded image with the SFT model.
+
+    Returns:
+        A dict with a single ``report`` key. Failures are not raised to the
+        client: the traceback is printed and ``report`` carries an
+        ``ERROR: ...`` string instead.
+    """
+    # NOTE(review): the response is cut to this many characters (possibly
+    # mid-word, with no ellipsis), while the log line below and the /reward
+    # and /ppo responses carry untruncated reports. Behaviour is kept as-is;
+    # confirm the cut-off is intended.
+    max_report_chars = 81
     try:
+        tensor = preprocess(await file.read())
+        report = sft_model.generate_reports(tensor)[0]
         print(f"[SFT] Generated: {report}")
+        return {"report": report[:max_report_chars]}
     except Exception as e:
         traceback.print_exc()
+        return {"report": f"ERROR: {e}"}
+@app.post("/reward")
+async def reward_inference(file: UploadFile = File(...)):
+    """Score the SFT model's report for an uploaded image.
+
+    The image is run through ``sft_model`` to obtain a report. The report is
+    tokenised (padded/truncated to 128 tokens) and passed together with the
+    image tensor to ``reward_model``; the raw output is clamped to [0, 1].
+
+    Returns:
+        A dict with ``score`` (float in [0, 1]), ``feedback`` (the
+        ``reward_feedback`` summary) and ``sft_report``. An empty or
+        whitespace-only report short-circuits to a zero score with empty
+        strings. On any exception the traceback is printed and a zero score
+        with an ``ERROR: ...`` feedback string is returned.
+    """
+    import math  # function-scope import: only the NaN guard below needs it
     try:
+        tensor = preprocess(await file.read())
+        # First get the SFT report to score
+        sft_report = sft_model.generate_reports(tensor)[0]
+        print(f"[REWARD] Scoring SFT report: {sft_report}")
+        if not sft_report.strip():
+            return {"score": 0.0, "feedback": "", "sft_report": ""}
+        enc = tokenizer(
+            [sft_report],
+            max_length=128,
+            padding="max_length",
+            truncation=True,
+            return_tensors="pt"
+        )
+        input_ids      = enc.input_ids.to(device)
+        attention_mask = enc.attention_mask.to(device)
         with torch.no_grad():
+            raw_score = reward_model(tensor, input_ids, attention_mask).item()
+        # Clamp once and reuse the result for both the log and the response.
+        # min(1.0, nan) evaluates to 1.0, so without the explicit guard a NaN
+        # model output would be reported as a perfect score.
+        if math.isnan(raw_score):
+            score = 0.0
+        else:
+            score = float(max(0.0, min(1.0, raw_score)))
+        # Detailed debug logging
+        print(f"[REWARD] Raw neural network output: {raw_score:.6f}")
+        print(f"[REWARD] Clamping to [0,1] range: max(0.0, min(1.0, {raw_score:.6f})) = {score:.6f}")
+        # Quality assessment details (same substring rule as reward_feedback)
+        rl = sft_report.lower()
+        present = [t for t in KEY_MEDICAL_TERMS if t in rl]
+        missing  = [t for t in KEY_MEDICAL_TERMS if t not in rl]
+        words    = len(sft_report.split())
+        length_q = "good" if 50 <= words <= 150 else ("too short" if words < 50 else "too long")
+        print(f"[REWARD] Report analysis:")
+        print(f"         - Total words: {words} ({length_q})")
+        print(f"         - Medical terms present ({len(present)}/{len(KEY_MEDICAL_TERMS)}): {present}")
+        print(f"         - Medical terms missing: {missing}")
+        print(f"         - Key terms list: {KEY_MEDICAL_TERMS}")
+        # Reward model architecture details
+        # NOTE(review): the last two lines print fixed text, not values read
+        # from the model — confirm they still match the loaded architecture.
+        print(f"[REWARD] Model architecture:")
+        print(f"         - CoAtNet feature dim: {reward_model.img_encoder.feature_dim}")
+        print(f"         - T5 d_model: {reward_model.txt_encoder.config.d_model}")
+        print(f"         - Combined feature dim: 1024 (512 img + 512 text)")
+        print(f"         - Reward head: 1024→512→256→1")
+        feedback = reward_feedback(sft_report, score)
+        print(f"[REWARD] Final Score={score:.3f}")
+        return {"score": score, "feedback": feedback, "sft_report": sft_report}
     except Exception as e:
         traceback.print_exc()
+        return {"score": 0.0, "feedback": f"ERROR: {str(e)}", "sft_report": ""}
+@app.post("/ppo")
+async def ppo_inference(file: UploadFile = File(...)):
+    """Generate a report for an uploaded image with the PPO model.
+
+    Returns a dict with a single ``report`` key; on any exception the
+    traceback is printed and ``report`` carries an ``ERROR: ...`` string.
+    """
     try:
+        image_bytes = await file.read()
+        batch = preprocess(image_bytes)
+        generated = ppo_model.generate_reports(batch)
+        report = generated[0]
+        print(f"[PPO] Generated: {report}")
+        return {"report": report}
+    except Exception as exc:
         traceback.print_exc()
+        return {"report": f"ERROR: {str(exc)}"}
 # ─────────────────────────────────────────────────────────────────────────────
+# DIAGNOSTIC ENDPOINT — call GET /debug_keys to verify key names in your files
+# e.g.  curl http://localhost:8000/debug_keys
+# ─────────────────────────────────────────────────────────────────────────────
+@app.get("/debug_keys")
+def debug_keys():
+    """Report the first key names found in each model checkpoint file.
+
+    Returns:
+        A dict keyed by "SFT", "Reward" and "PPO". Each value is either
+        ``{"first_10_keys": [...], "total_keys": n}``, a
+        ``"FILE NOT FOUND: <path>"`` string, or an ``"ERROR: <message>"``
+        string if loading or key listing failed.
+    """
+    checkpoints = [("SFT", SFT_MODEL_PATH), ("Reward", REWARD_MODEL_PATH), ("PPO", PPO_MODEL_PATH)]
+    result = {}
+    for label, path in checkpoints:
+        if not os.path.exists(path):
+            result[label] = f"FILE NOT FOUND: {path}"
+            continue
+        try:
+            # NOTE(review): torch.load unpickles the file, which can execute
+            # arbitrary code; only acceptable because these are the app's own
+            # checkpoints. Consider weights_only=True if the installed torch
+            # version and checkpoint contents allow it.
+            sd = torch.load(path, map_location="cpu")
+            keys = list(sd.keys())
+            # Release the checkpoint before the next one is loaded so that
+            # two full checkpoints are never held in memory at once.
+            del sd
+            result[label] = {"first_10_keys": keys[:10], "total_keys": len(keys)}
+        except Exception as e:
+            result[label] = f"ERROR: {e}"
+    return result
+# ─────────────────────────────────────────────────────────────────────────────
+# STATIC FILE SERVING - Mount React build directory AFTER all API routes
 # ─────────────────────────────────────────────────────────────────────────────
 from fastapi.staticfiles import StaticFiles
+import os
+# Check if build directory exists, create fallback if needed
 if os.path.exists("build"):
     app.mount("/", StaticFiles(directory="build", html=True), name="static")
     print("✅ React app mounted at /")