Spaces:

aneeshm44
/

Gemma3nSolution

Sleeping

App Files Files Community

aneeshm44 committed on Aug 6, 2025

Commit

089ec60

verified ·

1 Parent(s): 0dc73ab

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -66

app.py CHANGED Viewed

@@ -15,11 +15,8 @@ from PIL import Image
 import gradio as gr
 from huggingface_hub import snapshot_download
 from typing import List, Union, Dict
-# Configuration
-class CFG:
-    MAX_LENGTH = 512
-    LABEL_MASK = -100
 # Vision Model
 class TimmCNNModel(nn.Module):
@@ -44,7 +41,7 @@ class TimmCNNModel(nn.Module):
             nn.ReLU(inplace=True),
             nn.Linear(256, num_classes)
         )
     def forward_features(self, x: torch.Tensor) -> torch.Tensor:
         return self.backbone(x)
@@ -211,17 +208,15 @@ class Model(nn.Module):
             **generator_kwargs
         )
-# Global variables for models
 vlm_model = None
 tokenizer = None
 def download_and_load_models():
-    """Download models and load them into memory"""
-    global vlm_model, tokenizer
     print("Starting model download and initialization...")
-    # Set device
     if torch.cuda.is_available():
         device = torch.device("cuda:0")
         print("CUDA available - using GPU")
@@ -229,7 +224,6 @@ def download_and_load_models():
         device = torch.device("cpu")
         print("CUDA not available - using CPU")
-    # Download weights
     repo_id = "aneeshm44/regfinal"
     print(f"Downloading from repo: {repo_id}")
@@ -253,12 +247,10 @@ def download_and_load_models():
         print(f"Download failed: {e}")
         raise e
-    # Set paths
     llm_path = os.path.join(local_dir, "llmweights")
     image_weights_path = os.path.join(local_dir, "imagemodelweights", "finalcheckpoint.pth")
     projector_weights_path = os.path.join(local_dir, "projectorweights", "projector.pth")
-    # Load Language Model
     print("Loading language model...")
     try:
         language_model = AutoModelForCausalLM.from_pretrained(
@@ -276,7 +268,6 @@ def download_and_load_models():
         print(f"Language model loading failed: {e}")
         raise e
-    # Load Vision Model
     print("Loading vision model...")
     try:
         image_model = TimmCNNModel(num_classes=8)
@@ -292,7 +283,6 @@ def download_and_load_models():
         print(f"Vision model loading failed: {e}")
         raise e
-    # Load Projector
     print("Loading projector...")
     try:
         projector = Projector_4to3d(cnn_dim=1280, llm_dim=2048, num_heads=8)
@@ -308,7 +298,6 @@ def download_and_load_models():
         print(f"Projector loading failed: {e}")
         raise e
-    # Create VLM Model
     print("Creating VLM model...")
     try:
         vlm_model = Model(image_model, language_model, projector, tokenizer, prompt="Describe this image:")
@@ -318,38 +307,29 @@ def download_and_load_models():
         print(f"VLM model creation failed: {e}")
         raise e
-    print("All models loaded successfully!")
-def pil_to_tensor(image):
-    """Convert PIL image directly to tensor without normalization"""
-    # Convert PIL to numpy array
-    img_array = np.array(image)
-    # Convert to tensor and normalize to [0, 1] range
-    img_tensor = torch.from_numpy(img_array).float() / 255.0
-    # Rearrange from HWC to CHW format
-    img_tensor = img_tensor.permute(2, 0, 1)
-    # Add batch dimension
-    img_tensor = img_tensor.unsqueeze(0)
-    return img_tensor
 def tensor_to_pil_image(tensor):
-    """Convert tensor to PIL image for display"""
-    # Remove batch dimension and clamp values
     img_tensor = tensor.squeeze(0)
     img_tensor = torch.clamp(img_tensor, 0, 1)
-    # Convert to PIL
     img_array = img_tensor.permute(1, 2, 0).numpy()
     img_array = (img_array * 255).astype(np.uint8)
     return Image.fromarray(img_array)
 def describe_image(image, temperature, top_p, max_tokens, progress=gr.Progress()):
-    """Generate description for uploaded image"""
-    global vlm_model, tokenizer
     if vlm_model is None:
         return "Models not loaded yet. Please wait for initialization to complete.", None
@@ -358,7 +338,6 @@ def describe_image(image, temperature, top_p, max_tokens, progress=gr.Progress()
         return "Please upload an image.", None
     try:
-        # Progress tracking
         progress(0.1, desc="Starting image processing...")
         # Preprocess image
@@ -367,12 +346,10 @@ def describe_image(image, temperature, top_p, max_tokens, progress=gr.Progress()
         elif hasattr(image, 'convert'):
             image = image.convert('RGB')
-        progress(0.3, desc="Converting image to tensor...")
-        # Convert PIL image directly to tensor
-        image_tensor = pil_to_tensor(image)
-        # Convert tensor to PIL image for display
         processed_image = tensor_to_pil_image(image_tensor)
         progress(0.5, desc="Setting up generation parameters...")
@@ -395,7 +372,6 @@ def describe_image(image, temperature, top_p, max_tokens, progress=gr.Progress()
         progress(0.9, desc="Finalizing report...")
-        # Clean up the output (remove the prompt)
         if "Describe this image:" in text:
             description = text.split("Describe this image:")[-1].strip()
         else:
@@ -411,35 +387,32 @@ def describe_image(image, temperature, top_p, max_tokens, progress=gr.Progress()
         return f"Error processing image: {str(e)}", None
 def reset_interface():
-    """Reset the interface by clearing all outputs"""
-    return None, "Models loaded successfully! Upload an image to get started.", None
-# Initialize models when the script starts
 try:
     download_and_load_models()
-    initial_status = "Models loaded successfully! Upload an image to get started."
 except Exception as e:
     initial_status = f"Failed to load models: {str(e)}"
-# Create Gradio Interface
 def create_interface():
     with gr.Blocks(title="WSI Pathology Report using Gemma3n") as demo:
         gr.Markdown("# WSI Pathology Report using Gemma3n")
-        gr.Markdown("Upload a pathology image and get an AI-generated pathology report.")
         with gr.Row():
             with gr.Column():
-                image_input = gr.Image(type="pil", label="Upload WSI Image")
                 # Generation parameters
                 with gr.Row():
                     temperature_slider = gr.Slider(
                         minimum=0.1,
                         maximum=1.0,
-                        value=0.4,
                         step=0.1,
                         label="Temperature",
-                        info="Lower values = more focused/consistent, Higher values = more creative/varied"
                     )
                     top_p_slider = gr.Slider(
@@ -448,7 +421,7 @@ def create_interface():
                         value=0.9,
                         step=0.1,
                         label="Top-p",
-                        info="Lower values = more focused vocabulary, Higher values = more diverse vocabulary"
                     )
                     max_tokens_slider = gr.Slider(
@@ -456,7 +429,7 @@ def create_interface():
                         maximum=200,
                         value=100,
                         step=10,
-                        label="Max Tokens"
                     )
                 with gr.Row():
@@ -472,27 +445,23 @@ def create_interface():
                 )
                 processed_image = gr.Image(
-                    label="Processed Image Tensor",
                     show_download_button=True
                 )
-        # Event handlers
-        submit_btn.click(
-            fn=describe_image,
-            inputs=[image_input, temperature_slider, top_p_slider, max_tokens_slider],
-            outputs=[output_text, processed_image],
-            show_progress=True
         )
-        # Auto-generate on image upload
-        image_input.change(
             fn=describe_image,
             inputs=[image_input, temperature_slider, top_p_slider, max_tokens_slider],
             outputs=[output_text, processed_image],
             show_progress=True
         )
-        # Reset functionality
         reset_btn.click(
             fn=reset_interface,
             inputs=[],
@@ -501,7 +470,6 @@ def create_interface():
     return demo
-# Launch the interface
 if __name__ == "__main__":
     demo = create_interface()
     demo.launch(
@@ -509,5 +477,4 @@ if __name__ == "__main__":
         server_port=7860,
         share=False,
         show_error=True
-    )

 import gradio as gr
 from huggingface_hub import snapshot_download
 from typing import List, Union, Dict
+import torchvision.transforms as transforms
 # Vision Model
 class TimmCNNModel(nn.Module):
             nn.ReLU(inplace=True),
             nn.Linear(256, num_classes)
         )
     def forward_features(self, x: torch.Tensor) -> torch.Tensor:
         return self.backbone(x)
             **generator_kwargs
         )
 vlm_model = None
 tokenizer = None
+transform = None
 def download_and_load_models():
+    global vlm_model, tokenizer, transform
     print("Starting model download and initialization...")
     if torch.cuda.is_available():
         device = torch.device("cuda:0")
         print("CUDA available - using GPU")
         device = torch.device("cpu")
         print("CUDA not available - using CPU")
     repo_id = "aneeshm44/regfinal"
     print(f"Downloading from repo: {repo_id}")
         print(f"Download failed: {e}")
         raise e
     llm_path = os.path.join(local_dir, "llmweights")
     image_weights_path = os.path.join(local_dir, "imagemodelweights", "finalcheckpoint.pth")
     projector_weights_path = os.path.join(local_dir, "projectorweights", "projector.pth")
     print("Loading language model...")
     try:
         language_model = AutoModelForCausalLM.from_pretrained(
         print(f"Language model loading failed: {e}")
         raise e
     print("Loading vision model...")
     try:
         image_model = TimmCNNModel(num_classes=8)
         print(f"Vision model loading failed: {e}")
         raise e
     print("Loading projector...")
     try:
         projector = Projector_4to3d(cnn_dim=1280, llm_dim=2048, num_heads=8)
         print(f"Projector loading failed: {e}")
         raise e
     print("Creating VLM model...")
     try:
         vlm_model = Model(image_model, language_model, projector, tokenizer, prompt="Describe this image:")
         print(f"VLM model creation failed: {e}")
         raise e
+    transform = transforms.Compose([
+        transforms.Resize((256, 256)),
+        transforms.ToTensor(),
+    ])
+    print("All models loaded successfully!")
 def tensor_to_pil_image(tensor):
     """Convert an image tensor into a PIL image for display.

     The tensor is detached from any autograd graph and copied to host
     memory first, because ``Tensor.numpy()`` raises for CUDA tensors and
     for tensors that require grad.

     Args:
         tensor: Image tensor. After dropping a leading dimension of size 1
             (if there is one) it must be 3-D; the ``permute(1, 2, 0)`` below
             moves the first axis to the end (presumably channels-first
             input, e.g. the output of ``transforms.ToTensor()`` -- confirm
             against callers). Values are expected in ``[0, 1]``; anything
             outside that range is clamped.

     Returns:
         The ``PIL.Image.Image`` built from the resulting ``uint8`` array.
     """
     # detach()/cpu() are no-ops for a plain CPU tensor, so existing callers
     # see identical results.
     img_tensor = tensor.detach().cpu().squeeze(0)
     img_tensor = torch.clamp(img_tensor, 0, 1)
     # Move the first axis last so the array layout matches what
     # Image.fromarray expects for a colour image.
     img_array = img_tensor.permute(1, 2, 0).numpy()
     # Scale to 0..255; astype truncates toward zero (kept as in the original).
     img_array = (img_array * 255).astype(np.uint8)
     return Image.fromarray(img_array)
+def on_image_upload(image):
+    if image is not None:
+        return "Image processed, click 'Generate Report' to produce report."
+    else:
+        return "Models are loaded, upload the Image to get started."
 def describe_image(image, temperature, top_p, max_tokens, progress=gr.Progress()):
+    global vlm_model, tokenizer, transform
     if vlm_model is None:
         return "Models not loaded yet. Please wait for initialization to complete.", None
         return "Please upload an image.", None
     try:
         progress(0.1, desc="Starting image processing...")
         # Preprocess image
         elif hasattr(image, 'convert'):
             image = image.convert('RGB')
+        progress(0.3, desc="Applying image transformations...")
+        image_tensor = transform(image).unsqueeze(0)  # Add batch dimension
         processed_image = tensor_to_pil_image(image_tensor)
         progress(0.5, desc="Setting up generation parameters...")
         progress(0.9, desc="Finalizing report...")
         if "Describe this image:" in text:
             description = text.split("Describe this image:")[-1].strip()
         else:
         return f"Error processing image: {str(e)}", None
 def reset_interface():
+    return None, "Models are loaded, upload the WSI file to get started.", None
 try:
     download_and_load_models()
+    initial_status = "Models are loaded, upload the WSI file to get started."
 except Exception as e:
     initial_status = f"Failed to load models: {str(e)}"
 def create_interface():
     with gr.Blocks(title="WSI Pathology Report using Gemma3n") as demo:
         gr.Markdown("# WSI Pathology Report using Gemma3n")
+        gr.Markdown("Upload a pathology WSI to get concise a report")
         with gr.Row():
             with gr.Column():
+                image_input = gr.Image(type="pil", label="Upload WSI file")
                 # Generation parameters
                 with gr.Row():
                     temperature_slider = gr.Slider(
                         minimum=0.1,
                         maximum=1.0,
+                        value=0.6,
                         step=0.1,
                         label="Temperature",
+                        info="Lower values give consistent results and Higher values produce creative results"
                     )
                     top_p_slider = gr.Slider(
                         value=0.9,
                         step=0.1,
                         label="Top-p",
+                        info="Lower values use a more focused vocabulary for sampling compared to a more diverse vocabulary in Higher values"
                     )
                     max_tokens_slider = gr.Slider(
                         maximum=200,
                         value=100,
                         step=10,
+                        label="Max Tokens for generation"
                     )
                 with gr.Row():
                 )
                 processed_image = gr.Image(
+                    label="Processed WSI",
                     show_download_button=True
                 )
+        image_input.change(
+            fn=on_image_upload,
+            inputs=[image_input],
+            outputs=[output_text]
         )
+        submit_btn.click(
             fn=describe_image,
             inputs=[image_input, temperature_slider, top_p_slider, max_tokens_slider],
             outputs=[output_text, processed_image],
             show_progress=True
         )
         reset_btn.click(
             fn=reset_interface,
             inputs=[],
     return demo
 if __name__ == "__main__":
     demo = create_interface()
     demo.launch(
         server_port=7860,
         share=False,
         show_error=True
+    )