Spaces · Running
Commit 1ea681e · 1 Parent: 33fd881

Move app.py to root directory for Hugging Face Space deployment

Files changed:
- README.md (+1, -1)
- app.py (+51, -61)
- requirements.txt (+2, -2)
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: blue
 colorTo: indigo
 sdk: gradio
 sdk_version: 4.19.2
-app_file:
+app_file: app.py
 pinned: false
 license: mit
 ---
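For reference, the visible portion of the Space front matter after this commit reads as below; fields above colorFrom (such as title) fall outside the diff context and are not shown, and the old app_file value is truncated in the diff view:

```yaml
colorFrom: blue
colorTo: indigo
sdk: gradio
sdk_version: 4.19.2
app_file: app.py   # must point at the entry script's new root-level location
pinned: false
license: mit
---
```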
app.py CHANGED
@@ -81,25 +81,51 @@ def initialize_model():
             del model
             torch.cuda.empty_cache()
 
-        #
+        # Set device
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        logger.info(f"Using device: {device}")
+
+        # Initialize new model with Hugging Face specific parameters
         model = LLaVA(
             vision_model_path="openai/clip-vit-base-patch32",
             language_model_path="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-            device=
-            projection_hidden_dim=2048
+            device=device,
+            projection_hidden_dim=2048,
+            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+            low_cpu_mem_usage=True,
+            use_safetensors=True,
+            load_in_8bit=True if device == "cuda" else False,
+            trust_remote_code=True
         )
 
         # Configure model for inference
         if hasattr(model, 'language_model'):
             model.language_model.config.use_cache = False
             model.language_model.eval()
+
+            # Set generation config
+            if hasattr(model.language_model, 'generation_config'):
+                model.language_model.generation_config.do_sample = True
+                model.language_model.generation_config.max_new_tokens = 256
+                model.language_model.generation_config.temperature = 0.7
+                model.language_model.generation_config.top_p = 0.9
+                model.language_model.generation_config.pad_token_id = model.language_model.config.eos_token_id
+
+        # Move model to device
+        model = model.to(device)
 
         model_status.update({
             "initialized": True,
             "device": str(model.device),
-            "error": None
+            "error": None,
+            "model_info": {
+                "vision_model": "openai/clip-vit-base-patch32",
+                "language_model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+                "dtype": str(model.dtype),
+                "device": str(model.device)
+            }
         })
-        logger.info(f"Model successfully initialized on {model.device}")
+        logger.info(f"Model successfully initialized on {model.device} with dtype {model.dtype}")
         return True
 
     except Exception as e:
@@ -167,16 +193,25 @@ def process_image(
         # Clear memory
         torch.cuda.empty_cache()
 
-        # Process image
+        # Process image with Hugging Face specific settings
        with torch.inference_mode():
             try:
                 logger.info("Generating response...")
+                # Update generation config if available
+                if hasattr(model, 'language_model') and hasattr(model.language_model, 'generation_config'):
+                    model.language_model.generation_config.max_new_tokens = max_new_tokens
+                    model.language_model.generation_config.temperature = temperature
+                    model.language_model.generation_config.top_p = top_p
+
                 response = model.generate_from_image(
                     image_path=temp_path,
                     prompt=prompt,
                     max_new_tokens=max_new_tokens,
                     temperature=temperature,
-                    top_p=top_p
+                    top_p=top_p,
+                    do_sample=True,
+                    num_beams=1,
+                    pad_token_id=model.language_model.config.eos_token_id if hasattr(model, 'language_model') else None
                 )
 
                 if not response:
@@ -217,25 +252,8 @@ def process_image(
     except Exception as e:
         logger.warning(f"Failed to clear CUDA cache: {str(e)}")
 
-def get_status_text() -> str:
-    """Get a formatted status text for display."""
-    try:
-        status = {
-            "Model Initialized": "Yes" if model is not None else "No",
-            "Device": str(model.device) if model is not None else "None",
-            "Last Error": model_status.get("last_error", "None"),
-            "Memory Usage": {
-                "CUDA Available": "Yes" if torch.cuda.is_available() else "No",
-                "Memory Allocated": f"{torch.cuda.memory_allocated() / 1024**2:.2f} MB" if torch.cuda.is_available() else "N/A",
-                "Memory Reserved": f"{torch.cuda.memory_reserved() / 1024**2:.2f} MB" if torch.cuda.is_available() else "N/A"
-            }
-        }
-        return "\n".join(f"{k}: {v}" for k, v in status.items())
-    except Exception as e:
-        return f"Error getting status: {str(e)}"
-
 def create_interface():
-    """Create
+    """Create a simplified Gradio interface."""
     try:
         with gr.Blocks(title="LLaVA Chat", theme=gr.themes.Soft()) as demo:
             gr.Markdown("""
@@ -252,19 +270,13 @@ def create_interface():
             """)
 
             with gr.Row():
-                with gr.Column(
-                    # Input components
-                    image_input = gr.Image(
-                        type="pil",
-                        label="Upload Image",
-                        image_mode="RGB",
-                        format="PNG"
-                    )
+                with gr.Column():
+                    # Input components
+                    image_input = gr.Image(type="pil", label="Upload Image")
                     prompt_input = gr.Textbox(
                         label="Ask about the image",
                         placeholder="What can you see in this image?",
-                        lines=3,
-                        max_lines=5
+                        lines=3
                     )
 
                 with gr.Accordion("Advanced Settings", open=False):
@@ -291,31 +303,17 @@ def create_interface():
                     )
 
                     submit_btn = gr.Button("Generate Response", variant="primary")
-                    status_btn = gr.Button("Check Status", variant="secondary")
 
-                with gr.Column(
+                with gr.Column():
                     output = gr.Textbox(
                         label="Model Response",
                         lines=10,
                         show_copy_button=True
                     )
-                    status_output = gr.Textbox(
-                        label="System Status",
-                        lines=5,
-                        show_copy_button=True
-                    )
-
-            # Set up event handlers with proper error handling
-            def safe_process_image(*args):
-                try:
-                    return process_image(*args)
-                except Exception as e:
-                    logger.error(f"Interface error: {str(e)}")
-                    logger.error(traceback.format_exc())
-                    return f"Error: {str(e)}"
 
+            # Set up event handler
             submit_btn.click(
-                fn=safe_process_image,
+                fn=process_image,
                 inputs=[
                     image_input,
                     prompt_input,
@@ -323,15 +321,7 @@ def create_interface():
                     temperature,
                     top_p
                 ],
-                outputs=output,
-                api_name="process_image"
-            )
-
-            status_btn.click(
-                fn=get_status_text,
-                inputs=[],
-                outputs=status_output,
-                api_name="check_status"
+                outputs=output
             )
 
             logger.info("Successfully created Gradio interface")
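Two implementation notes on the initialization changes above. First, the device/precision selection the new code relies on is a standard pattern; a minimal standalone sketch, assuming only torch is installed:

```python
import torch

# Prefer the GPU when CUDA is available; otherwise run on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision roughly halves weight memory on GPU;
# CPU inference generally needs float32.
dtype = torch.float16 if device == "cuda" else torch.float32

print(f"device={device}, dtype={dtype}")
```

Second, a hedged caution: if the custom LLaVA wrapper forwards load_in_8bit=True to transformers' from_pretrained, that path requires the bitsandbytes package, and transformers refuses to move 8-bit-quantized models afterwards with .to(device). Whether the later `model = model.to(device)` line trips over that depends on how the wrapper loads its language model.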
requirements.txt CHANGED
@@ -1,7 +1,7 @@
 transformers>=4.36.0
 torch>=2.1.0
 pillow>=10.0.0
-gradio
+gradio>=4.0.0
 fastapi>=0.100.0
 uvicorn>=0.23.0
 accelerate>=0.25.0
@@ -24,4 +24,4 @@ bcrypt>=4.0.1
 aiofiles>=23.2.0
 httpx>=0.26.0
 # Memory optimization
-optimum>=1.16.0
+optimum>=1.16.0

(The optimum line is unchanged in content; the -/+ pair likely reflects a trailing-newline change at end of file.)
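A consistency note on the pins: on Spaces, the Gradio version is selected by sdk_version in README.md (4.19.2 here), not by the gradio line in requirements.txt, so the new >=4.0.0 floor mainly affects local runs. Hypothetical additions one might consider, not part of this commit:

```
gradio==4.19.2        # match the Space's sdk_version for local parity (hypothetical)
bitsandbytes>=0.41.0  # only if load_in_8bit is actually forwarded to transformers (hypothetical)
```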