Rajan Sharma
committed
Update app.py
app.py
CHANGED
@@ -7,86 +7,123 @@ from huggingface_hub.utils import RepositoryNotFoundError, HfHubHTTPError
 import time
 import requests
 from tenacity import retry, stop_after_attempt, wait_exponential
+from functools import lru_cache
+import torch
+
+# Global variables for model caching
+global_model = None
+global_tokenizer = None
 
 def get_timestamp():
     """Get current UTC datetime in specified format"""
     return datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
 
-def format_system_info():
+def format_system_info(processing_time=None):
     """Format system information header"""
-    return (
+    info = (
         f"Current Date and Time (UTC - YYYY-MM-DD HH:MM:SS formatted): {get_timestamp()}\n"
         f"Current User's Login: Raj-VedAI\n"
     )
+    if processing_time is not None:
+        info += f"Processing Time: {processing_time:.2f} seconds\n"
+    return info
 
-
-
-
+@lru_cache(maxsize=1)
+def load_model():
+    """Load and cache the model"""
+    global global_model, global_tokenizer
+
+    if global_model is not None and global_tokenizer is not None:
+        return global_model, global_tokenizer
+
     try:
-        # Try HUGGING_FACE_HUB_TOKEN first, fallback to HF_TOKEN
         token = os.getenv("HUGGING_FACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
         if not token:
-
+            raise ValueError("No token found. Please set HUGGING_FACE_HUB_TOKEN or HF_TOKEN in Space secrets.")
 
-        # Force re-login to refresh connection
         login(token=token, add_to_git_credential=False)
 
-        # Initialize with device mapping and low memory settings
         model_id = "CohereLabs/c4ai-command-a-03-2025"
+
+        # Load tokenizer with optimizations
         tokenizer = AutoTokenizer.from_pretrained(
             model_id,
             token=token,
-            use_fast=True
+            use_fast=True,
+            model_max_length=2048
         )
+
+        # Load model with optimizations
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
             token=token,
             device_map="auto",
             low_cpu_mem_usage=True,
-            torch_dtype=
+            torch_dtype=torch.float16,  # Use float16 for faster inference
+            offload_folder="offload"  # Enable model offloading if needed
         )
-
+
+        global_model = model
+        global_tokenizer = tokenizer
+        return model, tokenizer
+
+    except Exception as e:
+        raise Exception(f"Error loading model: {str(e)}")
+
+def generate_with_timeout(model, input_ids, max_new_tokens=100, timeout=60):
+    """Generate response with a wall-clock time cap"""
+    try:
+        with torch.no_grad():
+            output = model.generate(
+                input_ids,
+                max_new_tokens=max_new_tokens,
+                do_sample=True,
+                temperature=0.3,
+                pad_token_id=model.config.eos_token_id,
+                attention_mask=input_ids.new_ones(input_ids.shape),
+                top_p=0.9,
+                repetition_penalty=1.2,
+                max_time=timeout  # generate() has no timeout_seconds kwarg; max_time caps generation in seconds
+            )
+        return output
     except Exception as e:
-
+        raise Exception(f"Generation timeout or error: {str(e)}")
 
-@retry(stop=stop_after_attempt(
+@retry(stop=stop_after_attempt(2), wait=wait_exponential(multiplier=1, min=2, max=4))
 def chat(message, history):
-
+    start_time = time.time()
 
     try:
-        #
-
-        if not success:
-            return [{"role": "user", "content": message},
-                    {"role": "assistant", "content": f"{system_info}Error: {result}"}]
-        model = result
+        # Load or get cached model
+        model, tokenizer = load_model()
 
         if history is None:
             history = []
 
-        # Format messages
+        # Format messages
         messages = [{"role": "user", "content": message}]
-        input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True)
+        input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(model.device)
 
-        # Generate response with
-        gen_tokens = model.generate(
-            input_ids,
-            max_new_tokens=100,
-            do_sample=True,
-            temperature=0.3,
-            pad_token_id=tokenizer.eos_token_id,
-            attention_mask=input_ids.new_ones(input_ids.shape)
-        )
+        # Generate response with timeout
+        gen_tokens = generate_with_timeout(model, input_ids)
 
         # Decode response
         gen_text = tokenizer.decode(gen_tokens[0], skip_special_tokens=True)
 
-        #
+        # Calculate processing time
+        processing_time = time.time() - start_time
+        system_info = format_system_info(processing_time)
+
+        # Format response
         history.append({"role": "user", "content": message})
         history.append({"role": "assistant", "content": f"{system_info}{gen_text}"})
         return history
+
     except Exception as e:
+        processing_time = time.time() - start_time
+        system_info = format_system_info(processing_time)
         error_msg = f"{system_info}Error during chat: {str(e)}\nAttempting reconnection..."
+
         if history is None:
             history = []
         history.append({"role": "user", "content": message})
@@ -103,11 +140,12 @@ def check_connection():
     Connection Status: ✅ Connected
     Model: {model_info.modelId}
     Last Modified: {model_info.lastModified}
+    Model Status: {'Loaded' if global_model is not None else 'Not Loaded'}
     """
     except Exception as e:
         return f"{format_system_info()}Connection Status: ❌ Error\nDetails: {str(e)}"
 
-# Create the Gradio interface with
+# Create the Gradio interface with loading indicator
 with gr.Blocks(theme=gr.themes.Default()) as demo:
     gr.Markdown(f"# Medical Decision Support AI\n{format_system_info()}")
 
@@ -115,6 +153,10 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
     connection_btn = gr.Button("Check Connection Status")
     connection_status = gr.Textbox(label="Connection Status", lines=6)
 
+    # Add loading configuration
+    with gr.Row():
+        gr.Markdown("⚙️ Model is loading... Please wait for first response.")
+
     chat_interface = gr.ChatInterface(
         fn=chat,
         description=f"A medical decision support system that provides healthcare-related information and guidance.\n{format_system_info()}",
@@ -123,12 +165,31 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
             "What are common drug interactions with aspirin?",
             "What are the warning signs of diabetes?",
         ],
-        type='messages'
+        type='messages',
+        retry_btn="Retry ↺",
+        undo_btn="Undo ↶",
+        clear_btn="Clear 🗑️"
     )
 
     connection_btn.click(check_connection, outputs=connection_status)
 
-    # Check connection on startup
+    # Check connection and load model on startup
     connection_status.value = check_connection()
+    # Pre-load the model
+    try:
+        load_model()
+    except Exception as e:
+        gr.Warning(f"Model pre-loading failed: {str(e)}")
+
+    # Update requirements
+    requirements = """
+    gradio>=3.50.2
+    transformers
+    torch
+    accelerate
+    huggingface_hub
+    requests
+    tenacity
+    """
 
 demo.launch()