Rajan Sharma committed: Update app.py
app.py
CHANGED
@@ -9,79 +9,78 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from huggingface_hub import login, HfApi
 
-
+# ---- Config ----
+MODEL_ID = os.getenv("MODEL_ID", "CohereLabs/c4ai-command-r7b-12-2024")
 HF_TOKEN = (
-    os.getenv("HUGGINGFACE_HUB_TOKEN") #
+    os.getenv("HUGGINGFACE_HUB_TOKEN")  # canonical name in HF Spaces
     or os.getenv("HF_TOKEN")
 )
 
-def
+def utc_now() -> str:
     return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
 
-def
-
-        f"Current Date and Time (UTC - YYYY-MM-DD HH:MM:SS formatted): {
+def header(processing_time=None) -> str:
+    s = (
+        f"Current Date and Time (UTC - YYYY-MM-DD HH:MM:SS formatted): {utc_now()}\n"
         f"Current User's Login: Raj-VedAI\n"
     )
     if processing_time is not None:
-
-    return
+        s += f"Processing Time: {processing_time:.2f} seconds\n"
+    return s
 
-def
+def pick_dtype_and_map():
     if torch.cuda.is_available():
         return torch.float16, "auto"
     if torch.backends.mps.is_available():
-        # Apple Silicon (MPS) prefers float16/bfloat16 depending on model; float16 is usually OK.
         return torch.float16, {"": "mps"}
-    return torch.float32, "cpu"
+    return torch.float32, "cpu"
 
 @lru_cache(maxsize=1)
 def load_model():
+    # Login (optional for public models; safe if token is unset)
     if HF_TOKEN:
-        # In Spaces this isn’t strictly necessary if the secret is set, but it doesn’t hurt.
         login(token=HF_TOKEN, add_to_git_credential=False)
 
-    dtype, device_map =
+    dtype, device_map = pick_dtype_and_map()
 
-
+    tokenizer = AutoTokenizer.from_pretrained(
         MODEL_ID,
         token=HF_TOKEN,
         use_fast=True,
         model_max_length=4096,
-        padding_side="left",
+        padding_side="left",
+        trust_remote_code=True,  # <- allow custom model code
     )
 
-
+    model = AutoModelForCausalLM.from_pretrained(
         MODEL_ID,
         token=HF_TOKEN,
         device_map=device_map,
         low_cpu_mem_usage=True,
         torch_dtype=dtype,
+        trust_remote_code=True,  # <- allow custom model code
     )
 
-    #
-    if
-
+    # Ensure EOS configured
+    if model.config.eos_token_id is None and tokenizer.eos_token_id is not None:
+        model.config.eos_token_id = tokenizer.eos_token_id
 
-    return
+    return model, tokenizer
 
 def build_inputs(tokenizer, message, history):
-    # Convert Gradio’s (message, history) into a chat template
     msgs = []
-
-    for u, a in history or []:
+    for u, a in (history or []):
         msgs.append({"role": "user", "content": u})
         msgs.append({"role": "assistant", "content": a})
     msgs.append({"role": "user", "content": message})
-
+    return tokenizer.apply_chat_template(
         msgs,
         tokenize=True,
         add_generation_prompt=True,
         return_tensors="pt",
     )
-    return inputs
 
-def generate_reply(model, tokenizer, input_ids, max_new_tokens=
+def generate_reply(model, tokenizer, input_ids, max_new_tokens=300):
     input_ids = input_ids.to(model.device)
     with torch.no_grad():
         out = model.generate(
@@ -90,50 +89,49 @@ def generate_reply(model, tokenizer, input_ids, max_new_tokens=256):
             do_sample=True,
             temperature=0.3,
             top_p=0.9,
-            repetition_penalty=1.
+            repetition_penalty=1.15,
            pad_token_id=tokenizer.eos_token_id,
             eos_token_id=tokenizer.eos_token_id,
         )
-    # Slice off the prompt so we only return new tokens
     gen_only = out[0, input_ids.shape[-1]:]
     text = tokenizer.decode(gen_only, skip_special_tokens=True)
     return text.strip()
 
 def chat_fn(message, history):
-
+    t0 = time.time()
     try:
         model, tokenizer = load_model()
         inputs = build_inputs(tokenizer, message, history)
-        reply = generate_reply(model, tokenizer, inputs, max_new_tokens=
-
-        reply = f"{format_system_info(time.time() - start)}{reply}"
-        return reply
+        reply = generate_reply(model, tokenizer, inputs, max_new_tokens=350)
+        return f"{header(time.time() - t0)}{reply}"
     except Exception as e:
-        return f"{
+        return f"{header(time.time() - t0)}Error during chat: {e}"
 
 def check_connection():
     try:
         api = HfApi(token=HF_TOKEN)
         mi = api.model_info(MODEL_ID)
         return (
-            f"{
+            f"{header()}"
             f"Connection Status: ✅ Connected\n"
             f"Model: {mi.modelId}\n"
             f"Last Modified: {mi.lastModified}\n"
         )
     except Exception as e:
-        return f"{
+        return f"{header()}Connection Status: ❌ Error\nDetails: {e}"
 
 with gr.Blocks(theme=gr.themes.Default()) as demo:
-    gr.Markdown(f"# Medical Decision Support AI\n{
+    gr.Markdown(f"# Medical Decision Support AI\n{header()}")
+
     with gr.Row():
         btn = gr.Button("Check Connection Status")
         status = gr.Textbox(label="Connection Status", lines=6, value="Click to check…")
-
+
+    gr.Markdown("⚙️ First response may take a moment while the model warms up.")
 
     chat = gr.ChatInterface(
         fn=chat_fn,
-        type="messages",
+        type="messages",
         description="A medical decision support system that provides healthcare-related information and guidance.",
         examples=[
             "What are the symptoms of hypertension?",
@@ -147,3 +145,4 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
 if __name__ == "__main__":
     demo.launch()
 
+
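A rough local smoke test of the updated pipeline, as a minimal sketch rather than part of the commit: it assumes app.py is importable from the working directory, that the Space's dependencies (gradio, torch, transformers, huggingface_hub) are installed, and that HUGGINGFACE_HUB_TOKEN or HF_TOKEN is set if the model is gated; only functions defined in app.py above are reused.

# hypothetical local check script, illustrative only (not part of this commit)
from app import load_model, build_inputs, generate_reply, chat_fn

# Exercise the core pipeline once (the first call downloads the model and is slow).
model, tokenizer = load_model()
input_ids = build_inputs(tokenizer, "What are the symptoms of hypertension?", history=[])
print(generate_reply(model, tokenizer, input_ids, max_new_tokens=64))

# Or go through the Gradio-facing wrapper, which prepends the header() block.
print(chat_fn("What are the symptoms of hypertension?", []))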