Update app.py
app.py CHANGED

@@ -1,100 +1,120 @@
-# chat.py
-import os
-import gc
-import torch
-from transformers import LlamaTokenizer, LlamaForCausalLM
-
-# =============================
-# Configuration
-# =============================
-MODEL_PATH = r"C:\Users\JAY\Downloads\Chatdoc\ChatDoctor\pretrained"
-MAX_NEW_TOKENS = 200
-TEMPERATURE = 0.5
-TOP_K = 50
-REPETITION_PENALTY = 1.1
-
-# Detect device
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print(f"Loading model from {MODEL_PATH} on {device}...")
-
-# =============================
-# Load Tokenizer and Model
-# =============================
-tokenizer = LlamaTokenizer.from_pretrained(MODEL_PATH)
-
-model = LlamaForCausalLM.from_pretrained(
-    MODEL_PATH,
-    device_map="auto",          # automatically dispatch weights to GPU
-    torch_dtype=torch.float16,  # half precision for faster inference
-    low_cpu_mem_usage=True      # optimize CPU memory
-)
-
-# DO NOT call model.to(device) when using device_map="auto"
-generator = model.generate
-print("✅ Model loaded successfully!\n")
-
-# =============================
-# Chat History
-# =============================
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-#
-
-
-
-
-
-
-
-
-
-
-
-
-
+# chat.py
+import os
+import gc
+import torch
+from transformers import LlamaTokenizer, LlamaForCausalLM
+
+# =============================
+# Configuration
+# =============================
+MODEL_PATH = r"C:\Users\JAY\Downloads\Chatdoc\ChatDoctor\pretrained"
+MAX_NEW_TOKENS = 200
+TEMPERATURE = 0.5
+TOP_K = 50
+REPETITION_PENALTY = 1.1
+
+# Detect device
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Loading model from {MODEL_PATH} on {device}...")
+
+# =============================
+# Load Tokenizer and Model
+# =============================
+tokenizer = LlamaTokenizer.from_pretrained(MODEL_PATH)
+
+model = LlamaForCausalLM.from_pretrained(
+    MODEL_PATH,
+    device_map="auto",          # automatically dispatch weights to GPU
+    torch_dtype=torch.float16,  # half precision for faster inference
+    low_cpu_mem_usage=True      # optimize CPU memory
+)
+
+# DO NOT call model.to(device) when using device_map="auto"
+generator = model.generate
+print("✅ Model loaded successfully!\n")
+
+# =============================
+# Chat History
+# =============================
+systemprompt = ("""You are ChatDoctor — an intelligent, empathetic medical AI assistant.
+Your role is to carefully gather medical information, reason clinically,
+and provide safe, evidence-based guidance.
+
+Follow these instructions strictly:
+1. When a patient describes their illness, DO NOT diagnose immediately.
+2. Ask relevant, targeted questions to collect all necessary details
+   such as symptoms, duration, severity, lifestyle habits, medical history,
+   medications, and any recent tests or changes.
+3. Once you have enough information for a preliminary diagnosis, clearly
+   explain your reasoning and possible causes in simple medical language.
+4. Then, provide a clear and structured response that includes:
+   - **Diagnosis:** probable or confirmed condition(s)
+   - **Dietary Advice:** foods to include and avoid
+   - **Lifestyle Advice:** exercise, sleep, stress, and other habits
+5. Be concise, empathetic, and professional at all times.
+6. Never switch roles or generate “Patient:” responses. Always remain as ChatDoctor.
+7. If symptoms suggest a serious or emergency condition, advise the patient
+   to seek immediate medical attention.""")
+
+history = [systemprompt, "ChatDoctor: I am ChatDoctor, what medical questions do you have?"]
+
+# =============================
+# Response Function
+# =============================
+def get_response(user_input):
+    global history
+    human_invitation = "Patient: "
+    doctor_invitation = "ChatDoctor: "
+
+    # Append user input
+    history.append(human_invitation + user_input)
+
+    # Build prompt
+    prompt = "\n".join(history) + "\n" + doctor_invitation
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
+
+    # Generate response
+    with torch.no_grad():
+        output_ids = generator(
+            input_ids,
+            max_new_tokens=MAX_NEW_TOKENS,
+            do_sample=True,
+            temperature=TEMPERATURE,
+            top_k=TOP_K,
+            repetition_penalty=REPETITION_PENALTY
+        )
+
+    # Decode response
+    full_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    response = full_output[len(prompt):].strip()
+
+    # Clean if the model repeats the patient prompt
+    if response.startswith("Patient:"):
+        response = response[len("Patient:"):].strip()
+
+    # Append model response to history
+    history.append(doctor_invitation + response)
+
+    # Free memory
+    del input_ids, output_ids
+    gc.collect()
+    torch.cuda.empty_cache()
+
+    return response
+
+# =============================
+# CLI Chat
+# =============================
+if __name__ == "__main__":
+    print("\n=== ChatDoctor is ready! Type your questions. ===\n")
+    while True:
+        try:
+            user_input = input("Patient: ").strip()
+            if user_input.lower() in ["exit", "quit"]:
+                print("Exiting ChatDoctor. Goodbye!")
+                break
+            response = get_response(user_input)
+            print("ChatDoctor: " + response + "\n")
+        except KeyboardInterrupt:
+            print("\nExiting ChatDoctor. Goodbye!")
+            break
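
Note on the response slicing in the new version: full_output[len(prompt):] assumes
the decoded text reproduces the prompt exactly, but tokenizers do not always
round-trip whitespace byte-for-byte, so the string slice can drift. A minimal
sketch of token-level slicing instead, reusing the tokenizer, input_ids, and
output_ids already present in get_response (an alternative sketch, not part of
this commit):

    # Decode only the tokens generated after the prompt, rather than
    # slicing the decoded string at len(prompt).
    new_tokens = output_ids[0][input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()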
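Similarly, with device_map="auto" the first shard of the dispatched model may not
sit on the hand-computed device string, so routing inputs through the model's own
placement is the safer pattern. A sketch under that assumption, using the
model.device attribute that Transformers exposes on loaded models:

    # Place inputs wherever the dispatched model expects its inputs.
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs.input_ids.to(model.device)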