import gradio as gr from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig import torch import time # ======================================================= # Load Model # ======================================================= model_name = "augtoma/qCammel-13" print("Loading tokenizer and model...") tokenizer = AutoTokenizer.from_pretrained(model_name) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token model = AutoModelForCausalLM.from_pretrained( model_name, device_map="auto", torch_dtype=torch.float16, trust_remote_code=True, low_cpu_mem_usage=True ) model.eval() print("Model loaded successfully!") print(f"Device map: {model.hf_device_map}") print(f"Model device: {next(model.parameters()).device}") # ======================================================= # Generate Doctor Response - Interactive Medical Consultation # ======================================================= def generate_doctor_response(history): user_message = history[-1]["content"] if not user_message.strip(): history.append({"role": "assistant", "content": "How can I help you today?"}) yield history return # Build conversation context from history conversation_context = "" if len(history) > 1: # Include previous exchanges for context for msg in history[:-1]: if msg["role"] == "user": conversation_context += f"PATIENT: {msg['content']}\n" else: conversation_context += f"DOCTOR: {msg['content']}\n" # Medical conversation prompt - like real doctor-patient interaction prompt = f"""You are an experienced medical doctor conducting a patient consultation. Have a natural, interactive conversation where you: - Ask relevant follow-up questions to understand symptoms better - Gather medical history (medications, lifestyle, family history) - Provide medical assessment and recommendations - Suggest medications with dosages when appropriate - Give diet and lifestyle advice - Explain what tests or next steps are needed Respond naturally as a caring doctor would. Keep responses concise (2-4 sentences). Ask ONE specific follow-up question when you need more information. Previous conversation: {conversation_context} PATIENT: {user_message} DOCTOR:""" # Tokenize input inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(model.device) gen_config = GenerationConfig( temperature=0.75, top_p=0.92, top_k=45, do_sample=True, max_new_tokens=250, pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id, repetition_penalty=1.2, no_repeat_ngram_size=3 ) input_len = inputs["input_ids"].shape[1] with torch.no_grad(): output_ids = model.generate(**inputs, generation_config=gen_config) generated_ids = output_ids[0][input_len:] response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip() # Clean response response = clean_doctor_response(response) # Stream response token by token history.append({"role": "assistant", "content": ""}) for i in range(0, len(response), 4): chunk = response[:i + 4] history[-1]["content"] = chunk + "▌" yield history.copy() time.sleep(0.012) history[-1]["content"] = response yield history def clean_doctor_response(response: str) -> str: """Clean the doctor's response to be natural and conversational.""" # Remove role labels if present prefixes_to_remove = ["doctor:", "assistant:", "response:", "patient:"] response_lower = response.lower() for prefix in prefixes_to_remove: if response_lower.startswith(prefix): response = response[len(prefix):].strip() break # Stop at repetitive patterns or gibberish stop_phrases = ["accordingly", "respectively", "speaking correctly", "faithfully yours"] for phrase in stop_phrases: if phrase in response.lower(): # Find first occurrence and cut there idx = response.lower().find(phrase) response = response[:idx].strip() break # Limit to reasonable number of sentences (4-6 max) sentences = [s.strip() + '.' for s in response.split('.') if s.strip()] if len(sentences) > 6: response = ' '.join(sentences[:6]) else: response = ' '.join(sentences) # Remove incomplete sentences at the end if response and response[-1] not in '.!?': last_period = response.rfind('.') if last_period > 0: response = response[:last_period + 1] # Clean up extra spaces response = ' '.join(response.split()) # Fallback for very short or empty responses if len(response.strip()) < 20: response = "Could you tell me more about your symptoms? When did they start?" return response.strip() # ======================================================= # Gradio Interface # ======================================================= with gr.Blocks(theme=gr.themes.Soft(), css=""" .medical-header { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 20px; border-radius: 10px; color: white; text-align: center; margin-bottom: 20px; } """) as demo: gr.HTML("""
Interactive Medical Conversation • Just Like Visiting Your Doctor