# medical_model / app_optimized.py
import gc
import logging
import os
import warnings

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Log in with the Space's HF_TOKEN secret; skip gracefully if it is not set
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(token=hf_token)

# Suppress warnings
warnings.filterwarnings("ignore")
logging.getLogger("transformers").setLevel(logging.ERROR)
# Configuration for optimized performance
MODEL_NAME = "microsoft/DialoGPT-medium"
MAX_NEW_TOKENS = 150
TEMPERATURE = 0.8
TOP_P = 0.9
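
# A note on these knobs: MAX_NEW_TOKENS bounds reply length, TEMPERATURE
# (with do_sample=True) controls randomness, and TOP_P=0.9 enables nucleus
# sampling, restricting choices to the smallest token set covering 90% of
# probability mass. The values above are the app's originals; any retuning
# should be validated empirically.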
# Medical enhancement prompt - detailed CareConnect specifications
MEDICAL_CONTEXT = """You are a friendly and smart medical assistant. Your job is to give short, clear, and helpful health information.
Your answers should:
- Stay focused. No long essays or extra fluff.
- Give basic helpful steps for common symptoms like fever, cough, or headache (e.g., rest, drink fluids, take paracetamol if needed).
- For any serious or unclear issues, remind the user to see a doctor, but do it briefly and naturally.
- Keep responses concise and under 4 sentences when possible.
Tone:
- Friendly, supportive, and calm.
- No robotic warnings unless needed. Keep it real and human.
- Use emojis like 😊 or 👍 occasionally to appear friendly.
Important rules:
- NEVER include text in parentheses in your responses.
- NEVER include any meta-instructions in your responses.
- NEVER include reminders about what you should do in future responses.
- DO NOT include phrases like "We're here to help" or "I'm just an AI".
- DO NOT include any text that instructs you what to do or how to behave.
- DO NOT include any sentences that start with "If the user asks..." or "Remember..."
- DO NOT include "(smile)" - instead, use actual emojis like 😊 or 👍 when appropriate.
- DO NOT include numbered references like [1], [2], etc. in your responses.
- DO NOT include any text that explains what your response is doing.
- DO NOT include "user:" or "assistant:" prefixes in your responses.
- DO NOT include hypothetical user questions in your responses.
- DO NOT refuse to answer harmless non-medical questions like jokes or general knowledge.
- Don't give exact dosages or diagnoses.
- Be consistent in your responses regardless of the user's role."""
# Global variables
model = None
tokenizer = None
def load_model():
    """Load DialoGPT model optimized for CPU."""
    global model, tokenizer
    try:
        print(f"🏥 Loading medical chatbot model: {MODEL_NAME}")
        # Load tokenizer; left padding suits decoder-only generation
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, padding_side="left")
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        # Load model with CPU-friendly settings
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,  # float32 is the safe dtype on CPU
            low_cpu_mem_usage=True,
            trust_remote_code=True,
        )
        model.eval()  # inference mode: disables dropout
        print("✅ Model loaded successfully!")
        return True
    except Exception as e:
        print(f"❌ Failed to load model: {e}")
        return False
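

# Optional CPU speed-up: a minimal sketch of dynamic int8 quantization. This
# helper is an assumption, not part of the original app, and is never called
# by default. quantize_dynamic converts nn.Linear weights to int8, which
# usually cuts memory and latency on CPU at a small accuracy cost.
def load_model_quantized():
    """Hypothetical variant of load_model() using dynamic int8 quantization."""
    global model
    if model is None and not load_model():
        return False
    # Quantize only nn.Linear modules; activations stay float32.
    # Note: GPT-2-style blocks (as in DialoGPT) implement most projections
    # with transformers' Conv1D rather than nn.Linear, so gains may be modest.
    model = torch.quantization.quantize_dynamic(
        model, {torch.nn.Linear}, dtype=torch.qint8
    )
    return True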


def generate_medical_response(prompt):
    """Generate a medical response with DialoGPT."""
    global model, tokenizer
    if model is None or tokenizer is None:
        return "❌ Model not loaded. Please wait for initialization."
    try:
        # Prepend the system-style medical context to the user's question
        medical_prompt = f"{MEDICAL_CONTEXT}\n\nUser: {prompt}\nAssistant:"
        print(f"🔄 Processing: {prompt[:50]}{'...' if len(prompt) > 50 else ''}")
        # Tokenize with an attention mask, leaving room for MAX_NEW_TOKENS
        # inside DialoGPT's 1024-token context window
        inputs = tokenizer(
            medical_prompt,
            return_tensors="pt",
            max_length=1024 - MAX_NEW_TOKENS,
            truncation=True,
        )
        # Generate with sampling parameters tuned for short, varied replies
        # (early_stopping is omitted: it only applies to beam search)
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=MAX_NEW_TOKENS,
                temperature=TEMPERATURE,
                top_p=TOP_P,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.1,
                num_return_sequences=1,
            )
        # Decode the full sequence (prompt + completion)
        full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Keep only the text after the final "Assistant:" marker
        if "Assistant:" in full_response:
            response = full_response.split("Assistant:")[-1].strip()
        else:
            response = full_response[len(medical_prompt):].strip()
        # Fall back to a generic reply if generation came back empty or tiny
        if not response or len(response) < 10:
            response = (
                "I'd be happy to help with your medical question. "
                "Could you please provide more specific details? 😊"
            )
        print(f"✅ Response generated: {len(response)} characters")
        # Free tensors promptly to keep CPU memory flat
        del inputs, outputs
        gc.collect()
        return response
    except Exception as e:
        print(f"❌ Generation error: {e}")
        return (
            "I encountered a technical issue. Please try rephrasing your question. "
            "For immediate medical concerns, please consult a healthcare professional."
        )


def chat_interface(message, history):
    """Main chat interface function."""
    if not message or not message.strip():
        return "Please enter a medical question."
    # Generate and return the response
    return generate_medical_response(message.strip())
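
# Quick sanity check (hypothetical usage; run in a Python shell after the
# model has loaded):
#   >>> chat_interface("I have a mild headache, what should I do?", [])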

# Load model on startup
print("🏥 Initializing Medical Chatbot...")
model_loaded = load_model()
if not model_loaded:
    print("⚠️ WARNING: Model failed to load. Responses may be limited.")

# Create Gradio interface
demo = gr.ChatInterface(
    chat_interface,
    type="messages",
    title="🏥 Medical Information Assistant",
    description="""
    An AI-powered medical information chatbot that provides educational health information.

    ⚠️ **Important Disclaimer**: This chatbot provides general health information for educational
    purposes only. It should not replace professional medical advice, diagnosis, or treatment.
    Always consult qualified healthcare professionals for medical concerns.
    """,
    examples=[
        "What are the symptoms of diabetes?",
        "How can I maintain a healthy heart?",
        "What should I know about high blood pressure?",
        "Tell me about the importance of regular exercise",
        "What are common causes of headaches?",
        "How can I improve my sleep quality?",
    ],
    cache_examples=False,
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        max-width: 800px !important;
        margin: auto !important;
    }
    .message {
        border-radius: 10px !important;
    }
    """,
)

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,  # note: share links are not supported on Hugging Face Spaces
        show_error=True,
        debug=True,
    )