Spaces:

Muhammadidrees
/

JAYConverstionalAI

Paused

App Files Files Community

JAYConverstionalAI / withvoiceandfrontend .py

Muhammadidrees

Rename app.py to withvoiceandfrontend .py

a0a8be5 verified 2 months ago

raw

history blame

14.3 kB

	import os
	import gc
	import torch
	import gradio as gr
	from transformers import LlamaTokenizer, LlamaForCausalLM, StoppingCriteria, StoppingCriteriaList

	# =============================
	# Configuration
	# =============================
	MODEL_PATH = r"Muhammadidrees/JayConverstionalModel"
	MAX_NEW_TOKENS = 200
	TEMPERATURE = 0.5
	TOP_K = 50
	REPETITION_PENALTY = 1.1

	# Detect device
	device = "cuda" if torch.cuda.is_available() else "cpu"
	print(f"Loading model from {MODEL_PATH} on {device}...")

	# =============================
	# Load Tokenizer and Model
	# =============================
	tokenizer = LlamaTokenizer.from_pretrained(MODEL_PATH)
	model = LlamaForCausalLM.from_pretrained(
	MODEL_PATH,
	device_map="auto",
	torch_dtype=torch.float16,
	low_cpu_mem_usage=True
	)

	generator = model.generate
	print("✅ ChatDoctor model loaded successfully!\n")

	# =============================
	# Stopping Criteria
	# =============================
	class StopOnTokens(StoppingCriteria):
	def __init__(self, stop_ids):
	self.stop_ids = stop_ids

	def __call__(self, input_ids, scores, **kwargs):
	for stop_id_seq in self.stop_ids:
	if len(stop_id_seq) == 1:
	if input_ids[0][-1] == stop_id_seq[0]:
	return True
	else:
	if len(input_ids[0]) >= len(stop_id_seq):
	if input_ids[0][-len(stop_id_seq):].tolist() == stop_id_seq:
	return True
	return False

	# =============================
	# Get Response Function
	# =============================
	def get_response(user_input, history_context):
	"""Generate response from ChatDoctor model"""
	human_invitation = "Patient: "
	doctor_invitation = "ChatDoctor: "

	# Build conversation from history
	history_text = []
	for human, assistant in history_context:
	if human:
	history_text.append(human_invitation + human)
	if assistant:
	history_text.append(doctor_invitation + assistant)

	# Add current user input
	history_text.append(human_invitation + user_input)

	# Build conversation prompt
	prompt = "\n".join(history_text) + "\n" + doctor_invitation
	input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

	# Define stop words and their token IDs
	stop_words = ["Patient:", "\nPatient:", "Patient :", "\n\nPatient"]
	stop_ids = [tokenizer.encode(word, add_special_tokens=False) for word in stop_words]
	stopping_criteria = StoppingCriteriaList([StopOnTokens(stop_ids)])

	# Generate model response
	with torch.no_grad():
	output_ids = generator(
	input_ids,
	max_new_tokens=MAX_NEW_TOKENS,
	do_sample=True,
	temperature=TEMPERATURE,
	top_k=TOP_K,
	repetition_penalty=REPETITION_PENALTY,
	stopping_criteria=stopping_criteria,
	pad_token_id=tokenizer.eos_token_id,
	eos_token_id=tokenizer.eos_token_id
	)

	# Decode and clean response
	full_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
	response = full_output[len(prompt):].strip()

	# Remove any "Patient:" that might have slipped through
	for stop_word in ["Patient:", "Patient :", "\nPatient:", "\nPatient", "Patient"]:
	if stop_word in response:
	response = response.split(stop_word)[0].strip()
	break

	response = response.strip()

	# Free memory
	del input_ids, output_ids
	gc.collect()
	torch.cuda.empty_cache()

	return response

	# =============================
	# Gradio Chat Function
	# =============================
	def chat_function(message, history):
	"""Gradio chat interface function"""
	if not message.strip():
	return ""

	try:
	response = get_response(message, history)
	return response
	except Exception as e:
	return f"Error: {str(e)}"

	# =============================
	# Text-to-Speech Function
	# =============================
	def text_to_speech(text):
	"""Convert text response to speech"""
	try:
	from gtts import gTTS
	import tempfile

	if not text or text.startswith("Error:"):
	return None

	# Create speech
	tts = gTTS(text=text, lang='en', slow=False)

	# Save to temporary file
	temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
	tts.save(temp_file.name)

	return temp_file.name
	except Exception as e:
	print(f"TTS Error: {e}")
	return None

	# =============================
	# Custom CSS
	# =============================
	custom_css = """
	#header {
	text-align: center;
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	padding: 20px;
	border-radius: 10px;
	margin-bottom: 20px;
	}

	#header h1 {
	margin: 0;
	font-size: 2.5em;
	}

	#header p {
	margin: 10px 0 0 0;
	font-size: 1.1em;
	opacity: 0.9;
	}

	.disclaimer {
	background-color: #fff3cd;
	border: 1px solid #ffc107;
	border-radius: 8px;
	padding: 15px;
	margin: 20px 0;
	color: #856404;
	}

	.disclaimer h3 {
	margin-top: 0;
	color: #856404;
	}

	.voice-section {
	background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
	padding: 20px;
	border-radius: 10px;
	margin: 20px 0;
	}

	footer {
	text-align: center;
	margin-top: 30px;
	color: #666;
	font-size: 0.9em;
	}
	"""

	# =============================
	# Gradio Interface
	# =============================
	with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
	# Header
	gr.HTML("""
	<div id="header">
	<h1>🩺 ChatDoctor AI Assistant</h1>
	<p>Your AI-powered medical conversation partner with Voice Support</p>
	</div>
	""")

	# Disclaimer
	gr.HTML("""
	<div class="disclaimer">
	<h3>⚠️ Medical Disclaimer</h3>
	<p><strong>Important:</strong> This AI assistant is for informational and educational purposes only.
	It is NOT a substitute for professional medical advice, diagnosis, or treatment.
	Always seek the advice of your physician or other qualified health provider with any questions
	you may have regarding a medical condition. Never disregard professional medical advice or
	delay in seeking it because of something you have read here.</p>
	</div>
	""")

	with gr.Row():
	with gr.Column(scale=7):
	# Chatbot Interface
	chatbot = gr.Chatbot(
	height=500,
	placeholder="<div style='text-align: center; padding: 40px;'><h3>👋 Welcome to ChatDoctor!</h3><p>I'm here to discuss your health concerns. Type or speak your question!</p></div>",
	show_label=False,
	avatar_images=(None, "🤖"),
	)

	with gr.Row():
	msg = gr.Textbox(
	placeholder="Type your message here... (e.g., 'I have a headache')",
	show_label=False,
	scale=9,
	container=False
	)
	submit_btn = gr.Button("Send 📤", scale=1, variant="primary")

	with gr.Row():
	clear_btn = gr.Button("🗑️ Clear Chat", scale=1)
	retry_btn = gr.Button("🔄 Retry", scale=1)

	with gr.Column(scale=3):
	# Voice Input Section
	gr.HTML("<div class='voice-section'><h3 style='color: white; text-align: center; margin-top: 0;'>🎤 Voice Features</h3></div>")

	audio_input = gr.Audio(
	sources=["microphone"],
	type="filepath",
	label="🎙️ Speak Your Question",
	show_download_button=False
	)

	transcribed_text = gr.Textbox(
	label="📝 Transcribed Text",
	placeholder="Your speech will appear here...",
	interactive=False,
	lines=3
	)

	send_voice_btn = gr.Button("Send Voice Message 🔊", variant="primary")

	gr.Markdown("---")

	# Voice Output
	tts_enabled = gr.Checkbox(
	label="🔊 Enable Text-to-Speech for responses",
	value=True,
	info="Hear the doctor's response"
	)

	audio_output = gr.Audio(
	label="🔈 AI Response Audio",
	autoplay=False,
	visible=True
	)

	# Examples
	gr.Examples(
	examples=[
	"I have a persistent headache for 3 days. What should I do?",
	"What are the symptoms of diabetes?",
	"How can I improve my sleep quality?",
	"I have a fever and sore throat. Should I be concerned?",
	"What are some natural ways to reduce stress?",
	],
	inputs=msg,
	label="💡 Example Questions"
	)

	# Settings (collapsed by default)
	with gr.Accordion("⚙️ Advanced Settings", open=False):
	temperature_slider = gr.Slider(
	minimum=0.1,
	maximum=1.0,
	value=TEMPERATURE,
	step=0.1,
	label="Temperature (Creativity)",
	info="Higher values make responses more creative but less focused"
	)
	max_tokens_slider = gr.Slider(
	minimum=50,
	maximum=500,
	value=MAX_NEW_TOKENS,
	step=50,
	label="Max Response Length",
	info="Maximum number of tokens in response"
	)
	top_k_slider = gr.Slider(
	minimum=1,
	maximum=100,
	value=TOP_K,
	step=1,
	label="Top K",
	info="Limits vocabulary selection"
	)

	# Footer
	gr.HTML("""
	<footer>
	<p>Powered by ChatDoctor Model \| Built with Gradio \| Voice-Enabled 🎤</p>
	<p>Device: """ + device.upper() + """ \| Model: LLaMA-based Medical AI</p>
	</footer>
	""")

	# =============================
	# Event Handlers
	# =============================

	def user_message(user_msg, history):
	return "", history + [[user_msg, None]], None

	def bot_response(history, temp, max_tok, top_k_val, tts_enabled_val):
	global TEMPERATURE, MAX_NEW_TOKENS, TOP_K
	TEMPERATURE = temp
	MAX_NEW_TOKENS = int(max_tok)
	TOP_K = int(top_k_val)

	user_msg = history[-1][0]
	bot_msg = chat_function(user_msg, history[:-1])
	history[-1][1] = bot_msg

	# Generate audio if TTS is enabled
	audio_file = None
	if tts_enabled_val and bot_msg and not bot_msg.startswith("Error:"):
	audio_file = text_to_speech(bot_msg)

	return history, audio_file

	def transcribe_audio(audio_file):
	"""Transcribe audio to text using Whisper"""
	if audio_file is None:
	return ""

	try:
	import whisper
	model = whisper.load_model("base")
	result = model.transcribe(audio_file)
	return result["text"]
	except ImportError:
	return "Error: Please install whisper: pip install openai-whisper"
	except Exception as e:
	return f"Transcription error: {str(e)}"

	def process_voice_input(audio_file, history, temp, max_tok, top_k_val, tts_enabled_val):
	"""Process voice input: transcribe -> send -> get response"""
	if audio_file is None:
	return history, "", None, None

	# Transcribe
	transcribed = transcribe_audio(audio_file)

	if transcribed.startswith("Error:"):
	return history, transcribed, None, None

	# Add to chat
	history = history + [[transcribed, None]]

	# Get response
	global TEMPERATURE, MAX_NEW_TOKENS, TOP_K
	TEMPERATURE = temp
	MAX_NEW_TOKENS = int(max_tok)
	TOP_K = int(top_k_val)

	bot_msg = chat_function(transcribed, history[:-1])
	history[-1][1] = bot_msg

	# Generate audio if TTS is enabled
	audio_file = None
	if tts_enabled_val and bot_msg and not bot_msg.startswith("Error:"):
	audio_file = text_to_speech(bot_msg)

	return history, transcribed, None, audio_file

	# Text input events
	msg.submit(
	user_message,
	[msg, chatbot],
	[msg, chatbot, audio_output],
	queue=False
	).then(
	bot_response,
	[chatbot, temperature_slider, max_tokens_slider, top_k_slider, tts_enabled],
	[chatbot, audio_output]
	)

	submit_btn.click(
	user_message,
	[msg, chatbot],
	[msg, chatbot, audio_output],
	queue=False
	).then(
	bot_response,
	[chatbot, temperature_slider, max_tokens_slider, top_k_slider, tts_enabled],
	[chatbot, audio_output]
	)

	# Voice input events
	audio_input.change(
	transcribe_audio,
	[audio_input],
	[transcribed_text]
	)

	send_voice_btn.click(
	process_voice_input,
	[audio_input, chatbot, temperature_slider, max_tokens_slider, top_k_slider, tts_enabled],
	[chatbot, transcribed_text, audio_input, audio_output]
	)

	# Clear and retry
	clear_btn.click(lambda: (None, None, None), None, [chatbot, audio_output, transcribed_text], queue=False)

	retry_btn.click(lambda: None, None, chatbot, queue=False)

	# =============================
	# Launch Interface
	# =============================
	if __name__ == "__main__":
	print("\n🚀 Launching ChatDoctor Gradio Interface with Voice Support...")
	print("\n📦 Required packages:")
	print(" pip install gradio gTTS openai-whisper")
	print("\nNote: Whisper will download models on first use (~100MB for base model)\n")

	demo.queue()
	demo.launch(
	server_name="0.0.0.0",
	server_port=7860,
	share=False,
	show_error=True
	)