MediVox

Sleeping

App Files Files Community

MediVox / app.py

gauravgulati619

Fix SSR in gradio

a09fc69 about 1 year ago

raw

history blame contribute delete

4.76 kB

	import os
	import gradio as gr
	import soundfile as sf # For audio handling

	from brain import encode_image, analyze_image_with_query
	from patientvoice import record_audio, transcribe_with_groq
	from doctorvoice import text_to_speech_with_gtts
	from dotenv import load_dotenv

	# Load environment variables with python-dotenv before anything reads them;
	# process_inputs later looks up GOOGLE_AI_STUDIO_API_KEY in os.environ.
	load_dotenv()

	# Base prompt for the image path: process_inputs sends this text (plus a
	# "Patient's description: ..." suffix when a transcript exists) together with
	# the uploaded image to analyze_image_with_query.
	# NOTE(review): the text asks for both "one long paragraph" and "max 2
	# sentences" - confirm which length limit is intended.
	system_prompt = """You have to act as a professional doctor, i know you are not but this is for learning purpose.
	What's in this image?. Do you find anything wrong with it medically?
	If you make a differential, suggest some remedies for them. Donot add any numbers or special characters in
	your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
	Donot say 'In the image I see' but say 'With what I see, I think you have ....'
	Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
	Keep your answer concise (max 2 sentences). No preamble, start your answer right away please"""

	# Prompt prefix for the voice-only path: when no image is supplied,
	# process_inputs appends "Patient's query: <transcript>" to this text and
	# sends the result to the Gemini model.
	voice_system_prompt = """You have to act as a professional doctor, i know you are not but this is for learning purpose.
	Please respond to the patient's query in a helpful and medical manner.
	Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
	Keep your answer concise (max 2 sentences). No preamble, start your answer right away please"""

	# Model used for both the image path and the voice-only path.
	_GEMINI_MODEL = "gemini-2.0-flash"
	# Shown when neither a usable transcript nor an image is available.
	_NO_INPUT_MESSAGE = "Please provide an image or speak to the doctor"


	def process_inputs(audio_data, image_filepath):
	    """Answer a patient's recording and/or uploaded image in the doctor's voice.

	    Args:
	        audio_data: ``None`` when nothing was recorded, otherwise the
	            ``(sample_rate, samples)`` pair produced by the numpy-typed
	            microphone input.
	        image_filepath: Path of the uploaded image, or a falsy value when no
	            image was provided.

	    Returns:
	        A ``(speech_to_text, doctor_response, doctor_voice)`` tuple.
	        ``speech_to_text`` is the transcript (or a transcription error
	        message, or ``""`` without audio). ``doctor_response`` is the model's
	        reply or a user-facing error/hint. ``doctor_voice`` is whatever
	        ``text_to_speech_with_gtts`` returns for a genuine reply and ``None``
	        otherwise.
	    """
	    speech_to_text_output = ""
	    query_text = ""

	    if audio_data is not None:
	        try:
	            speech_to_text_output = _transcribe_recording(audio_data)
	        except Exception as e:
	            # UI boundary: surface the failure in the textbox instead of
	            # crashing the request (this now also covers the WAV write).
	            speech_to_text_output = f"Error in transcription: {e}"
	        else:
	            # A whitespace-only transcript carries no question.
	            query_text = (speech_to_text_output or "").strip()

	    doctor_response, is_medical_answer = _ask_doctor(query_text, image_filepath)

	    voice_of_doctor = None
	    # Only genuine model replies are spoken; error texts and the "please
	    # provide input" hint stay on screen and are not sent to text-to-speech.
	    if is_medical_answer and doctor_response:
	        try:
	            voice_of_doctor = text_to_speech_with_gtts(
	                input_text=doctor_response,
	                output_filepath="final.mp3",
	            )
	        except Exception as e:
	            # Best effort: the textual answer is still returned without audio.
	            print(f"Error in text-to-speech: {e}")

	    return speech_to_text_output, doctor_response, voice_of_doctor


	def _transcribe_recording(audio_data):
	    """Write the recording to a private temporary WAV file and transcribe it."""
	    import tempfile  # local import: the file's import header is left untouched

	    sample_rate, audio_array = audio_data
	    # A unique temp file avoids clobbering a shared "temp_audio.wav" and lets
	    # us clean up afterwards.
	    fd, audio_filepath = tempfile.mkstemp(suffix=".wav")
	    os.close(fd)  # the writer reopens the file by path
	    try:
	        sf.write(audio_filepath, audio_array, sample_rate)
	        return transcribe_with_groq(
	            audio_filepath=audio_filepath,
	            stt_model="whisper-large-v3",
	        )
	    finally:
	        try:
	            os.remove(audio_filepath)
	        except OSError:
	            # Cleanup is best effort; a leftover temp file must not mask the
	            # transcription result or error.
	            pass


	def _ask_doctor(query_text, image_filepath):
	    """Return ``(response_text, is_medical_answer)`` for the given inputs."""
	    if image_filepath:
	        try:
	            # With both voice and image, the transcript becomes extra context.
	            prompt = system_prompt
	            if query_text:
	                prompt += "\n\nPatient's description: " + query_text
	            reply = analyze_image_with_query(
	                query=prompt,
	                encoded_image=encode_image(image_filepath),
	                model=_GEMINI_MODEL,
	            )
	        except Exception as e:
	            return f"Error in image analysis: {e}", False
	        return reply, True

	    if query_text:  # voice-only query
	        try:
	            import google.generativeai as genai

	            genai.configure(api_key=os.environ.get("GOOGLE_AI_STUDIO_API_KEY"))
	            model = genai.GenerativeModel(_GEMINI_MODEL)
	            prompt = voice_system_prompt + "\n\nPatient's query: " + query_text
	            return model.generate_content(prompt).text, True
	        except Exception as e:
	            return f"Error in processing voice query: {e}", False

	    return _NO_INPUT_MESSAGE, False

	# Custom CSS handed to gr.Interface(css=...): its single rule sets every <h1>
	# (the title) to 2.5rem, with !important so the rule takes precedence.
	custom_css = """
	h1 {
	font-size: 2.5rem !important;
	}
	"""

	# Input widgets, in the order process_inputs receives them: recording, image.
	input_components = [
	    gr.Audio(
	        sources=["microphone"],
	        type="numpy",
	        label="Speak to the Doctor (Optional)",
	    ),
	    gr.Image(
	        type="filepath",
	        label="Upload an Image (Optional)",
	    ),
	]

	# Output widgets, in the order process_inputs returns its values.
	output_components = [
	    gr.Textbox(label="Speech to Text"),
	    gr.Textbox(label="Doctor's Response"),
	    gr.Audio(label="Doctor's Voice"),
	]

	# Soft theme with a Roboto -> Consolas -> sans-serif font stack.
	font_stack = [gr.themes.GoogleFont("Roboto"), "Consolas", "sans-serif"]
	soft_theme = gr.themes.Soft(font=font_stack)

	# Wire everything into a single-function interface; custom_css only adjusts
	# the title size.
	iface = gr.Interface(
	    fn=process_inputs,
	    inputs=input_components,
	    outputs=output_components,
	    title="MediVox : AI Doctor with Vision and Voice",
	    css=custom_css,
	    theme=soft_theme,
	)

	# Start the app with Gradio's default launch settings.
	iface.launch()