# AnatomyLite / workapp.py
# (Web file-viewer page residue below, kept as comments so the module parses.)
# gladguy's picture
# Fresh
# 4c6a071
# Raw
# History Blame Contribute Delete
# 6.33 kB
# app.py
import re # <--- CRITICAL: Needed for cleaning URLs
import gradio as gr
import os
import time
import json
from openai import OpenAI
from dotenv import load_dotenv
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
from elevenlabs.client import ElevenLabs
load_dotenv()
# --- CONFIGURATION ---
# Primary chat client: talks to the Hyperbolic endpoint through the
# OpenAI-compatible client class. This is the client run_agent() uses.
client = OpenAI(
    base_url="https://api.hyperbolic.xyz/v1",
    api_key=os.environ.get("HYPERBOLIC_API_KEY"),
)
MODEL_NAME = "meta-llama/Meta-Llama-3.1-70B-Instruct"

# Secondary client for the SambaNova endpoint. The original author's note
# describes it as the fast option intended for conversational parts; it is
# not referenced by the other code visible in this file.
samclient = OpenAI(
    base_url="https://api.sambanova.ai/v1",
    api_key=os.environ.get("SAMBANOVA_API_KEY"),
)
# Model identifier used with samclient (the 70B variant is the one configured).
samMODEL_NAME = "Meta-Llama-3.1-70B-Instruct"

# Text-to-speech client and the voice used by generate_audio().
eleven = ElevenLabs(api_key=os.environ.get("ELEVENLABS_API_KEY"))
VOICE_ID = "JBFqnCBsd6RMkjVDRZzb"

# System message placed at the start of every conversation sent to the model.
SYSTEM_PROMPT = """
You are Anato-Mitra, a strict Indian Medical College Professor.
- If the student asks a question, ALWAYS use the 'search_anatomy_diagrams' tool.
- Be concise and strict.
- The diagram will be shown automatically, so you just need to explain the clinical significance.
"""
# --- 1. THE BRAIN (Agent Logic) ---
async def run_agent(user_message, history):
    """Answer one student question, calling an MCP tool when the model asks.

    Launches the MCP server (``uv run viva_server.py``) over stdio, advertises
    its tools to the chat model and sends the system prompt plus the question.
    If the model requests a tool, only the FIRST tool call is executed; its
    text output is fed back to the model as extra context for a second
    completion and is also appended to the reply under a "Reference Diagram"
    heading.

    Args:
        user_message: The student's question.
        history: Previous chat messages. Accepted for interface compatibility
            but not sent to the model; each question is answered on its own.

    Returns:
        The reply as a Markdown string (never ``None``).
    """
    # Connect to the MCP server as a child process speaking over stdio.
    server_params = StdioServerParameters(
        command="uv",
        args=["run", "viva_server.py"],
    )
    async with stdio_client(server_params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            tools_list = await session.list_tools()

            # Re-express the MCP tool descriptions in the function-tool shape
            # expected by the chat completions API.
            openai_tools = [
                {
                    "type": "function",
                    "function": {
                        "name": t.name,
                        "description": t.description,
                        "parameters": t.inputSchema,
                    },
                }
                for t in tools_list.tools
            ]
            messages = [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_message},
            ]

            print("🧠 Thinking...")
            response = client.chat.completions.create(
                model=MODEL_NAME,
                messages=messages,
                tools=openai_tools,
                tool_choice="auto",
            )
            first_message = response.choices[0].message
            # content can be None (e.g. when the model only emits a tool call).
            final_response = first_message.content or ""
            tool_image_markdown = ""

            # Handle tool calls (only the first one is honoured).
            if first_message.tool_calls:
                tool_call = first_message.tool_calls[0]
                fn_name = tool_call.function.name
                print(f"🔧 Tool Call: {fn_name}")

                # Guard against an empty arguments string, which json.loads
                # would reject; treat it as "no arguments".
                args_dict = json.loads(tool_call.function.arguments or "{}")
                result = await session.call_tool(fn_name, arguments=args_dict)

                # Take the first text item instead of blindly indexing
                # content[0].text, so an empty or non-text result cannot crash.
                tool_output = next(
                    (
                        item.text
                        for item in result.content
                        if getattr(item, "text", None)
                    ),
                    "",
                )

                if tool_output:
                    # Capture the diagram so it is always shown to the student.
                    tool_image_markdown = (
                        f"\n\n---\n**Reference Diagram:**\n{tool_output}"
                    )
                    # Context injection: hand the tool output to the model.
                    messages.append({
                        "role": "user",
                        "content": f"SYSTEM DATA: {tool_output}\n\nAnswer the student.",
                    })

                # Second pass without tools, so the model must answer in text.
                follow_up = client.chat.completions.create(
                    model=MODEL_NAME,
                    messages=messages,
                )
                # `or ""` keeps the concatenation below from failing on None.
                final_response = follow_up.choices[0].message.content or ""

            # Force-show the diagram if one was found (no-op otherwise).
            # Audio is deliberately NOT generated here; see generate_audio().
            return final_response + tool_image_markdown
# --- 2. THE VOICE (Clean Audio Generator) ---
def _clean_for_speech(text):
    """Strip Markdown images, URLs and formatting marks that should not be spoken."""
    # 1. Markdown images such as ![Diagram](...)
    text = re.sub(r'!\[.*?\]\(.*?\)', '', text)
    # 2. Raw URLs such as https://...
    text = re.sub(r'https?://\S+', '', text)
    # 3. The heading added in front of the diagram
    text = text.replace("Reference Diagram:", "")
    # 4. Horizontal rules ("---") and emphasis asterisks left behind by the
    #    removals above; otherwise they would be sent to the TTS engine.
    text = re.sub(r'^[ \t]*-{3,}[ \t]*$', '', text, flags=re.MULTILINE)
    text = re.sub(r'\*+', '', text)
    return text.strip()


def generate_audio(chat_history):
    """Generate speech for the most recent assistant message.

    The message is cleaned (images, URLs, the "Reference Diagram:" heading and
    leftover Markdown marks removed) before being sent to ElevenLabs, and the
    returned audio is written to an MP3 file in the current working directory.

    Args:
        chat_history: List of chat messages as dicts with "role" and
            "content" keys; may be empty or None.

    Returns:
        The path of the written MP3 file, or None when there is no assistant
        message, its content is not text, nothing speakable remains after
        cleaning, or synthesis fails.
    """
    if not chat_history:
        return None

    # Look for the latest message that actually came from the bot rather than
    # assuming the final entry is an assistant turn.
    last_bot_message = next(
        (
            entry.get("content")
            for entry in reversed(chat_history)
            if entry.get("role") == "assistant"
        ),
        None,
    )
    # Non-text content cannot be cleaned with regexes or spoken.
    if not isinstance(last_bot_message, str):
        return None

    clean_text = _clean_for_speech(last_bot_message)
    if not clean_text:
        # Nothing left to say; skip the API call entirely.
        return None

    print(f"🗣️ Speaking (Cleaned): {clean_text[:50]}...")
    try:
        audio_generator = eleven.text_to_speech.convert(
            text=clean_text,
            voice_id=VOICE_ID,
            model_id="eleven_multilingual_v2"
        )
        # Nanosecond timestamp: two requests within the same second no longer
        # overwrite each other's file.
        audio_path = f"voice_{time.time_ns()}.mp3"
        with open(audio_path, "wb") as f:
            for chunk in audio_generator:
                f.write(chunk)
        return audio_path
    except Exception as e:
        # Best effort: the UI simply shows no audio when synthesis fails.
        print(f"Audio Error: {e}")
        return None
# --- 3. THE UI ---
# Page layout: chat on the left (wider column), audio controls on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🩻 Anato-Mitra: VIVA Companion -Upper Limb & Thorax Anatomy")
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(type="messages", height=500)
            msg = gr.Textbox(placeholder="Ask: 'Show me the Circle of Willis'")
        with gr.Column(scale=1):
            gr.Markdown("### 🔊 VIVA Examiner Controls")
            audio_out = gr.Audio(label="Audio Output", type="filepath")
            # Audio is produced only on demand, via this button.
            btn_speak = gr.Button("🔊 Speak Response (Save Credits)", variant="primary")

    async def respond(message, chat_history):
        """Run the agent on a submitted question and extend the chat.

        Args:
            message: Text from the input box.
            chat_history: Current chatbot messages (list of role/content dicts).

        Returns:
            A tuple ``("", chat_history)``: the empty string clears the input
            box and the list becomes the chatbot's new value.
        """
        chat_history = chat_history or []
        # A blank submit would otherwise start the MCP server and spend an LLM
        # call on an empty question; ignore it and leave the chat unchanged.
        if not message or not message.strip():
            return "", chat_history

        bot_message = await run_agent(message, chat_history)
        chat_history.append({"role": "user", "content": message})
        chat_history.append({"role": "assistant", "content": bot_message})
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    # Connect the button to the clean-audio function.
    btn_speak.click(generate_audio, inputs=chatbot, outputs=audio_out)
if __name__ == "__main__":
    # Login credentials can be supplied through the environment (or the .env
    # file loaded at import time), like the API keys used elsewhere in this
    # file. The literals are kept only as a backward-compatible fallback.
    # NOTE(review): share=True publishes the app on a public link, so the
    # fallback values below should be overridden and eventually removed.
    auth_user = os.getenv("APP_AUTH_USER", "waheed")
    auth_password = os.getenv("APP_AUTH_PASSWORD", "jcet123")
    demo.launch(share=True, auth=(auth_user, auth_password))