Spaces:

siyah1
/

previsit

Sleeping

App Files Files Community

siyah1 committed on Aug 22, 2025

Commit

baba48f

verified ·

1 Parent(s): bdf07fc

Update app.py

Browse files

Files changed (1) hide show

app.py +384 -55

app.py CHANGED Viewed

@@ -1,70 +1,399 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    hf_token: gr.OAuthToken,
-):
-    """
-    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-    """
-    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-    messages = [{"role": "system", "content": system_message}]
-    messages.extend(history)
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        choices = message.choices
-        token = ""
-        if len(choices) and choices[0].delta.content:
-            token = choices[0].delta.content
-        response += token
-        yield response
 """
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-chatbot = gr.ChatInterface(
-    respond,
-    type="messages",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.LoginButton()
-    chatbot.render()
 if __name__ == "__main__":
-    demo.launch()

+import asyncio
+import base64
+import json
+import os
+from threading import Event
+from datetime import datetime
 import gradio as gr
+import numpy as np
+import websockets.sync.client
+from dotenv import load_dotenv
+from gradio_webrtc import StreamHandler, WebRTC, get_twilio_turn_credentials
+load_dotenv()
+class GeminiConfig:
+    """Connection settings for the Gemini bidirectional streaming endpoint."""
+    def __init__(self, api_key):
+        """Keep the API key and derive the websocket URL that carries it as a query parameter."""
+        endpoint_host = "generativelanguage.googleapis.com"
+        service_path = (
+            "ws/google.ai.generativelanguage.v1alpha."
+            "GenerativeService.BidiGenerateContent"
+        )
+        self.api_key = api_key
+        self.host = endpoint_host
+        self.model = "models/gemini-2.0-flash-exp"
+        self.ws_url = f"wss://{endpoint_host}/{service_path}?key={api_key}"
+class AudioProcessor:
+    """Converts audio between NumPy arrays and the base64 payloads exchanged with the server."""
+    @staticmethod
+    def encode_audio(data, sample_rate):
+        """Return a ``realtimeInput`` message holding the raw bytes of ``data`` as base64 PCM."""
+        pcm_b64 = base64.b64encode(data.tobytes()).decode("UTF-8")
+        chunk = {
+            "mimeType": f"audio/pcm;rate={sample_rate}",
+            "data": pcm_b64,
+        }
+        return {"realtimeInput": {"mediaChunks": [chunk]}}
+    @staticmethod
+    def process_audio_response(data):
+        """Decode a base64 string and reinterpret the resulting bytes as int16 samples."""
+        return np.frombuffer(base64.b64decode(data), dtype=np.int16)
+class ConversationTracker:
+    """Records the messages of one preconsultation session and renders a summary report."""
+    def __init__(self):
+        """Create an empty tracker with no session started."""
+        self.conversation_history = []  # list of dicts: timestamp, message, speaker, type
+        self.start_time = None  # set by start_session()
+        self.end_time = None  # set by end_session()
+        self.session_active = False
+    def start_session(self):
+        """Begin a new session, discarding any state left over from a previous one."""
+        self.start_time = datetime.now()
+        # Clear the previous session's end time; otherwise a report generated
+        # before this session ends would compute a negative duration.
+        self.end_time = None
+        self.session_active = True
+        self.conversation_history = []
+    def add_message(self, message, is_user=False):
+        """Append ``message`` to the history, labelled "Patient" when ``is_user`` else "AI Agent"."""
+        self.conversation_history.append({
+            "timestamp": datetime.now(),
+            "message": message,
+            "speaker": "Patient" if is_user else "AI Agent",
+            "type": "voice"
+        })
+    def end_session(self):
+        """Stamp the end time and mark the session as no longer active."""
+        self.end_time = datetime.now()
+        self.session_active = False
+    def generate_report(self):
+        """Return the summary report text, or a short notice when nothing was recorded.
+        The session duration is reported as 0 minutes unless both the start and
+        the end of the session have been recorded.
+        """
+        if not self.conversation_history:
+            return "No conversation data available."
+        # Messages can be recorded without start_session() having run, so both
+        # endpoints must be checked before subtracting.
+        if self.start_time is not None and self.end_time is not None:
+            duration = (self.end_time - self.start_time).total_seconds() / 60
+        else:
+            duration = 0
+        report = f"""
+PRECONSULTATION SUMMARY REPORT
+Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+Session Duration: {duration:.1f} minutes
+Total Exchanges: {len(self.conversation_history)}
+CONVERSATION SUMMARY:
+This preconsultation session involved a voice-based interaction between the patient and an AI consultation agent. The AI gathered preliminary information to assist healthcare providers in understanding the patient's needs before their appointment.
+KEY POINTS DISCUSSED:
 """
+        # The key-points section is only filled in when the patient contributed
+        # at least one message.
+        patient_spoke = any(
+            msg["speaker"] == "Patient" for msg in self.conversation_history
+        )
+        if patient_spoke:
+            report += "- Patient concerns and symptoms mentioned during the session\n"
+            report += "- Medical history and current health status discussed\n"
+            report += "- Expectations and questions for the upcoming consultation\n\n"
+        report += "RECOMMENDATIONS FOR HEALTHCARE PROVIDER:\n"
+        report += "- Review the patient's expressed concerns\n"
+        report += "- Consider the preliminary information gathered\n"
+        report += "- Address any specific questions or anxieties mentioned\n"
+        report += "- Follow up on symptoms or conditions discussed\n\n"
+        report += "NOTE: This is an AI-generated summary for informational purposes only. "
+        report += "Professional medical judgment should always take precedence.\n"
+        return report
+class GeminiHandler(StreamHandler):
+    """Relays audio between a WebRTC stream and the Gemini websocket endpoint.
+    Incoming audio frames are forwarded to the server in ``receive``; audio
+    returned by the model is decoded and handed back one frame at a time by
+    ``emit``. Text parts of model replies are logged to a ConversationTracker.
+    """
+    def __init__(
+        self, expected_layout="mono", output_sample_rate=24000, output_frame_size=480
+    ) -> None:
+        """Configure the stream parameters and create the per-handler state."""
+        super().__init__(
+            expected_layout,
+            output_sample_rate,
+            output_frame_size,
+            input_sample_rate=24000,
+        )
+        self.config = None  # GeminiConfig, built lazily on the first received frame
+        self.ws = None  # open websocket connection, or None while disconnected
+        self.all_output_data = None  # decoded samples not yet emitted as whole frames
+        self.audio_processor = AudioProcessor()
+        self.args_set = Event()
+        self.conversation_tracker = ConversationTracker()
+        self.system_prompt_sent = False
+    def copy(self):
+        """Return a new handler with the same audio settings that shares this handler's tracker."""
+        handler = GeminiHandler(
+            expected_layout=self.expected_layout,
+            output_sample_rate=self.output_sample_rate,
+            output_frame_size=self.output_frame_size,
+        )
+        # Share the tracker so the report can be read from the original handler.
+        handler.conversation_tracker = self.conversation_tracker
+        return handler
+    def _initialize_websocket(self):
+        """Open the websocket, send the setup request and start a tracked session.
+        Any failure is printed and leaves ``self.ws`` as None; callers must
+        check ``self.ws`` afterwards.
+        """
+        assert self.config, "Config not set"
+        try:
+            self.ws = websockets.sync.client.connect(self.config.ws_url, timeout=30)
+            initial_request = {
+                "setup": {
+                    "model": self.config.model,
+                    "systemInstruction": {
+                        "parts": [
+                            {
+                                "text": """You are a friendly and professional AI preconsultation agent designed to help patients prepare for their medical appointments. Your role is to:
+1. Warmly greet patients and explain your purpose
+2. Gather preliminary information about their health concerns
+3. Ask relevant questions about symptoms, medical history, and current medications
+4. Address any anxieties or questions they have about their upcoming appointment
+5. Provide reassurance and basic health education when appropriate
+6. Keep the conversation focused and efficient (aim for 5-10 minutes)
+Guidelines:
+- Be empathetic and professional
+- Ask one question at a time
+- Listen actively and acknowledge concerns
+- Don't provide medical diagnoses or treatment advice
+- Encourage patients to discuss all concerns with their healthcare provider
+- Keep responses concise but warm
+- When the patient indicates they're ready to end or have covered their main concerns, offer to summarize and conclude
+Start by introducing yourself and asking how you can help them prepare for their appointment."""
+                            }
+                        ]
+                    }
+                }
+            }
+            self.ws.send(json.dumps(initial_request))
+            setup_response = json.loads(self.ws.recv())
+            print(f"Setup response: {setup_response}")
+            self.conversation_tracker.start_session()
+        except websockets.exceptions.WebSocketException as e:
+            print(f"WebSocket connection failed: {str(e)}")
+            self.ws = None
+        except Exception as e:
+            print(f"Setup failed: {str(e)}")
+            self.ws = None
+    async def fetch_args(self):
+        """Send a "tick" message on the data channel when one is available."""
+        if self.channel:
+            self.channel.send("tick")
+    def set_args(self, args):
+        """Store the arguments via the base class and signal that they have arrived."""
+        super().set_args(args)
+        self.args_set.set()
+    def receive(self, frame: tuple[int, np.ndarray]) -> None:
+        """Forward one incoming audio frame to the server, connecting first if necessary.
+        Frames are dropped when there is no data channel, when GEMINI_API_KEY is
+        unset, or when the websocket cannot be opened. A send failure closes the
+        connection so the next frame triggers a reconnect.
+        """
+        if not self.channel:
+            return
+        if not self.config:
+            # Get API key from environment variable
+            api_key = os.getenv('GEMINI_API_KEY')
+            if not api_key:
+                print("Error: GEMINI_API_KEY environment variable not set")
+                return
+            self.config = GeminiConfig(api_key)
+        try:
+            if not self.ws:
+                self._initialize_websocket()
+            if not self.ws:
+                # _initialize_websocket() already reported the failure; drop this
+                # frame instead of dereferencing None and logging a second,
+                # misleading AttributeError.
+                return
+            _, array = frame
+            array = array.squeeze()
+            # NOTE(review): the rate advertised for *input* audio is the output
+            # sample rate; both are 24000 here, so the value is the same.
+            audio_message = self.audio_processor.encode_audio(
+                array, self.output_sample_rate
+            )
+            self.ws.send(json.dumps(audio_message))
+        except Exception as e:
+            print(f"Error in receive: {str(e)}")
+            if self.ws:
+                self.ws.close()
+            self.ws = None
+    def _process_server_content(self, content):
+        """Yield ``(sample_rate, frame)`` tuples for the audio found in one model turn.
+        Text parts are recorded in the conversation tracker. Decoded audio is
+        accumulated in ``self.all_output_data`` and released in chunks of
+        exactly ``output_frame_size`` samples; any remainder is kept for later.
+        """
+        # Track AI responses
+        for part in content.get("parts", []):
+            if "text" in part:
+                self.conversation_tracker.add_message(part["text"], is_user=False)
+            data = part.get("inlineData", {}).get("data", "")
+            if data:
+                audio_array = self.audio_processor.process_audio_response(data)
+                if self.all_output_data is None:
+                    self.all_output_data = audio_array
+                else:
+                    self.all_output_data = np.concatenate(
+                        (self.all_output_data, audio_array)
+                    )
+                while self.all_output_data.shape[-1] >= self.output_frame_size:
+                    yield (
+                        self.output_sample_rate,
+                        self.all_output_data[: self.output_frame_size].reshape(1, -1),
+                    )
+                    self.all_output_data = self.all_output_data[
+                        self.output_frame_size :
+                    ]
+    def generator(self):
+        """Endlessly read server messages and yield output frames, or None when idle.
+        None is yielded whenever there is nothing to play (no connection, a
+        receive timeout, or an error) so that ``emit`` can return to its caller.
+        """
+        while True:
+            if not self.ws or not self.config:
+                print("WebSocket not connected")
+                yield None
+                continue
+            try:
+                message = self.ws.recv(timeout=5)
+                msg = json.loads(message)
+                if "serverContent" in msg:
+                    content = msg["serverContent"].get("modelTurn", {})
+                    yield from self._process_server_content(content)
+            except TimeoutError:
+                print("Timeout waiting for server response")
+                yield None
+            except Exception as e:
+                print(f"Error in generator: {str(e)}")
+                yield None
+    def emit(self) -> tuple[int, np.ndarray] | None:
+        """Return the next output frame, or None when no audio is available."""
+        if not self.ws:
+            return None
+        # The generator is created on first use and discarded again by reset().
+        if not hasattr(self, "_generator"):
+            self._generator = self.generator()
+        try:
+            return next(self._generator)
+        except StopIteration:
+            self.reset()
+            return None
+    def reset(self) -> None:
+        """Discard the frame generator and any buffered output audio."""
+        if hasattr(self, "_generator"):
+            delattr(self, "_generator")
+        self.all_output_data = None
+    def shutdown(self) -> None:
+        """Close the websocket and end the tracked session if it is still active."""
+        if self.ws:
+            self.ws.close()
+        if self.conversation_tracker.session_active:
+            self.conversation_tracker.end_session()
+    def check_connection(self):
+        """Ensure a live websocket exists, reconnecting if needed.
+        Returns True when a connection is available afterwards, False otherwise.
+        """
+        try:
+            # The sync client connection does not expose a ``closed`` attribute;
+            # a finished connection is recognised by its protocol reporting a
+            # close code.
+            if not self.ws or self.ws.protocol.close_code is not None:
+                self._initialize_websocket()
+            # _initialize_websocket() swallows its own errors and leaves ws as
+            # None on failure, so report the actual outcome.
+            return self.ws is not None
+        except Exception as e:
+            print(f"Connection check failed: {str(e)}")
+            return False
+class PreconsultationApp:
+    """Builds and serves the Gradio interface for the voice preconsultation agent."""
+    def __init__(self):
+        """Create the interface; the stream handler is assigned while it is built."""
+        self.handler = None
+        self.demo = self._create_interface()
+    def _create_interface(self):
+        """Assemble the Blocks layout, wire up the audio stream and the report button, and return it."""
+        with gr.Blocks(title="AI Preconsultation Agent") as demo:
+            gr.HTML("""
+                <div style='text-align: center; margin-bottom: 20px'>
+                    <h1>🩺 AI Preconsultation Agent</h1>
+                    <p>Prepare for your medical appointment with our AI assistant</p>
+                    <p style='color: #666; font-size: 14px'>
+                        This AI agent will help gather preliminary information before your consultation
+                    </p>
+                </div>
+            """)
+            with gr.Row():
+                with gr.Column(scale=2):
+                    webrtc = WebRTC(
+                        label="Voice Consultation",
+                        modality="audio",
+                        mode="send-receive",
+                        rtc_configuration=get_twilio_turn_credentials(),
+                    )
+                with gr.Column(scale=1):
+                    gr.HTML("""
+                        <div style='background-color: #f0f9ff; padding: 15px; border-radius: 8px; margin-bottom: 15px'>
+                            <h3 style='margin-top: 0'>How it works:</h3>
+                            <ol style='margin-bottom: 0'>
+                                <li>Click "Start" to begin the voice consultation</li>
+                                <li>Speak naturally with the AI agent</li>
+                                <li>Share your health concerns and questions</li>
+                                <li>End the session when ready</li>
+                                <li>Get a summary report for your healthcare provider</li>
+                            </ol>
+                        </div>
+                    """)
+                    end_session_btn = gr.Button(
+                        "End Session & Generate Report",
+                        variant="primary",
+                        size="lg"
+                    )
+            with gr.Row():
+                report_output = gr.Textbox(
+                    label="Consultation Summary Report",
+                    placeholder="Your consultation report will appear here after ending the session...",
+                    lines=15,
+                    max_lines=20,
+                    visible=False
+                )
+            # Set up the WebRTC stream
+            self.handler = GeminiHandler()
+            webrtc.stream(
+                self.handler,
+                inputs=[webrtc],
+                outputs=[webrtc],
+                time_limit=600,  # 10 minutes max
+                concurrency_limit=1,
+            )
+            # Handle end session
+            def end_session():
+                """End the session if it is still running and reveal the report."""
+                tracker = self.handler.conversation_tracker if self.handler else None
+                if tracker is None:
+                    return gr.update(value="No active session to end.", visible=True)
+                if tracker.session_active:
+                    tracker.end_session()
+                elif tracker.start_time is None:
+                    # No session has ever been started, so there is nothing to report.
+                    return gr.update(value="No active session to end.", visible=True)
+                # A session that was already closed elsewhere (for example by the
+                # handler's shutdown(), or by an earlier click) still has its
+                # data, so the report is produced for it as well.
+                report = tracker.generate_report()
+                return gr.update(value=report, visible=True)
+            end_session_btn.click(
+                end_session,
+                outputs=[report_output]
+            )
+            gr.HTML("""
+                <div style='text-align: center; margin-top: 20px; padding: 15px; background-color: #fef3c7; border-radius: 8px'>
+                    <p style='margin: 0; color: #92400e'>
+                        <strong>Important:</strong> This AI agent is for preliminary consultation only.
+                        Always consult with qualified healthcare professionals for medical advice.
+                    </p>
+                </div>
+            """)
+        return demo
+    def launch(self):
+        """Start the web server, or print instructions and return when GEMINI_API_KEY is unset.
+        The server binds to all interfaces on the port given by the PORT
+        environment variable (7860 when it is not set).
+        """
+        # Check if API key is set
+        if not os.getenv('GEMINI_API_KEY'):
+            print("Error: Please set the GEMINI_API_KEY environment variable")
+            print("You can get a Gemini API key from: https://ai.google.dev/gemini-api/docs/api-key")
+            return
+        self.demo.launch(
+            server_name="0.0.0.0",
+            server_port=int(os.environ.get("PORT", 7860)),
+            ssl_verify=False,
+            ssl_keyfile=None,
+            ssl_certfile=None,
+        )
 if __name__ == "__main__":
+    # Build the interface first, then serve it; launch() returns without
+    # starting the server when GEMINI_API_KEY is not set.
+    app = PreconsultationApp()
+    app.launch()