import io
import json
import time
from typing import Dict, List

import pandas as pd
import plotly.graph_objects as go
import requests
import streamlit as st
from openai import OpenAI


class ConversationEvaluator:
    def __init__(self):
        self.openai_client = None
        self.hf_api_key = None
        self.hf_api_url = "https://router.huggingface.co/v1/chat/completions"
        self.metrics = [
            "empathy", "clarity", "therapeutic_alliance",
            "active_listening", "intervention_quality", "patient_engagement"
        ]

    def setup_openai(self, api_key: str) -> bool:
        """Initialize the OpenAI client."""
        try:
            # The Responses API used below requires the client-object style
            # of the openai SDK (v1+), not the legacy module-level api_key.
            self.openai_client = OpenAI(api_key=api_key)
            return True
        except Exception as e:
            st.error(f"OpenAI setup failed: {str(e)}")
            return False

    def setup_huggingface(self, api_key: str) -> bool:
        """Initialize the Hugging Face router API client."""
        try:
            self.hf_api_key = api_key
            # Verify the key with a minimal chat-completions request.
            headers = {
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json"
            }
            test_payload = {
                "messages": [
                    {"role": "user", "content": "Hello, this is a test message."}
                ],
                "model": "deepseek-ai/DeepSeek-V3-0324",
                "stream": False
            }
            test_response = requests.post(
                self.hf_api_url,
                headers=headers,
                json=test_payload,
                timeout=30  # avoid hanging the UI on a stalled request
            )
            if test_response.status_code == 200:
                return True
            st.error(
                f"Hugging Face API test failed: "
                f"{test_response.status_code} - {test_response.text}"
            )
            return False
        except Exception as e:
            st.error(f"Hugging Face API setup failed: {str(e)}")
            return False

    def parse_conversation(self, file_content: str, file_type: str) -> List[Dict]:
        """Parse a conversation file into a list of utterance dicts."""
        utterances = []
        if file_type == "json":
            try:
                data = json.loads(file_content)
                if isinstance(data, list):
                    # Flat list of {"speaker", "text", "timestamp"} objects
                    for i, item in enumerate(data):
                        utterances.append({
                            "speaker": item.get("speaker", "Unknown"),
                            "text": item.get("text", ""),
                            "timestamp": item.get("timestamp", i)
                        })
                else:
                    # Nested structure: {speaker: [message, ...], ...}
                    for speaker, messages in data.items():
                        for i, message in enumerate(messages):
                            utterances.append({
                                "speaker": speaker,
                                "text": message,
                                "timestamp": i
                            })
            except json.JSONDecodeError:
                st.error("Invalid JSON format")
                return []
        elif file_type == "txt":
            lines = file_content.split('\n')
            for i, line in enumerate(lines):
                if line.strip():
                    # Simple parsing: assume "Speaker: Text" per line
                    if ':' in line:
                        speaker, text = line.split(':', 1)
                        utterances.append({
                            "speaker": speaker.strip(),
                            "text": text.strip(),
                            "timestamp": i
                        })
                    else:
                        utterances.append({
                            "speaker": "Unknown",
                            "text": line.strip(),
                            "timestamp": i
                        })
        elif file_type == "csv":
            try:
                df = pd.read_csv(io.StringIO(file_content))
                for _, row in df.iterrows():
                    utterances.append({
                        "speaker": row.get("speaker", "Unknown"),
                        "text": row.get("text", ""),
                        "timestamp": row.get("timestamp", len(utterances))
                    })
            except Exception as e:
                st.error(f"CSV parsing error: {str(e)}")
                return []
        return utterances
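
    # Illustrative input shapes accepted by parse_conversation above
    # (hypothetical sample data, shown only to document the parser):
    #
    #   JSON (flat list):   [{"speaker": "Therapist", "text": "Hi.", "timestamp": 0}]
    #   JSON (nested):      {"Therapist": ["Hi.", "How are you?"], "Patient": ["Fine."]}
    #   TXT (one per line): Therapist: Hi.
    #   CSV (with header):  speaker,text,timestamp
    #                       Therapist,Hi.,0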

    def evaluate_with_openai(self, utterance: str, speaker: str) -> Dict[str, float]:
        """Evaluate an utterance using OpenAI."""
        if not self.openai_client:
            return {}
        metric_descriptions = {
            'empathy': 'Empathy (1-10): How empathetic and understanding is the response?',
            'clarity': 'Clarity (1-10): How clear and understandable is the communication?',
            'therapeutic_alliance': 'Therapeutic Alliance (1-10): How well does it build rapport and trust?',
            'active_listening': 'Active Listening (1-10): How well does it show engagement and attention?',
            'intervention_quality': 'Intervention Quality (1-10): How effective is the therapeutic technique?',
            'patient_engagement': 'Patient Engagement (1-10): How well does it encourage patient participation?'
        }
        # Evaluate only the metrics that are both selected and described
        metrics_to_evaluate = [m for m in self.metrics if m in metric_descriptions]
        if not metrics_to_evaluate:
            return {}
        # Build a JSON answer template like {"empathy": X, "clarity": X, ...}
        json_template = {m: "X" for m in metrics_to_evaluate}
        json_str_template = json.dumps(json_template).replace('"X"', 'X')
        prompt = f"""
Evaluate this {speaker} utterance on a scale of 1-10 for each metric:

Utterance: "{utterance}"

Provide scores for:
"""
        for metric in metrics_to_evaluate:
            prompt += f"- {metric_descriptions.get(metric, metric)}\n"
        prompt += f"\nRespond with only the scores in JSON format: {json_str_template}"
        try:
            response = self.openai_client.responses.create(
                model="gpt-4o-mini",
                input=prompt,
                temperature=0.3
            )
            result = response.output_text.strip()
            # Extract the first {...} block from the response text
            if "{" in result and "}" in result:
                json_start = result.find("{")
                json_end = result.rfind("}") + 1
                scores = json.loads(result[json_start:json_end])
                # Keep only the selected metrics
                return {k: v for k, v in scores.items() if k in metrics_to_evaluate}
        except Exception as e:
            st.warning(f"OpenAI evaluation failed: {str(e)}")
        return {}

    def evaluate_with_huggingface(self, utterance: str) -> Dict[str, float]:
        """Evaluate an utterance using the Hugging Face chat-completions API."""
        if not self.hf_api_key:
            return {}
        metric_descriptions = {
            'empathy': 'Empathy: How empathetic and understanding is the response?',
            'clarity': 'Clarity: How clear and understandable is the communication?',
            'therapeutic_alliance': 'Therapeutic Alliance: How well does it build rapport and trust?',
            'active_listening': 'Active Listening: How well does it show engagement and attention?',
            'intervention_quality': 'Intervention Quality: How effective is the therapeutic technique?',
            'patient_engagement': 'Patient Engagement: How well does it encourage patient participation?'
        }
        # Evaluate only the metrics that are both selected and described
        metrics_to_evaluate = [m for m in self.metrics if m in metric_descriptions]
        if not metrics_to_evaluate:
            return {}
        try:
            headers = {
                "Authorization": f"Bearer {self.hf_api_key}",
                "Content-Type": "application/json"
            }
            # Build a JSON answer template like {"empathy": X, ...}
            json_template = {m: "X" for m in metrics_to_evaluate}
            json_str_template = json.dumps(json_template).replace('"X"', 'X')
            evaluation_prompt = f"""
Please evaluate this therapeutic utterance on a scale of 1-10 for each metric:

Utterance: "{utterance}"

Rate each of the following metrics from 1-10:
"""
            for metric in metrics_to_evaluate:
                evaluation_prompt += f"- {metric_descriptions.get(metric, metric)}\n"
            evaluation_prompt += f"\nRespond with only the scores in JSON format: {json_str_template}"
            payload = {
                "messages": [
                    {"role": "user", "content": evaluation_prompt}
                ],
                "model": "deepseek-ai/DeepSeek-V3-0324",
                "stream": False,
                "temperature": 0.3
            }
            response = requests.post(
                self.hf_api_url,
                headers=headers,
                json=payload,
                timeout=60  # avoid hanging the UI on a stalled request
            )
            if response.status_code == 200:
                result = response.json()
                content = result['choices'][0]['message']['content']
                # Extract the first {...} block from the response text
                try:
                    if "{" in content and "}" in content:
                        json_start = content.find("{")
                        json_end = content.rfind("}") + 1
                        scores = json.loads(content[json_start:json_end])
                        # Keep only the selected metrics
                        return {k: v for k, v in scores.items() if k in metrics_to_evaluate}
                    # Fallback: neutral scores when no JSON is present
                    return {m: 5.0 for m in metrics_to_evaluate}
                except json.JSONDecodeError:
                    # Fallback: neutral scores when the JSON is malformed
                    return {m: 5.0 for m in metrics_to_evaluate}
            st.warning(f"Hugging Face API request failed: {response.status_code}")
            return {}
        except Exception as e:
            st.warning(f"Hugging Face API evaluation failed: {str(e)}")
            return {}
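
    # The router speaks the OpenAI-compatible chat-completions format, so a
    # successful response parsed above looks roughly like this (illustrative):
    #
    #   {
    #     "choices": [
    #       {"message": {"role": "assistant",
    #                    "content": "{\"empathy\": 7, \"clarity\": 8, ...}"}}
    #     ]
    #   }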

    def evaluate_conversation(self, utterances: List[Dict], use_openai: bool = True,
                              use_hf: bool = True) -> List[Dict]:
        """Evaluate an entire conversation, utterance by utterance."""
        results = []
        progress_bar = st.progress(0)
        status_text = st.empty()
        for i, utterance in enumerate(utterances):
            status_text.text(f"Evaluating utterance {i+1}/{len(utterances)}")
            utterance_result = {
                "speaker": utterance["speaker"],
                "text": utterance["text"],
                "timestamp": utterance["timestamp"],
                "openai_scores": {},
                "huggingface_scores": {}
            }
            # OpenAI evaluation
            if use_openai and self.openai_client:
                utterance_result["openai_scores"] = self.evaluate_with_openai(
                    utterance["text"], utterance["speaker"]
                )
            # Hugging Face evaluation
            if use_hf and self.hf_api_key:
                utterance_result["huggingface_scores"] = self.evaluate_with_huggingface(
                    utterance["text"]
                )
            results.append(utterance_result)
            progress_bar.progress((i + 1) / len(utterances))
            time.sleep(0.1)  # small delay so the progress bar is visible
        status_text.text("Evaluation complete!")
        return results
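

# A minimal wiring sketch for the evaluator (illustrative only; the widget
# labels and upload handling below are assumptions, not part of the class):
#
#   evaluator = ConversationEvaluator()
#   openai_key = st.sidebar.text_input("OpenAI API key", type="password")
#   hf_key = st.sidebar.text_input("Hugging Face API key", type="password")
#   if openai_key:
#       evaluator.setup_openai(openai_key)
#   if hf_key:
#       evaluator.setup_huggingface(hf_key)
#   uploaded = st.file_uploader("Conversation file", type=["json", "txt", "csv"])
#   if uploaded:
#       content = uploaded.read().decode("utf-8")
#       file_type = uploaded.name.rsplit(".", 1)[-1].lower()
#       utterances = evaluator.parse_conversation(content, file_type)
#       results = evaluator.evaluate_conversation(utterances)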


# Helper functions

def create_radar_chart(scores: Dict[str, float], title: str):
    """Create a radar (polar) chart for a set of metric scores."""
    categories = list(scores.keys())
    values = list(scores.values())
    fig = go.Figure()
    fig.add_trace(go.Scatterpolar(
        r=values,
        theta=categories,
        fill='toself',
        name=title,
        line_color='blue'
    ))
    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 10]
            )
        ),
        showlegend=True,
        title=title,
        font_size=12
    )
    return fig


def display_utterance_results(results: List[Dict]):
    """Display utterance-level results."""
    st.subheader("Utterance-Level Results")
    for i, result in enumerate(results):
        with st.expander(f"Utterance {i+1}: {result['speaker']} "
                         f"(Timestamp: {result['timestamp']})"):
            st.write(f"**Text:** {result['text']}")
            col1, col2 = st.columns(2)
            with col1:
                st.write("**OpenAI Scores:**")
                if result['openai_scores']:
                    for metric, score in result['openai_scores'].items():
                        st.metric(metric.replace('_', ' ').title(), f"{score:.1f}/10")
                else:
                    st.write("No OpenAI scores available")
            with col2:
                st.write("**Hugging Face Scores:**")
                if result['huggingface_scores']:
                    for metric, score in result['huggingface_scores'].items():
                        st.metric(metric.replace('_', ' ').title(), f"{score:.1f}/10")
                else:
                    st.write("No Hugging Face scores available")