transmed_poe / src /streamlit_app.py
DrMostafa's picture
Update src/streamlit_app.py
2995acd verified
"""
Data Analyst Agent - Streamlit App
Features: Audio Input (OpenAI Whisper), Chart Rendering, non-streaming Poe responses
"""
import base64
import hashlib
import html
import io
import os
import re
import tempfile
from pathlib import Path

import openai
import pandas as pd
import requests
import streamlit as st
# Page config
# NOTE(review): the icon string below looks mis-encoded (mojibake) in this copy
# of the file — confirm against the original source encoding.
st.set_page_config(
    page_title="Data Analyst Agent for Transmed",
    page_icon="πŸ“Š",
    layout="wide",
)

# API keys
# Secrets must never live in source control. The OpenAI key used for Whisper
# transcription is read from the environment; any key that was ever committed
# in this file must be treated as leaked and revoked.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Temp directory that holds files downloaded from bot responses
TEMP_DIR = Path(tempfile.gettempdir()) / "data_analyst"
# parents=True so creation also works when the system temp dir path is new
TEMP_DIR.mkdir(parents=True, exist_ok=True)
# Custom CSS for the whole page, injected once as a <style> element.
# unsafe_allow_html=True is required for Streamlit to emit the raw markup.
# Besides Streamlit's own widgets, the sheet defines the custom classes used
# elsewhere in this file: .main-header, .data-preview*, .chart-container,
# .chart-title, .result-card, .insight-card and .footer.
st.markdown("""
<style>
/* Import fonts */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
/* Global styles */
.stApp {
font-family: 'Inter', sans-serif;
}
/* Main header with gradient */
.main-header {
text-align: center;
padding: 2.5rem 2rem;
background: linear-gradient(135deg, #667eea 0%, #764ba2 50%, #f093fb 100%);
border-radius: 20px;
margin-bottom: 2rem;
color: white;
box-shadow: 0 10px 40px rgba(102, 126, 234, 0.3);
position: relative;
overflow: hidden;
}
.main-header::before {
content: '';
position: absolute;
top: 0;
left: 0;
right: 0;
bottom: 0;
background: url('data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100"><circle cx="20" cy="20" r="2" fill="rgba(255,255,255,0.1)"/><circle cx="80" cy="40" r="3" fill="rgba(255,255,255,0.1)"/><circle cx="40" cy="80" r="2" fill="rgba(255,255,255,0.1)"/></svg>');
pointer-events: none;
}
.main-header h1 {
margin: 0;
font-size: 2.5rem;
font-weight: 700;
text-shadow: 0 2px 10px rgba(0,0,0,0.2);
letter-spacing: -0.5px;
}
.main-header p {
margin: 0.5rem 0 0 0;
opacity: 0.95;
font-size: 1.1rem;
font-weight: 400;
}
/* Sidebar styling */
section[data-testid="stSidebar"] {
background: linear-gradient(180deg, #1a1a2e 0%, #16213e 100%);
}
section[data-testid="stSidebar"] .stMarkdown h1,
section[data-testid="stSidebar"] .stMarkdown h2,
section[data-testid="stSidebar"] .stMarkdown h3 {
color: #e0e0e0 !important;
}
/* File uploader styling */
.stFileUploader > div {
border: 2px dashed #667eea !important;
border-radius: 16px !important;
background: linear-gradient(135deg, rgba(102,126,234,0.05) 0%, rgba(118,75,162,0.05) 100%) !important;
transition: all 0.3s ease;
}
.stFileUploader > div:hover {
border-color: #764ba2 !important;
background: linear-gradient(135deg, rgba(102,126,234,0.1) 0%, rgba(118,75,162,0.1) 100%) !important;
}
/* Chat message styling */
.stChatMessage {
padding: 1rem !important;
border-radius: 16px !important;
margin-bottom: 1rem !important;
}
/* User message */
[data-testid="stChatMessage"]:has([data-testid="chatAvatarIcon-user"]) {
background: linear-gradient(135deg, #2d3748 0%, #1a202c 100%) !important;
border-left: 4px solid #667eea !important;
}
/* Assistant message */
[data-testid="stChatMessage"]:has([data-testid="chatAvatarIcon-assistant"]) {
background: linear-gradient(135deg, #1e1e2e 0%, #252540 100%) !important;
border-left: 4px solid #764ba2 !important;
}
/* Data preview card */
.data-preview {
background: linear-gradient(135deg, #1a1a2e 0%, #252540 100%);
border-radius: 16px;
padding: 1.5rem;
margin: 1rem 0;
border: 1px solid rgba(102, 126, 234, 0.2);
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.2);
}
.data-preview-header {
display: flex;
align-items: center;
gap: 10px;
margin-bottom: 1rem;
padding-bottom: 0.75rem;
border-bottom: 1px solid rgba(255,255,255,0.1);
}
.data-preview-icon {
font-size: 1.5rem;
}
.data-preview-title {
font-weight: 600;
color: #e0e0e0;
font-size: 1rem;
}
.data-preview-meta {
color: #888;
font-size: 0.85rem;
margin-left: auto;
}
/* Analysis status */
.analysis-status {
display: flex;
align-items: center;
gap: 12px;
padding: 1rem 1.5rem;
background: linear-gradient(135deg, rgba(102,126,234,0.15) 0%, rgba(118,75,162,0.15) 100%);
border-radius: 12px;
border: 1px solid rgba(102, 126, 234, 0.3);
margin: 1rem 0;
}
.analysis-spinner {
width: 24px;
height: 24px;
border: 3px solid rgba(102, 126, 234, 0.3);
border-top-color: #667eea;
border-radius: 50%;
animation: spin 1s linear infinite;
}
@keyframes spin {
to { transform: rotate(360deg); }
}
.analysis-text {
color: #e0e0e0;
font-weight: 500;
}
/* Chart container */
.chart-container {
background: linear-gradient(135deg, #1e1e2e 0%, #252540 100%);
border-radius: 16px;
padding: 1.5rem;
margin: 1.5rem 0;
border: 1px solid rgba(102, 126, 234, 0.2);
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3);
}
.chart-title {
color: #e0e0e0;
font-weight: 600;
margin-bottom: 1rem;
display: flex;
align-items: center;
gap: 8px;
}
/* Result card */
.result-card {
background: linear-gradient(135deg, #1e1e2e 0%, #252540 100%);
border-radius: 16px;
padding: 1.5rem;
margin: 1rem 0;
border: 1px solid rgba(102, 126, 234, 0.2);
}
/* Expander styling */
.stExpander {
background: linear-gradient(135deg, #1a1a2e 0%, #252540 100%) !important;
border: 1px solid rgba(102, 126, 234, 0.2) !important;
border-radius: 12px !important;
}
/* Button styling */
.stButton > button {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
border: none !important;
border-radius: 10px !important;
padding: 0.6rem 1.5rem !important;
font-weight: 600 !important;
transition: all 0.3s ease !important;
box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3) !important;
}
.stButton > button:hover {
transform: translateY(-2px) !important;
box-shadow: 0 6px 20px rgba(102, 126, 234, 0.4) !important;
}
/* Input styling */
.stTextInput > div > div > input,
.stChatInput > div > div > textarea {
background: #1a1a2e !important;
border: 2px solid rgba(102, 126, 234, 0.3) !important;
border-radius: 12px !important;
color: #e0e0e0 !important;
}
.stTextInput > div > div > input:focus,
.stChatInput > div > div > textarea:focus {
border-color: #667eea !important;
box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.2) !important;
}
/* Audio input styling */
.stAudioInput > div {
background: linear-gradient(135deg, rgba(102,126,234,0.1) 0%, rgba(118,75,162,0.1) 100%) !important;
border-radius: 12px !important;
border: 2px solid rgba(102, 126, 234, 0.3) !important;
}
/* Divider */
hr {
border: none !important;
height: 1px !important;
background: linear-gradient(90deg, transparent, rgba(102, 126, 234, 0.3), transparent) !important;
margin: 2rem 0 !important;
}
/* Success message */
.stSuccess {
background: linear-gradient(135deg, rgba(16, 185, 129, 0.1) 0%, rgba(5, 150, 105, 0.1) 100%) !important;
border: 1px solid rgba(16, 185, 129, 0.3) !important;
border-radius: 12px !important;
}
/* Info message */
.stInfo {
background: linear-gradient(135deg, rgba(102,126,234,0.1) 0%, rgba(118,75,162,0.1) 100%) !important;
border: 1px solid rgba(102, 126, 234, 0.3) !important;
border-radius: 12px !important;
}
/* Warning message */
.stWarning {
background: linear-gradient(135deg, rgba(245, 158, 11, 0.1) 0%, rgba(217, 119, 6, 0.1) 100%) !important;
border: 1px solid rgba(245, 158, 11, 0.3) !important;
border-radius: 12px !important;
}
/* DataFrame styling */
.stDataFrame {
border-radius: 12px !important;
overflow: hidden !important;
}
/* Footer */
.footer {
text-align: center;
padding: 1.5rem;
color: #666;
font-size: 0.85rem;
}
.footer a {
color: #667eea;
text-decoration: none;
}
/* Insight cards */
.insight-card {
background: linear-gradient(135deg, #1e1e2e 0%, #252540 100%);
border-radius: 12px;
padding: 1rem 1.25rem;
margin: 0.5rem 0;
border-left: 4px solid #667eea;
box-shadow: 0 2px 10px rgba(0,0,0,0.2);
}
.insight-card.warning {
border-left-color: #f59e0b;
}
.insight-card.success {
border-left-color: #10b981;
}
/* Hide streamlit branding */
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
/* Smooth scrolling */
html {
scroll-behavior: smooth;
}
</style>
""", unsafe_allow_html=True)
# Header: banner with the app title and tagline, rendered as raw HTML so the
# .main-header rules from the injected stylesheet apply.
# NOTE(review): the icon and bullet characters below look mis-encoded
# (mojibake) — confirm against the original file encoding.
st.markdown("""
<div class="main-header">
<h1>πŸ“Š Data Analyst Agent</h1>
<p>Upload your data β€’ Ask questions in natural language β€’ Get instant insights & visualizations</p>
</div>
""", unsafe_allow_html=True)
# Sidebar: connection settings, conversation reset, and usage tips.
with st.sidebar:
    st.markdown("## βš™οΈ Configuration")
    st.markdown("---")

    # A key already entered this session wins; otherwise fall back to the
    # POE_API_KEY environment variable (empty string when unset).
    default_key = os.environ.get("POE_API_KEY", "")
    api_key = st.text_input(
        "πŸ”‘ Poe API Key",
        type="password",
        value=st.session_state.get("api_key", default_key),
        help="Enter your Poe API key to connect to the agent",
    )
    if api_key:
        st.session_state.api_key = api_key

    bot_name = st.text_input(
        "πŸ€– Bot Name",
        value=st.session_state.get("bot_name", "Transmed-Agent"),
        help="The name of the Poe bot to use for analysis",
    )
    if bot_name:
        st.session_state.bot_name = bot_name

    st.markdown("---")
    st.markdown("### 🎯 Quick Actions")
    if st.button("πŸ—‘οΈ Clear Conversation", use_container_width=True):
        # Reset the chat and the uploaded-file state for this session.
        st.session_state.messages = []
        st.session_state.uploaded_file_data = None
        st.session_state.uploaded_file_name = None
        st.session_state.df_preview = None
        # Best-effort cleanup of downloaded artifacts. Only filesystem errors
        # are ignored (e.g. a sub-directory or a file that is already gone);
        # anything else should surface instead of being silently swallowed.
        for f in TEMP_DIR.glob("*"):
            try:
                f.unlink()
            except OSError:
                pass
        st.rerun()

    st.markdown("---")
    st.markdown("""
### πŸ’‘ Tips
- Upload CSV or Excel files
- Ask questions naturally
- Use voice for hands-free input
- Get charts & insights automatically
""")
# Session state: seed every key the app reads so later code can access it
# unconditionally. Existing values are never overwritten.
if "messages" not in st.session_state:
    st.session_state["messages"] = []
for _state_key in ("uploaded_file_data", "uploaded_file_name", "df_preview"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None
# File upload section: store the raw bytes for the API call and show a preview.
st.markdown("### πŸ“ Upload Your Data")
uploaded_file = st.file_uploader(
    "Drag and drop your CSV or Excel file here",
    type=["csv", "xlsx", "xls"],
    help="Supported formats: CSV, XLSX, XLS",
)
if uploaded_file:
    # getvalue() returns the complete payload irrespective of the current
    # stream position, unlike read(), which depends on where the cursor is.
    file_bytes = uploaded_file.getvalue()
    st.session_state.uploaded_file_data = file_bytes
    st.session_state.uploaded_file_name = uploaded_file.name
    try:
        # Parse from an independent buffer so the upload object is untouched.
        buffer = io.BytesIO(file_bytes)
        # Case-insensitive extension check, matching call_poe_api, so that
        # "DATA.CSV" is not mistakenly sent to the Excel reader.
        if uploaded_file.name.lower().endswith(".csv"):
            df = pd.read_csv(buffer)
        else:
            df = pd.read_excel(buffer)
        st.session_state.df_preview = df

        # The filename is user-controlled and is interpolated into raw HTML,
        # so it must be escaped before rendering with unsafe_allow_html.
        safe_name = html.escape(uploaded_file.name)
        st.markdown(f"""
<div class="data-preview">
<div class="data-preview-header">
<span class="data-preview-icon">πŸ“Š</span>
<span class="data-preview-title">{safe_name}</span>
<span class="data-preview-meta">{df.shape[0]:,} rows Γ— {df.shape[1]} columns</span>
</div>
</div>
""", unsafe_allow_html=True)

        with st.expander("πŸ” Preview Data", expanded=True):
            st.dataframe(
                df.head(5),
                use_container_width=True,
                hide_index=True,
            )

        # Quick stats
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("πŸ“ Columns", df.shape[1])
        with col2:
            st.metric("πŸ“Š Rows", f"{df.shape[0]:,}")
        with col3:
            st.metric("πŸ”’ Numeric", len(df.select_dtypes(include=['number']).columns))
        with col4:
            st.metric("πŸ“… Missing", f"{df.isnull().sum().sum():,}")
    except Exception as e:
        # UI boundary: a file that cannot be parsed only disables the preview;
        # the raw bytes stored above are still available for the API call.
        st.warning(f"⚠️ Could not preview file: {e}")
def transcribe_audio(audio_bytes: bytes) -> str:
    """Turn recorded audio into text with OpenAI's ``whisper-1`` model.

    Args:
        audio_bytes: Raw audio payload; it is uploaded under the file name
            ``audio.wav``.

    Returns:
        The transcription with surrounding whitespace removed. If anything
        raises, the error is shown with ``st.error`` and ``None`` is returned
        (despite the ``str`` annotation), so callers must check for a falsy
        result.
    """
    try:
        whisper_client = openai.OpenAI(api_key=OPENAI_API_KEY)
        # The SDK reads the upload's file name from the buffer's `name`.
        wav_buffer = io.BytesIO(audio_bytes)
        wav_buffer.name = "audio.wav"
        transcription = whisper_client.audio.transcriptions.create(
            model="whisper-1",
            file=wav_buffer,
        )
        text = transcription.text.strip()
    except Exception as e:
        st.error(f"🎀 Transcription error: {e}")
        text = None
    return text
def download_file(url: str) -> tuple:
    """Download *url* into ``TEMP_DIR`` and return ``(filepath, content_type)``.

    The local file name is derived from a SHA-256 digest of the URL, so the
    same URL always maps to the same file and distinct URLs do not overwrite
    each other. (The built-in ``hash()`` is salted per process and was reduced
    modulo 100000, which made collisions likely.) The extension is chosen from
    the response's ``Content-Type`` header; unknown types get ``.bin``.

    Args:
        url: Absolute URL of the file to fetch (30 second timeout).

    Returns:
        ``(Path, str)`` on success, ``(None, None)`` when the request fails
        (connection error, timeout, non-2xx status) or the file cannot be
        written.
    """
    # (substring of the Content-Type header, extension), checked in order.
    # gif/webp are included because render_response treats those suffixes as
    # images; without them such responses were stored as ".bin".
    extension_by_marker = (
        ("png", ".png"),
        ("jpeg", ".jpg"),
        ("jpg", ".jpg"),
        ("gif", ".gif"),
        ("webp", ".webp"),
        ("html", ".html"),
        ("pdf", ".pdf"),
    )
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        content_type = response.headers.get("content-type", "")

        # Header values are case-insensitive, so compare in lower case.
        lowered_type = content_type.lower()
        extension = next(
            (ext for marker, ext in extension_by_marker if marker in lowered_type),
            ".bin",
        )

        digest = hashlib.sha256(url.encode("utf-8")).hexdigest()[:16]
        # The temp dir may have been purged since import time; recreate it.
        TEMP_DIR.mkdir(parents=True, exist_ok=True)
        filepath = TEMP_DIR / f"file_{digest}{extension}"
        filepath.write_bytes(response.content)
        return filepath, content_type
    except (requests.RequestException, OSError):
        # Best effort: the caller simply skips attachments it cannot fetch.
        return None, None
def is_noise_line(line: str) -> bool:
    """Return True when *line* is agent execution chatter rather than content.

    The line is stripped first. Blank lines are never noise (they are kept for
    formatting). Otherwise the line is noise when any regex in
    ``noise_patterns`` matches anywhere in it (case-insensitively), or when it
    contains any entry of ``noise_phrases`` (case-insensitively).
    """
    text = line.strip()

    # Blank lines carry paragraph structure, so they are always kept.
    if text == "":
        return False

    # Status indicators, tool-call markers and shell scaffolding.
    noise_patterns = (
        r'^[:\.\*\s●○◐◑◒◓β–ͺβ–«]+$',  # Just dots, bullets, asterisks
        r'Running\.{2,}',
        r'Thinking\.{2,}',
        r'^●\s*\*+',
        r'^●\s*Bash',
        r'^●\s*Read',
        r'^●\s*Write',
        r'^●\s*Edit',
        r'^●\s*Glob',
        r'^●\s*Grep',
        r'^πŸ”',
        r'^🟒\s*\*\*',
        r'^🟑',
        r'^πŸ”΅',
        r'^βšͺ',
        r'^>\s*```',
        r'^>\s*Read \*\*\d+\*\*',
        r'^\s*Bash\s*[●○◐◑◒◓]',
        r'^\s*Bash\(',
        r'^\s*●\s*Bash\(',
        r'\*\*Bash\*\*',
        r'cd /root/workdir',
        r'python3 <<',
        r"<< 'EOF'",
        r'^EOF$',
        r'^\s*\.\s*$',
        r'^\s*\.\.\.\s*$',
        r'^\s*:\s*$',
        r'^\s*\*+\s*$',
    )
    if any(re.search(regex, text, re.IGNORECASE) for regex in noise_patterns):
        return True

    # Boilerplate sentences and code fragments the agent echoes back.
    noise_phrases = (
        'Starting data analysis agent',
        'The agent can analyze your data',
        "I'll analyze the Excel file for you",
        "I'll analyze the CSV file for you",
        'Let me first explore the data',
        'Let me analyze',
        'Let me examine',
        'Let me look at',
        'import pandas as pd',
        'pd.read_excel',
        'pd.read_csv',
        'nprint(',
        '\\nprint(',
        'workdir &&',
    )
    lowered = text.lower()
    return any(phrase.lower() in lowered for phrase in noise_phrases)
def clean_bot_response(content: str) -> str:
    """Strip agent execution noise from a bot reply and tidy its whitespace.

    Processing order:
      1. regex removals (fenced console/bash/shell/python blocks, inline
         agent commands, ``Bash(...)`` calls, Running.../Thinking... markers,
         status bullets);
      2. drop every line that ``is_noise_line`` flags;
      3. collapse runs of four or more newlines to three;
      4. remove trailing whitespace from each line, then strip the result.

    Returns an empty string for falsy input.
    """
    if not content:
        return ""

    # Applied in this exact order; each one deletes its matches.
    removal_patterns = (
        r'```(?:console|bash|shell|python)[\s\S]*?```',  # fenced command blocks
        r'`cd /root/workdir[^`]*`',                      # inline agent commands
        r'`python3[^`]*`',
        r'●\s*Bash\([^)]*\)',                            # Bash calls with the ● indicator
        r'Bash\([^)]*\)',
        r'[:\s]*Running\.{2,}[:\s]*',                    # Running.../Thinking... sequences
        r'[:\s]*Thinking\.{2,}[:\s]*',
        r'●\s*\*+[^*\n]*\*+',                            # status bullet patterns
    )
    for regex in removal_patterns:
        content = re.sub(regex, '', content)

    # Line-level filtering
    kept = [row for row in content.split('\n') if not is_noise_line(row)]
    text = '\n'.join(kept)

    # Cap consecutive newlines at three
    text = re.sub(r'\n{4,}', '\n\n\n', text)

    # Only trailing whitespace is removed; leading indentation is preserved
    text = '\n'.join(row.rstrip() for row in text.split('\n'))
    return text.strip()
def format_analysis_content(content: str) -> str:
    """Apply display formatting to already-cleaned markdown.

    Two line-anchored rewrites are applied, in order:
      * headings of level 1-3: the whitespace after the ``#`` run is
        normalised to a single space;
      * bullet lines of the form ``- **Label**: text`` become an
        ``insight-card`` ``<div>`` (this emits raw HTML, so the caller must
        render with ``unsafe_allow_html=True``).
    """
    heading_line = re.compile(r'^(#{1,3})\s+(.+)$', flags=re.MULTILINE)
    finding_line = re.compile(
        r'^[-β€’]\s+\*\*([^*]+)\*\*:\s*(.+)$',
        flags=re.MULTILINE,
    )

    with_headings = heading_line.sub(r'\1 \2', content)
    card_markup = r'<div class="insight-card"><strong>\1:</strong> \2</div>'
    return finding_line.sub(card_markup, with_headings)
def render_response(content: str):
    """Render a bot reply: cleaned text first, then each Poe CDN attachment.

    Poe CDN URLs are collected from the raw reply, removed from the text, and
    then downloaded and displayed separately: images via ``st.image``, HTML
    files in an embedded component, anything else as a download link.
    Attachments that cannot be downloaded are skipped. Nothing is rendered
    for falsy *content*.

    Args:
        content: Raw response text from the bot.
    """
    if not content:
        return

    # Pattern for Poe CDN URLs
    url_pattern = r'(https://pfst\.cf2\.poecdn\.net/[^\s\)\]\>\"\'\,]+)'
    urls = re.findall(url_pattern, content)

    clean_content = clean_bot_response(content)

    # Remove URLs from the text (they are rendered separately below).
    # Longest first, so a URL that is a prefix of another cannot mangle it.
    for url in sorted(set(urls), key=len, reverse=True):
        # The markdown image wrapper must go BEFORE the bare URL: once the URL
        # has been blanked out this pattern can no longer match, which used to
        # leave "![alt]()" remnants in the rendered text.
        clean_content = re.sub(
            rf'!\[[^\]]*\]\({re.escape(url)}[^\)]*\)', '', clean_content
        )
        clean_content = clean_content.replace(url, '')

    # Clean up extra whitespace
    clean_content = re.sub(r'\n{3,}', '\n\n', clean_content)
    clean_content = clean_content.strip()

    # Render cleaned text with nice formatting
    if clean_content:
        formatted = format_analysis_content(clean_content)
        st.markdown(formatted, unsafe_allow_html=True)

    # Process and render files/charts, once per distinct URL, in reply order
    rendered_urls = set()
    for url in urls:
        # The URL regex can swallow sentence punctuation that follows a link.
        clean_url = url.rstrip('.,;:)>]"\'')
        if clean_url in rendered_urls:
            continue
        rendered_urls.add(clean_url)

        filepath, content_type = download_file(clean_url)
        if not filepath:
            continue

        suffix = filepath.suffix.lower()
        if suffix in ['.png', '.jpg', '.jpeg', '.gif', '.webp']:
            # Render image/chart with nice container
            st.markdown('<div class="chart-container">', unsafe_allow_html=True)
            st.markdown('<div class="chart-title">πŸ“ˆ Generated Visualization</div>', unsafe_allow_html=True)
            st.image(str(filepath), use_container_width=True)
            st.markdown('</div>', unsafe_allow_html=True)
        elif suffix == '.html':
            # Render HTML chart (Plotly)
            st.markdown('<div class="chart-container">', unsafe_allow_html=True)
            st.markdown('<div class="chart-title">πŸ“Š Interactive Chart</div>', unsafe_allow_html=True)
            # Explicit encoding: the platform default is locale-dependent and
            # could fail or garble UTF-8 chart markup.
            html_content = filepath.read_text(encoding="utf-8", errors="replace")
            st.components.v1.html(html_content, height=500, scrolling=True)
            st.markdown('</div>', unsafe_allow_html=True)
        else:
            # Show file info with clickable download link
            st.markdown(f"""
<div class="result-card">
<p>πŸ“Ž <strong>Generated File:</strong> <a href="{clean_url}" target="_blank" download style="color: #667eea; text-decoration: none; font-family: monospace; background: rgba(102,126,234,0.1); padding: 4px 8px; border-radius: 6px;">{filepath.name}</a></p>
</div>
""", unsafe_allow_html=True)
def call_poe_api(message: str, file_bytes: bytes = None, filename: str = None) -> str:
    """Send one user message (and optionally a file) to the configured Poe bot.

    The request is made without streaming, so the call blocks until the full
    reply is available. The API key and bot name are read from
    ``st.session_state``.

    Args:
        message: The user's question.
        file_bytes: Raw bytes of the uploaded data file, if any.
        filename: Name of the uploaded file; its extension selects the MIME
            type of the attachment. The file is attached only when both
            *file_bytes* and *filename* are truthy.

    Returns:
        The bot's reply text (empty string if the reply has no content), or a
        human-readable error string when the key is missing or the request
        raises.
    """
    api_key = st.session_state.get("api_key", "")
    bot_name = st.session_state.get("bot_name", "Transmed-Agent")
    if not api_key:
        return "❌ Please enter your Poe API key in the sidebar."

    client = openai.OpenAI(
        api_key=api_key,
        base_url="https://api.poe.com/v1",
    )

    # Build content
    if file_bytes and filename:
        # The uploader accepts csv, xlsx and xls; legacy .xls workbooks have
        # their own MIME type and must not be labelled as .xlsx.
        xlsx_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        lowered_name = filename.lower()
        if lowered_name.endswith(".csv"):
            ct = "text/csv"
        elif lowered_name.endswith(".xls"):
            ct = "application/vnd.ms-excel"
        else:
            ct = xlsx_type
        file_b64 = base64.b64encode(file_bytes).decode()
        content = [
            {"type": "text", "text": message},
            {"type": "file", "file": {"file_data": f"data:{ct};base64,{file_b64}", "filename": filename}},
        ]
    else:
        content = message

    try:
        # Non-streaming call: waits for the complete response
        response = client.chat.completions.create(
            model=bot_name,
            messages=[{"role": "user", "content": content}],
            stream=False,
        )
        return response.choices[0].message.content or ""
    except Exception as e:
        # UI boundary: surface the failure as chat text instead of crashing.
        return f"❌ Error: {str(e)}"
# Display chat history: replay every stored message on each rerun.
if st.session_state.messages:
    st.markdown("### πŸ’¬ Conversation")
    for entry in st.session_state.messages:
        role = entry["role"]
        avatar = "πŸ§‘β€πŸ’Ό" if role == "user" else "πŸ€–"
        with st.chat_message(role, avatar=avatar):
            if role == "assistant":
                # Assistant replies go through cleaning and chart rendering.
                render_response(entry["content"])
            else:
                st.markdown(entry["content"])
# Input area: typed question on the left, voice recorder on the right.
st.markdown("---")
st.markdown("### πŸ’­ Ask a Question")
col1, col2 = st.columns([4, 1])
with col1:
    text_input = st.chat_input("What would you like to know about your data?")
with col2:
    audio = st.audio_input("🎀 Voice", help="Click to record your question")

# Handle audio input
prompt = None
if audio is not None:
    st.audio(audio)
    audio_bytes = audio.getvalue()
    # The recorder keeps its value across reruns, so without this guard every
    # rerun (typing in the sidebar, clearing the chat, sending text) would
    # transcribe and re-submit the same recording. Each distinct recording is
    # processed once, identified by a digest of its bytes.
    audio_digest = hashlib.sha256(audio_bytes).hexdigest()
    if st.session_state.get("last_audio_digest") != audio_digest:
        st.session_state["last_audio_digest"] = audio_digest
        with st.spinner("🎀 Transcribing your voice..."):
            transcribed = transcribe_audio(audio_bytes)
        if transcribed:
            st.success(f'🎀 You said: "{transcribed}"')
            prompt = transcribed

# Handle text input (a typed question takes precedence over a transcription)
if text_input:
    prompt = text_input
# Process prompt: require an uploaded file, then record and show the exchange.
if prompt and not st.session_state.uploaded_file_data:
    st.warning("⚠️ Please upload a data file first to begin analysis.")
elif prompt:
    # User message, prefixed with the name of the attached file
    attached_name = st.session_state.uploaded_file_name
    user_msg = f"πŸ“Ž **{attached_name}**\n\n{prompt}"
    st.session_state.messages.append({"role": "user", "content": user_msg})
    with st.chat_message("user", avatar="πŸ§‘β€πŸ’Ό"):
        st.markdown(user_msg)

    # Bot response: no streaming, block until the complete reply is available
    with st.chat_message("assistant", avatar="πŸ€–"):
        with st.spinner("πŸ” Analyzing your data... This may take a moment."):
            full_response = call_poe_api(
                prompt,
                st.session_state.uploaded_file_data,
                st.session_state.uploaded_file_name,
            )
        # Render only the clean final response
        render_response(full_response)

    # The raw reply is stored; it is cleaned again whenever history is replayed.
    st.session_state.messages.append({"role": "assistant", "content": full_response})
# Footer: divider plus a credits line, rendered as raw HTML so the .footer
# rules from the injected stylesheet apply.
st.markdown("---")
st.markdown("""
<div class="footer">
<p>Powered by <strong>Poe API</strong> & <strong>OpenAI Whisper</strong> | Built with ❀️ using Streamlit</p>
</div>
""", unsafe_allow_html=True)