""" Data Analyst Agent - Streamlit App Features: Streaming, Audio Input (OpenAI Whisper), Chart Rendering """ import streamlit as st import openai import base64 import re import os import io import tempfile import requests import pandas as pd from pathlib import Path # Page config st.set_page_config( page_title="Data Analyst Agent for Transmed", page_icon="📊", layout="wide" ) # API Keys OPENAI_API_KEY = "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" # Temp directory TEMP_DIR = Path(tempfile.gettempdir()) / "data_analyst" TEMP_DIR.mkdir(exist_ok=True) # Custom CSS for stunning UI st.markdown(""" """, unsafe_allow_html=True) # Header st.markdown("""

📊 Data Analyst Agent

Upload your data • Ask questions in natural language • Get instant insights & visualizations

""", unsafe_allow_html=True) # Sidebar with st.sidebar: st.markdown("## ⚙️ Configuration") st.markdown("---") default_key = os.environ.get("POE_API_KEY", "") api_key = st.text_input( "🔑 Poe API Key", type="password", value=st.session_state.get("api_key", default_key), help="Enter your Poe API key to connect to the agent" ) if api_key: st.session_state.api_key = api_key bot_name = st.text_input( "🤖 Bot Name", value=st.session_state.get("bot_name", "Transmed-Agent"), help="The name of the Poe bot to use for analysis" ) if bot_name: st.session_state.bot_name = bot_name st.markdown("---") st.markdown("### 🎯 Quick Actions") if st.button("🗑️ Clear Conversation", use_container_width=True): st.session_state.messages = [] st.session_state.uploaded_file_data = None st.session_state.uploaded_file_name = None st.session_state.df_preview = None for f in TEMP_DIR.glob("*"): try: f.unlink() except: pass st.rerun() st.markdown("---") st.markdown(""" ### 💡 Tips - Upload CSV or Excel files - Ask questions naturally - Use voice for hands-free input - Get charts & insights automatically """) # Session state if "messages" not in st.session_state: st.session_state.messages = [] if "uploaded_file_data" not in st.session_state: st.session_state.uploaded_file_data = None if "uploaded_file_name" not in st.session_state: st.session_state.uploaded_file_name = None if "df_preview" not in st.session_state: st.session_state.df_preview = None # File upload section st.markdown("### 📁 Upload Your Data") uploaded_file = st.file_uploader( "Drag and drop your CSV or Excel file here", type=["csv", "xlsx", "xls"], help="Supported formats: CSV, XLSX, XLS" ) if uploaded_file: file_bytes = uploaded_file.read() st.session_state.uploaded_file_data = file_bytes st.session_state.uploaded_file_name = uploaded_file.name uploaded_file.seek(0) try: if uploaded_file.name.endswith(".csv"): df = pd.read_csv(uploaded_file) else: df = pd.read_excel(uploaded_file) st.session_state.df_preview = df # Beautiful data preview st.markdown(f"""

📊 {uploaded_file.name} {df.shape[0]:,} rows × {df.shape[1]} columns

""", unsafe_allow_html=True) with st.expander("🔍 Preview Data", expanded=True): st.dataframe( df.head(5), use_container_width=True, hide_index=True ) # Quick stats col1, col2, col3, col4 = st.columns(4) with col1: st.metric("📝 Columns", df.shape[1]) with col2: st.metric("📊 Rows", f"{df.shape[0]:,}") with col3: st.metric("🔢 Numeric", len(df.select_dtypes(include=['number']).columns)) with col4: st.metric("📅 Missing", f"{df.isnull().sum().sum():,}") except Exception as e: st.warning(f"⚠️ Could not preview file: {e}") def transcribe_audio(audio_bytes: bytes) -> str: """Transcribe audio using OpenAI Whisper.""" try: client = openai.OpenAI(api_key=OPENAI_API_KEY) audio_file = io.BytesIO(audio_bytes) audio_file.name = "audio.wav" response = client.audio.transcriptions.create( model="whisper-1", file=audio_file ) return response.text.strip() except Exception as e: st.error(f"🎤 Transcription error: {e}") return None def download_file(url: str) -> tuple: """Download file and return (filepath, content_type).""" try: response = requests.get(url, timeout=30) response.raise_for_status() content_type = response.headers.get("content-type", "") filename = f"file_{abs(hash(url)) % 100000}" if "png" in content_type: filename += ".png" elif "jpeg" in content_type or "jpg" in content_type: filename += ".jpg" elif "html" in content_type: filename += ".html" elif "pdf" in content_type: filename += ".pdf" else: filename += ".bin" filepath = TEMP_DIR / filename filepath.write_bytes(response.content) return filepath, content_type except: return None, None def is_noise_line(line: str) -> bool: """Check if a line is agent execution noise that should be filtered.""" line_stripped = line.strip() # Empty or whitespace-only lines if not line_stripped: return False # Keep empty lines for formatting # Running/Thinking status patterns noise_patterns = [ r'^[:\.\*\s●○◐◑◒◓▪▫]+$', # Just dots, bullets, asterisks r'Running\.{2,}', r'Thinking\.{2,}', r'^●\s*\*+', r'^●\s*Bash', r'^●\s*Read', r'^●\s*Write', r'^●\s*Edit', r'^●\s*Glob', r'^●\s*Grep', r'^🔍', r'^🟢\s*\*\*', r'^🟡', r'^🔵', r'^⚪', r'^>\s*```', r'^>\s*Read \*\*\d+\*\*', r'^\s*Bash\s*[●○◐◑◒◓]', r'^\s*Bash\(', r'^\s*●\s*Bash\(', r'\*\*Bash\*\*', r'cd /root/workdir', r'python3 <<', r"<< 'EOF'", r'^EOF$', r'^\s*\.\s*$', r'^\s*\.\.\.\s*$', r'^\s*:\s*$', r'^\s*\*+\s*$', ] for pattern in noise_patterns: if re.search(pattern, line_stripped, re.IGNORECASE): return True # Agent status messages noise_phrases = [ 'Starting data analysis agent', 'The agent can analyze your data', "I'll analyze the Excel file for you", "I'll analyze the CSV file for you", 'Let me first explore the data', 'Let me analyze', 'Let me examine', 'Let me look at', 'import pandas as pd', 'pd.read_excel', 'pd.read_csv', 'nprint(', '\\nprint(', 'workdir &&', ] for phrase in noise_phrases: if phrase.lower() in line_stripped.lower(): return True return False def clean_bot_response(content: str) -> str: """Aggressively clean bot response of all agent execution noise.""" if not content: return "" # Remove code blocks containing agent commands content = re.sub(r'```(?:console|bash|shell|python)[\s\S]*?```', '', content) # Remove inline code with agent commands content = re.sub(r'`cd /root/workdir[^`]*`', '', content) content = re.sub(r'`python3[^`]*`', '', content) # Remove Bash command patterns with the ● indicator content = re.sub(r'●\s*Bash$[^)]*$', '', content) content = re.sub(r'Bash$[^)]*$', '', content) # Remove Running.../Thinking... sequences content = re.sub(r'[:\s]*Running\.{2,}[:\s]*', '', content) content = re.sub(r'[:\s]*Thinking\.{2,}[:\s]*', '', content) # Remove status bullet patterns content = re.sub(r'●\s*\*+[^*\n]*\*+', '', content) # Split and filter lines lines = content.split('\n') clean_lines = [] for line in lines: if not is_noise_line(line): clean_lines.append(line) result = '\n'.join(clean_lines) # Clean up excessive newlines result = re.sub(r'\n{4,}', '\n\n\n', result) # Clean up leading/trailing whitespace on each line while preserving structure lines = result.split('\n') result = '\n'.join(line.rstrip() for line in lines) return result.strip() def format_analysis_content(content: str) -> str: """Format the cleaned content for beautiful display.""" # Add styling to headers content = re.sub(r'^(#{1,3})\s+(.+)$', r'\1 \2', content, flags=re.MULTILINE) # Format key findings with cards content = re.sub( r'^[-•]\s+\*\*([^*]+)\*\*:\s*(.+)$', r'

\1: \2

', content, flags=re.MULTILINE ) return content def render_response(content: str): """Render response with proper chart and file handling.""" if not content: return # Pattern for Poe CDN URLs url_pattern = r'(https://pfst\.cf2\.poecdn\.net/[^\s\)\]\>\"\'\,]+)' # Find all URLs urls = re.findall(url_pattern, content) # Clean content clean_content = clean_bot_response(content) # Remove URLs from text (we'll render them separately) for url in urls: clean_content = clean_content.replace(url, '') # Also remove markdown image syntax clean_content = re.sub(rf'!\[[^\]]*\]${re.escape(url)}[^$]*\)', '', clean_content) # Clean up extra whitespace clean_content = re.sub(r'\n{3,}', '\n\n', clean_content) clean_content = clean_content.strip() # Render cleaned text with nice formatting if clean_content: # Format the content formatted = format_analysis_content(clean_content) st.markdown(formatted, unsafe_allow_html=True) # Process and render files/charts rendered_urls = set() for url in urls: clean_url = url.rstrip('.,;:)>]"\'') if clean_url in rendered_urls: continue rendered_urls.add(clean_url) filepath, content_type = download_file(clean_url) if filepath: suffix = filepath.suffix.lower() if suffix in ['.png', '.jpg', '.jpeg', '.gif', '.webp']: # Render image/chart with nice container st.markdown('

', unsafe_allow_html=True) st.markdown('

📈 Generated Visualization

', unsafe_allow_html=True) st.image(str(filepath), use_container_width=True) st.markdown('

', unsafe_allow_html=True) elif suffix == '.html': # Render HTML chart (Plotly) st.markdown('

', unsafe_allow_html=True) st.markdown('

📊 Interactive Chart

', unsafe_allow_html=True) html_content = filepath.read_text() st.components.v1.html(html_content, height=500, scrolling=True) st.markdown('

', unsafe_allow_html=True) else: # Show file info with clickable download link st.markdown(f"""

📎 Generated File: {filepath.name}

""", unsafe_allow_html=True) def call_poe_api(message: str, file_bytes: bytes = None, filename: str = None) -> str: """Call Poe API WITHOUT streaming - returns only the final complete response.""" api_key = st.session_state.get("api_key", "") bot_name = st.session_state.get("bot_name", "Transmed-Agent") if not api_key: return "❌ Please enter your Poe API key in the sidebar." client = openai.OpenAI( api_key=api_key, base_url="https://api.poe.com/v1", ) # Build content if file_bytes and filename: ct = "text/csv" if filename.lower().endswith(".csv") else "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" file_b64 = base64.b64encode(file_bytes).decode() content = [ {"type": "text", "text": message}, {"type": "file", "file": {"file_data": f"data:{ct};base64,{file_b64}", "filename": filename}} ] else: content = message try: # NON-STREAMING call - waits for complete response response = client.chat.completions.create( model=bot_name, messages=[{"role": "user", "content": content}], stream=False # Disabled streaming! ) return response.choices[0].message.content or "" except Exception as e: return f"❌ Error: {str(e)}" # Display chat history if st.session_state.messages: st.markdown("### 💬 Conversation") for msg in st.session_state.messages: with st.chat_message(msg["role"], avatar="🧑‍💼" if msg["role"] == "user" else "🤖"): if msg["role"] == "assistant": render_response(msg["content"]) else: st.markdown(msg["content"]) # Input area st.markdown("---") st.markdown("### 💭 Ask a Question") col1, col2 = st.columns([4, 1]) with col1: text_input = st.chat_input("What would you like to know about your data?") with col2: audio = st.audio_input("🎤 Voice", help="Click to record your question") # Handle audio input prompt = None if audio is not None: st.audio(audio) with st.spinner("🎤 Transcribing your voice..."): audio_bytes = audio.getvalue() transcribed = transcribe_audio(audio_bytes) if transcribed: st.success(f'🎤 You said: "{transcribed}"') prompt = transcribed # Handle text input if text_input: prompt = text_input # Process prompt if prompt: if not st.session_state.uploaded_file_data: st.warning("⚠️ Please upload a data file first to begin analysis.") else: # User message user_msg = f"📎 **{st.session_state.uploaded_file_name}**\n\n{prompt}" st.session_state.messages.append({"role": "user", "content": user_msg}) with st.chat_message("user", avatar="🧑‍💼"): st.markdown(user_msg) # Bot response - NO STREAMING, wait for complete response with st.chat_message("assistant", avatar="🤖"): # Show analysis status with spinner with st.spinner("🔍 Analyzing your data... This may take a moment."): # Call API without streaming - waits for complete response full_response = call_poe_api( prompt, st.session_state.uploaded_file_data, st.session_state.uploaded_file_name ) # Render only the clean final response render_response(full_response) st.session_state.messages.append({"role": "assistant", "content": full_response}) # Footer st.markdown("---") st.markdown(""" """, unsafe_allow_html=True)