Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile | |
| import plotly.graph_objects as go | |
| import streamlit.components.v1 as components | |
| from datetime import datetime | |
| from audio_recorder_streamlit import audio_recorder | |
| from bs4 import BeautifulSoup | |
| from collections import defaultdict, deque | |
| from dotenv import load_dotenv | |
| from gradio_client import Client | |
| from huggingface_hub import InferenceClient | |
| from io import BytesIO | |
| from PIL import Image | |
| from PyPDF2 import PdfReader | |
| from urllib.parse import quote | |
| from xml.etree import ElementTree as ET | |
| from openai import OpenAI | |
| import extra_streamlit_components as stx | |
| from streamlit.runtime.scriptrunner import get_script_run_ctx | |
| import asyncio | |
| import edge_tts | |
# English voice identifiers, grouped by locale. They follow the same naming
# scheme as the default ``voice`` argument used by the audio helpers below.
ENGLISH_VOICES = [
    # United States
    "en-US-AriaNeural",     # female, conversational
    "en-US-JennyNeural",    # female, customer service
    "en-US-GuyNeural",      # male, newscast
    "en-US-RogerNeural",    # male, calm
    # United Kingdom
    "en-GB-SoniaNeural",    # female
    "en-GB-RyanNeural",     # male
    # Australia
    "en-AU-NatashaNeural",  # female
    "en-AU-WilliamNeural",  # male
    # Canada
    "en-CA-ClaraNeural",    # female
    "en-CA-LiamNeural",     # male
    # Ireland
    "en-IE-EmilyNeural",    # female
    "en-IE-ConnorNeural",   # male
    # India
    "en-IN-NeerjaNeural",   # female
    "en-IN-PrabhatNeural",  # male
]
# Core configuration & setup.
# The page configuration call is kept as the first Streamlit command here.
_menu_items = {
    'Get Help': 'https://huggingface.co/awacke1',
    'Report a bug': 'https://huggingface.co/spaces/awacke1',
    'About': "ARIA: Academic Research Interactive Assistant",
}
st.set_page_config(
    page_title="ARIA Research Assistant",
    page_icon="🔬",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items=_menu_items,
)

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# API setup: an environment variable wins; the Streamlit secret is the fallback,
# and an empty string is used when neither is defined.
# NOTE(review): the Anthropic key is read from env var 'ANTHROPIC_API_KEY_3' but
# from secret 'ANTHROPIC_API_KEY' - confirm the differing names are intentional.
_openai_secret = st.secrets.get('OPENAI_API_KEY', '')
_anthropic_secret = st.secrets.get('ANTHROPIC_API_KEY', '')
openai_api_key = os.getenv('OPENAI_API_KEY', _openai_secret)
anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', _anthropic_secret)

openai_client = OpenAI(api_key=openai_api_key)
claude_client = anthropic.Anthropic(api_key=anthropic_key)
# Session state management: seed each key with its default exactly once.
# Keys that already exist (i.e. on every rerun after the first) are left alone.
_session_defaults = {
    'transcript_history': [],
    'chat_history': [],
    'openai_model': "gpt-4-vision-preview",
    'messages': [],
    'last_voice_input': "",
    'current_audio': None,
    'autoplay_audio': True,
    'should_rerun': False,
    'autorun': True,
    'run_option': "Arxiv",
    'last_processed_text': "",
}
for _state_key, _state_default in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
# Custom CSS: the stylesheet is kept in one constant and injected as raw HTML.
_CUSTOM_CSS = """
<style>
    .main {
        background: linear-gradient(135deg, #1a1a1a, #2d2d2d);
        color: #ffffff;
    }
    .stMarkdown {
        font-family: 'Helvetica Neue', sans-serif;
    }
    .stButton>button {
        background-color: #4CAF50;
        color: white;
        padding: 0.5rem 1rem;
        border-radius: 5px;
        border: none;
        transition: background-color 0.3s;
    }
    .stButton>button:hover {
        background-color: #45a049;
    }
    .audio-player {
        margin: 1rem 0;
        padding: 1rem;
        border-radius: 10px;
        background: #f5f5f5;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    .voice-container {
        padding: 1rem;
        background: white;
        border-radius: 10px;
        margin: 1rem 0;
    }
    .text-display {
        margin: 1rem 0;
        padding: 1rem;
        background: #f9f9f9;
        border-radius: 5px;
        font-size: 1.1em;
    }
    .model-selector {
        margin: 1rem 0;
        padding: 0.5rem;
        background: #ffffff;
        border-radius: 5px;
    }
    .response-container {
        margin-top: 2rem;
        padding: 1rem;
        background: rgba(255, 255, 255, 0.05);
        border-radius: 10px;
    }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
def create_voice_component():
    """Embed the browser speech-recognition widget and return the embed result.

    The markup is rendered through ``components.html`` at a height of 200px.
    The embedded script uses ``webkitSpeechRecognition`` in continuous mode
    with interim results. It appends finalized phrases to a running
    transcript and posts that transcript to the parent window as a
    ``streamlit:setComponentValue`` message: once per finalized result
    (trigger ``speech``) and once after 1500 ms without a new final result
    (trigger ``pause``). Recognition is restarted whenever it ends.
    """
    widget_markup = """
    <div style="padding: 20px; border-radius: 10px; background: #f0f2f6;">
        <div id="status" style="margin-bottom: 10px; color: #666;">Starting voice recognition...</div>
        <div id="interim" style="color: #666; min-height: 24px;"></div>
        <div id="output" style="margin-top: 10px; padding: 10px; min-height: 100px;
            background: white; border-radius: 5px; white-space: pre-wrap;"></div>
        <script>
            if ('webkitSpeechRecognition' in window) {
                const recognition = new webkitSpeechRecognition();
                recognition.continuous = true;
                recognition.interimResults = true;
                const status = document.getElementById('status');
                const interim = document.getElementById('interim');
                const output = document.getElementById('output');
                let fullTranscript = '';
                let lastPauseTime = Date.now();
                let pauseThreshold = 1500;
                window.addEventListener('load', () => {
                    setTimeout(() => {
                        try {
                            recognition.start();
                            status.textContent = 'Listening...';
                        } catch (e) {
                            console.error('Start error:', e);
                            status.textContent = 'Error starting recognition';
                        }
                    }, 1000);
                });
                recognition.onresult = (event) => {
                    let interimTranscript = '';
                    let finalTranscript = '';
                    for (let i = event.resultIndex; i < event.results.length; i++) {
                        const transcript = event.results[i][0].transcript;
                        if (event.results[i].isFinal) {
                            finalTranscript += transcript + ' ';
                            lastPauseTime = Date.now();
                        } else {
                            interimTranscript += transcript;
                        }
                    }
                    if (finalTranscript) {
                        fullTranscript += finalTranscript;
                        interim.textContent = '';
                        output.textContent = fullTranscript;
                        window.parent.postMessage({
                            type: 'streamlit:setComponentValue',
                            value: {
                                text: fullTranscript,
                                trigger: 'speech'
                            },
                            dataType: 'json',
                        }, '*');
                    } else if (interimTranscript) {
                        interim.textContent = '... ' + interimTranscript;
                    }
                    output.scrollTop = output.scrollHeight;
                };
                setInterval(() => {
                    if (fullTranscript && Date.now() - lastPauseTime > pauseThreshold) {
                        if (output.dataset.lastProcessed !== fullTranscript) {
                            output.dataset.lastProcessed = fullTranscript;
                            window.parent.postMessage({
                                type: 'streamlit:setComponentValue',
                                value: {
                                    text: fullTranscript,
                                    trigger: 'pause'
                                },
                                dataType: 'json',
                            }, '*');
                        }
                    }
                }, 500);
                recognition.onend = () => {
                    try {
                        recognition.start();
                        status.textContent = 'Listening...';
                    } catch (e) {
                        console.error('Restart error:', e);
                        status.textContent = 'Recognition stopped. Refresh to restart.';
                    }
                };
                recognition.onerror = (event) => {
                    console.error('Recognition error:', event.error);
                    status.textContent = 'Error: ' + event.error;
                };
            } else {
                document.getElementById('status').textContent = 'Speech recognition not supported in this browser';
            }
        </script>
    </div>
    """
    return components.html(widget_markup, height=200)
def get_audio_autoplay_html(audio_path):
    """Build an HTML snippet that autoplays an audio file and offers a download.

    The file is read fully and embedded as a base64 ``data:`` URI, once for the
    ``<audio>`` element and once for the download link.

    Args:
        audio_path: Path of the audio file to embed.

    Returns:
        The HTML snippet, or the string ``"Error loading audio: <reason>"`` when
        the file cannot be read (the function does not raise for that case).
    """
    # Imported locally so this helper does not depend on a module-level name.
    from html import escape

    try:
        with open(audio_path, "rb") as audio_file:
            audio_b64 = base64.b64encode(audio_file.read()).decode()
        # The name lands inside a double-quoted HTML attribute; escape it so a
        # file name containing quotes, '&' or '<' cannot break the markup.
        download_name = escape(os.path.basename(audio_path), quote=True)
    except (OSError, TypeError, ValueError) as e:
        # OSError: missing/unreadable file; TypeError/ValueError: invalid path.
        return f"Error loading audio: {str(e)}"

    return f'''
    <div class="audio-player">
        <audio controls autoplay style="width: 100%;">
            <source src="data:audio/mpeg;base64,{audio_b64}" type="audio/mpeg">
            Your browser does not support the audio element.
        </audio>
        <div style="margin-top: 5px;">
            <a href="data:audio/mpeg;base64,{audio_b64}"
               download="{download_name}"
               style="text-decoration: none; color: #4CAF50;">
                ⬇️ Download Audio
            </a>
        </div>
    </div>
    '''
# Audio Processing Functions
def clean_for_speech(text: str) -> str:
    """Normalize markdown-flavoured text before it is sent to speech synthesis.

    Newlines and ``</s>`` markers become spaces, ``#`` characters are dropped,
    parenthesised http(s) URLs are removed, and runs of whitespace collapse to
    a single space with the ends trimmed.
    """
    # Applied in this exact order, one literal replacement at a time.
    for token, substitute in (("\n", " "), ("</s>", " "), ("#", "")):
        text = text.replace(token, substitute)
    without_links = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
    return re.sub(r"\s+", " ", without_links).strip()
async def generate_audio(text, voice="en-US-AriaNeural", rate="+0%", pitch="+0Hz"):
    """Synthesize ``text`` to an MP3 file with Edge TTS.

    Args:
        text: Text to speak; it is passed through ``clean_for_speech`` first.
        voice: Voice identifier handed to ``edge_tts.Communicate``.
        rate: Speaking-rate adjustment handed to ``edge_tts.Communicate``.
        pitch: Pitch adjustment handed to ``edge_tts.Communicate``.

    Returns:
        The name of the MP3 file written to the current working directory, or
        ``None`` when nothing is left to speak after cleaning.
    """
    text = clean_for_speech(text)
    if not text.strip():
        return None
    # Microseconds (%f) are part of the name so that two syntheses started in
    # the same second do not write to, and overwrite, the same file.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    output_file = f"response_{timestamp}.mp3"
    communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
    await communicate.save(output_file)
    return output_file
def render_audio_result(audio_file, title="Generated Audio"):
    """Show a heading and the autoplay audio player for ``audio_file``.

    Nothing is rendered when ``audio_file`` is falsy or the path does not exist.
    """
    if not audio_file or not os.path.exists(audio_file):
        return
    st.markdown(f"### {title}")
    player_html = get_audio_autoplay_html(audio_file)
    st.markdown(player_html, unsafe_allow_html=True)
async def process_voice_search(query, voice="en-US-AriaNeural"):
    """Run an Arxiv search for ``query`` and synthesize the response as audio.

    The generated audio file name is also stored in
    ``st.session_state.current_audio``.

    Returns:
        A ``(response, audio_file)`` tuple.
    """
    # The references returned alongside the response are not used here.
    response, _refs = perform_arxiv_search(query)
    spoken = await generate_audio(response, voice=voice)
    st.session_state.current_audio = spoken
    return response, spoken
# Arxiv Search Functions
def perform_arxiv_search(query):
    """Query the hosted Arxiv RAG app and assemble a markdown response.

    Two endpoints of the ``awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern``
    Gradio app are called: ``/update_with_rag_md`` (the first element of its
    result is used as the references) and ``/ask_llm`` (used as the summary).

    Returns:
        A ``(response, refs)`` tuple where ``response`` is the markdown text
        combining the query, the summary and the references.
    """
    llm_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    rag_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")

    rag_result = rag_client.predict(
        query, 20, "Semantic Search", llm_name,
        api_name="/update_with_rag_md",
    )
    refs = rag_result[0]

    summary = rag_client.predict(query, llm_name, True, api_name="/ask_llm")

    response = f"### Search Results for: {query}\n\n{summary}\n\n### References\n\n{refs}"
    return response, refs
def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True,
                      full_audio=False, voice="en-US-AriaNeural"):
    """Run an Arxiv search, display it, and optionally narrate it.

    Args:
        q: Search query.
        vocal_summary: When true, narrate a short "Summary of results" line.
        extended_refs: Accepted for interface compatibility; not used here.
        titles_summary: Accepted for interface compatibility; not used here.
        full_audio: When true, narrate the complete response.
        voice: Voice identifier forwarded to ``generate_audio``.

    Returns:
        The markdown response produced by ``perform_arxiv_search``.
    """
    started_at = time.time()
    response, _refs = perform_arxiv_search(q)
    st.markdown(response)

    # Generate audio responses: (enabled?, text to speak, heading), in the
    # order they are rendered.
    narrations = (
        (full_audio, response, "Complete Response"),
        (vocal_summary, f"Summary of results for query: {q}", "Summary"),
    )
    for enabled, spoken_text, heading in narrations:
        if not enabled:
            continue
        clip = asyncio.run(generate_audio(spoken_text, voice=voice))
        if clip:
            render_audio_result(clip, heading)

    elapsed = time.time() - started_at
    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
    return response
def render_search_interface():
    """Main search interface with voice recognition and model selection.

    Reads the value of the ``mycomponent`` custom component. When it is
    non-empty, shows it in an editable text area together with a model
    selector and the AutoRun / FullAudio toggles, then runs the selected
    backend either automatically (AutoRun on and the component value differs
    from the last processed one) or when the Run button is pressed.
    """
    st.header("🔍 Voice Search & Research")

    # Get voice component value
    voice_widget = components.declare_component("mycomponent", path="mycomponent")
    val = voice_widget(my_input_value="Hello")
    if not val:
        return

    # Show the detected input in an edit box, with newlines flattened.
    edited_input = st.text_area("✏️ Edit Input:", value=val.replace('\n', ' '), height=100)
    run_option = st.selectbox("Model:", ["Arxiv", "GPT-4o", "Claude-3.5"])
    left, right = st.columns(2)
    with left:
        autorun = st.checkbox("⚙ AutoRun", value=True)
    with right:
        full_audio = st.checkbox("📚FullAudio", value=False,
                                 help="Generate full audio response")

    input_changed = val != st.session_state.get('old_val', None)
    # Short-circuit: the Run button is only rendered when auto-run does not fire.
    if not ((autorun and input_changed) or st.button("▶ Run")):
        return

    st.session_state.old_val = val
    if run_option == "Arxiv":
        perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
                          titles_summary=True, full_audio=full_audio)
    elif run_option == "GPT-4o":
        process_with_gpt(edited_input)
    elif run_option == "Claude-3.5":
        process_with_claude(edited_input)
def main():
    """Render the whole page: action selector, selected view, and file sidebar.

    The radio selector switches between four views:

    * Voice  - delegates to ``render_search_interface``.
    * ArXiv  - text query with audio/transcript options and a re-run field.
    * Media  - gallery of ``*.png``/``*.jpg`` images and ``*.mp4`` videos found
      in the working directory, each with an "Analyze" button.
    * Editor - edits the file named by ``st.session_state.current_file``.

    After the view, the file-manager sidebar is drawn and, if a file group is
    selected for viewing, its files are shown. A pending
    ``st.session_state.should_rerun`` flag triggers ``st.rerun()`` at the end.
    """
    st.sidebar.markdown("### 🚲BikeAI🏆 Multi-Agent Research")
    tab_main = st.radio("Action:", ["🎤 Voice", "📸 Media", "🔍 ArXiv", "📝 Editor"], horizontal=True)

    if tab_main == "🎤 Voice":
        render_search_interface()

    elif tab_main == "🔍 ArXiv":
        st.subheader("🔍 Query ArXiv")
        q = st.text_input("🔍 Query:")
        st.markdown("### 🎛 Options")
        vocal_summary = st.checkbox("🎙ShortAudio", value=True)
        extended_refs = st.checkbox("📜LongRefs", value=False)
        titles_summary = st.checkbox("🔖TitlesOnly", value=True)
        full_audio = st.checkbox("📚FullAudio", value=False,
                                 help="Full audio of results")
        full_transcript = st.checkbox("🧾FullTranscript", value=False,
                                      help="Generate a full transcript file")
        if q and st.button("🔍Run"):
            result = perform_ai_lookup(q, vocal_summary=vocal_summary, extended_refs=extended_refs,
                                       titles_summary=titles_summary, full_audio=full_audio)
            if full_transcript:
                save_full_transcript(q, result)

        st.markdown("### Change Prompt & Re-Run")
        q_new = st.text_input("🔄 Modify Query:")
        if q_new and st.button("🔄 Re-Run with Modified Query"):
            result = perform_ai_lookup(q_new, vocal_summary=vocal_summary, extended_refs=extended_refs,
                                       titles_summary=titles_summary, full_audio=full_audio)
            if full_transcript:
                save_full_transcript(q_new, result)

    elif tab_main == "📸 Media":
        st.header("📸 Images & 🎥 Videos")
        tabs = st.tabs(["🖼 Images", "🎥 Video"])
        with tabs[0]:
            imgs = glob.glob("*.png") + glob.glob("*.jpg")
            if imgs:
                c = st.slider("Cols", 1, 5, 3)
                cols = st.columns(c)
                for i, f in enumerate(imgs):
                    # Distribute the images round-robin over the columns.
                    with cols[i % c]:
                        st.image(Image.open(f), use_container_width=True)
                        if st.button(f"👀 Analyze {os.path.basename(f)}", key=f"analyze_{f}"):
                            a = process_image(f, "Describe this image.")
                            st.markdown(a)
            else:
                st.write("No images found.")
        with tabs[1]:
            vids = glob.glob("*.mp4")
            if vids:
                for v in vids:
                    with st.expander(f"🎥 {os.path.basename(v)}"):
                        st.video(v)
                        if st.button(f"Analyze {os.path.basename(v)}", key=f"analyze_{v}"):
                            a = process_video_with_gpt(v, "Describe video.")
                            st.markdown(a)
            else:
                st.write("No videos found.")

    elif tab_main == "📝 Editor":
        if getattr(st.session_state, 'current_file', None):
            st.subheader(f"Editing: {st.session_state.current_file}")
            # .get(): the content key may not be set even when a file is selected.
            new_text = st.text_area("✏️ Content:", st.session_state.get('file_content', ''), height=300)
            if st.button("💾 Save"):
                with open(st.session_state.current_file, 'w', encoding='utf-8') as f:
                    f.write(new_text)
                st.success("Updated!")
                st.session_state.should_rerun = True
        else:
            st.write("Select a file from the sidebar to edit.")

    groups, sorted_prefixes = load_files_for_sidebar()
    display_file_manager_sidebar(groups, sorted_prefixes)

    # Read after the sidebar has been drawn; .get() avoids an AttributeError
    # when no group has ever been selected in this session.
    viewing_prefix = st.session_state.get('viewing_prefix')
    if viewing_prefix and viewing_prefix in groups:
        st.write("---")
        st.write(f"**Viewing Group:** {viewing_prefix}")
        for f in groups[viewing_prefix]:
            fname = os.path.basename(f)
            ext = os.path.splitext(fname)[1].lower().strip('.')
            st.write(f"### {fname}")
            if ext == "md":
                # Context manager so the file handle is closed after reading.
                with open(f, 'r', encoding='utf-8') as md_file:
                    content = md_file.read()
                st.markdown(content)
            elif ext == "mp3":
                st.audio(f)
            else:
                st.markdown(get_download_link(f), unsafe_allow_html=True)
        if st.button("❌ Close"):
            st.session_state.viewing_prefix = None
            # The group above was already drawn in this run; request a rerun so
            # it disappears immediately instead of on the next interaction.
            st.session_state.should_rerun = True

    if st.session_state.should_rerun:
        st.session_state.should_rerun = False
        st.rerun()