import streamlit as st
import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
from datetime import datetime
from audio_recorder_streamlit import audio_recorder
from collections import defaultdict
from dotenv import load_dotenv
from gradio_client import Client
from huggingface_hub import InferenceClient
from PIL import Image
from openai import OpenAI
from streamlit_marquee import streamlit_marquee
import asyncio
import edge_tts

# App Config
st.set_page_config(page_title="🚲TalkingAIResearcher🏆", page_icon="🚲🏆", layout="wide")
load_dotenv()

EDGE_TTS_VOICES = ["en-US-AriaNeural", "en-US-GuyNeural", "en-US-JennyNeural", "en-GB-SoniaNeural"]
FILE_EMOJIS = {"md": "📝", "mp3": "🎵", "wav": "🔊", "txt": "📄", "pdf": "📑", "html": "🌐"}

# Initialize session state
state_vars = {
    'tts_voice': EDGE_TTS_VOICES[0],
    'audio_format': 'mp3',
    'messages': [],
    'chat_history': [],
    'transcript_history': [],
    'viewing_prefix': None,
    'should_rerun': False,
    'editing_mode': False,
    'current_file': None,
    'file_content': None,
    'old_val': None,
    'last_query': ''
}
for key, default in state_vars.items():
    if key not in st.session_state:
        st.session_state[key] = default

# API clients setup
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
claude_client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))

def get_cached_audio_b64(file_path):
    with open(file_path, "rb") as f:
        return base64.b64encode(f.read()).decode()

def beautify_filename(filename):
    name = os.path.splitext(filename)[0]
    return name.replace('_', ' ').replace('.', ' ')
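
# NOTE: create_file() is called by the GPT, Claude, and ArXiv handlers below but is not
# defined in this listing. The sketch here is an assumption: it saves a prompt/response
# pair as a markdown file whose name carries the same '%y%m_%H%M' timestamp prefix that
# load_files_for_sidebar() groups on; the exact naming and layout are illustrative.
def create_file(prompt, response, file_type="md"):
    safe_prompt = re.sub(r'[^\w\s-]', '', prompt)[:30].strip().replace(' ', '_')
    filename = f"{datetime.now().strftime('%y%m_%H%M')}_{safe_prompt}.{file_type}"
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(f"# {prompt}\n\n{response}")
    return filename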

def clean_speech_text(text):
    text = re.sub(r'\s+', ' ', text.strip())
    text = text.replace("</s>", "").replace("#", "")
    text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
    return text

async def edge_tts_generate(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    text = clean_speech_text(text)
    if not text:
        return None
    # edge_tts expects signed rate/pitch strings such as "+0%" and "+0Hz"
    communicate = edge_tts.Communicate(text, voice, rate=f"{rate:+d}%", pitch=f"{pitch:+d}Hz")
    filename = f"{datetime.now().strftime('%y%m_%H%M')}_{voice}.{file_format}"
    await communicate.save(filename)
    return filename

def speak_text(text, voice=None, file_format=None):
    if not text:
        return None
    voice = voice or st.session_state['tts_voice']
    fmt = file_format or st.session_state['audio_format']
    return asyncio.run(edge_tts_generate(text, voice, file_format=fmt))

def process_audio_file(audio_path):
    with open(audio_path, "rb") as f:
        transcript = openai_client.audio.transcriptions.create(model="whisper-1", file=f)
    text = transcript.text
    st.session_state.messages.append({"role": "user", "content": text})
    return text

def process_with_gpt(text):
    if not text:
        return
    st.session_state.messages.append({"role": "user", "content": text})
    with st.chat_message("user"):
        st.markdown(text)
    with st.chat_message("assistant"):
        response = openai_client.chat.completions.create(
            model="gpt-4-turbo-preview",
            messages=st.session_state.messages,
            stream=False
        )
        answer = response.choices[0].message.content
        st.write(f"GPT-4: {answer}")
        create_file(text, answer, "md")
    st.session_state.messages.append({"role": "assistant", "content": answer})
    return answer

def process_with_claude(text):
    if not text:
        return
    with st.chat_message("user"):
        st.markdown(text)
    with st.chat_message("assistant"):
        response = claude_client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=4000,
            messages=[{"role": "user", "content": text}]
        )
        answer = response.content[0].text
        st.write(f"Claude-3: {answer}")
        create_file(text, answer, "md")
    st.session_state.chat_history.append({"user": text, "claude": answer})
    return answer

def load_files_for_sidebar():
    """Load and filter files by timestamp prefix"""
    files = []
    for f in glob.glob("*.*"):
        basename = os.path.basename(f)
        if f.endswith('.md'):
            if len(basename) >= 9 and re.match(r'\d{4}_\d{4}', basename[:9]):
                files.append(f)
        else:
            files.append(f)
    groups = defaultdict(list)
    for f in files:
        basename = os.path.basename(f)
        group_name = basename[:9] if len(basename) >= 9 else 'Other'
        groups[group_name].append(f)
    return sorted(groups.items(),
                  key=lambda x: max(os.path.getmtime(f) for f in x[1]),
                  reverse=True)
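
# NOTE: create_zip_of_files() and get_download_link() are used by the "Zip All" button in
# display_file_manager_sidebar() but are not defined in this listing. These are minimal
# sketches under that assumption: a plain zipfile bundle plus a base64 data-URI download
# link; the archive naming and HTML are illustrative.
def create_zip_of_files(md_files, mp3_files, wav_files):
    all_paths = md_files + mp3_files + wav_files
    if not all_paths:
        return None
    zip_name = f"archive_{datetime.now().strftime('%y%m_%H%M')}.zip"
    with zipfile.ZipFile(zip_name, 'w') as z:
        for path in all_paths:
            z.write(path)
    return zip_name

def get_download_link(file_path):
    with open(file_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    name = os.path.basename(file_path)
    return f'<a href="data:application/zip;base64,{b64}" download="{name}">📦 Download {name}</a>'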

def display_file_manager_sidebar(groups_sorted):
    st.sidebar.title("📚 File Manager")
    all_files = {'md': [], 'mp3': [], 'wav': []}
    for _, files in groups_sorted:
        for f in files:
            ext = os.path.splitext(f)[1].lower().strip('.')
            if ext in all_files:
                all_files[ext].append(f)
    cols = st.sidebar.columns(4)
    for i, (ext, files) in enumerate(all_files.items()):
        with cols[i]:
            if st.button(f"🗑️ {ext.upper()}"):
                for f in files:
                    os.remove(f)
                st.session_state.should_rerun = True
    if st.sidebar.button("📦 Zip All"):
        zip_name = create_zip_of_files(all_files['md'], all_files['mp3'], all_files['wav'])
        if zip_name:
            st.sidebar.markdown(get_download_link(zip_name), unsafe_allow_html=True)
    for group_name, files in groups_sorted:
        if len(group_name) == 9 and group_name != 'Other':
            try:
                timestamp = datetime.strptime(group_name, "%y%m_%H%M").strftime("%Y-%m-%d %H:%M")
            except ValueError:
                timestamp = group_name
        else:
            timestamp = group_name
        with st.sidebar.expander(f"📁 {timestamp} ({len(files)})", expanded=True):
            c1, c2 = st.columns(2)
            with c1:
                if st.button("👀", key=f"view_{group_name}"):
                    st.session_state.viewing_prefix = group_name
            with c2:
                if st.button("🗑️", key=f"del_{group_name}"):
                    for f in files:
                        os.remove(f)
                    st.session_state.should_rerun = True
            for f in files:
                ext = os.path.splitext(f)[1].lower().strip('.')
                emoji = FILE_EMOJIS.get(ext, '📄')
                pretty_name = beautify_filename(os.path.basename(f))
                st.write(f"{emoji} **{pretty_name}**")
                if ext in ['mp3', 'wav']:
                    st.audio(f)
                    if st.button("🔄", key=f"loop_{f}"):
                        audio_b64 = get_cached_audio_b64(f)
                        st.components.v1.html(
                            f'''<audio id="player_{f}" loop>
                                <source src="data:audio/{ext};base64,{audio_b64}">
                            </audio>
                            <script>
                                document.getElementById("player_{f}").play();
                            </script>''',
                            height=0
                        )

def perform_arxiv_search(query):
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    papers = client.predict(
        query, 20, "Semantic Search",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_name="/update_with_rag_md"
    )[0]
    summary = client.predict(
        query,
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        True,
        api_name="/ask_llm"
    )
    result = f"### 🔎 {query}\n\n{summary}\n\n{papers}"
    st.markdown(result)
    papers_data = parse_arxiv_refs(papers)
    if papers_data:
        create_paper_audio(papers_data, query)
        display_papers(papers_data)
    create_file(query, result, "md")
    return result

def parse_arxiv_refs(text):
    """Parse the markdown paper list: a '|'-delimited header line starts each paper
    (date | title | ...), the next line is taken as authors, remaining lines as summary."""
    papers = []
    current = None
    for line in text.split('\n'):
        if '|' in line:
            if current:
                papers.append(current)
            parts = line.strip('* ').split('|')
            id_match = re.search(r'(\d{4}\.\d{5})', line)
            current = {
                'date': parts[0].strip(),
                'title': parts[1].strip(),
                'authors': '',
                'summary': '',
                'id': id_match.group(1) if id_match else ''
            }
        elif current:
            if not current['authors']:
                current['authors'] = line.strip('* ')
            else:
                current['summary'] += ' ' + line.strip()
    if current:
        papers.append(current)
    return papers[:20]

def create_paper_audio(papers, query):
    combined = []
    for paper in papers:
        try:
            text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
            file_format = st.session_state['audio_format']
            audio_file = speak_text(text, file_format=file_format)
            paper['audio'] = audio_file
            st.write(f"### {FILE_EMOJIS.get(file_format, '')} {os.path.basename(audio_file)}")
            st.audio(audio_file)
            combined.append(paper['title'])
        except Exception as e:
            st.warning(f"Error generating audio for {paper['title']}: {str(e)}")
    if combined:
        summary = f"Found papers about: {'; '.join(combined)}. Query was: {query}"
        summary_audio = speak_text(summary)
        if summary_audio:
            st.write("### 📢 Summary")
            st.audio(summary_audio)
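
# NOTE: display_papers() is called from perform_arxiv_search() but is not defined in this
# listing. A minimal sketch, assuming it renders each parsed paper in an expander with its
# metadata, an arXiv link built from the parsed id, and any audio attached by
# create_paper_audio(); layout details are illustrative.
def display_papers(papers):
    st.write("## Research Papers")
    for i, paper in enumerate(papers, start=1):
        with st.expander(f"{i}. 📄 {paper['title']}", expanded=False):
            st.markdown(f"**{paper['date']}** | {paper['authors']}")
            st.write(paper['summary'])
            if paper.get('id'):
                st.markdown(f"[arXiv:{paper['id']}](https://arxiv.org/abs/{paper['id']})")
            if paper.get('audio'):
                st.audio(paper['audio'])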

def main():
    st.sidebar.markdown("### 🚲BikeAI🏆 Research Assistant")
    # Voice settings
    st.sidebar.markdown("### 🎤 Voice Config")
    voice = st.sidebar.selectbox("Voice:", EDGE_TTS_VOICES,
                                 index=EDGE_TTS_VOICES.index(st.session_state['tts_voice']))
    fmt = st.sidebar.radio("Format:", ["MP3", "WAV"],
                           index=0 if st.session_state['audio_format'] == 'mp3' else 1)
    if voice != st.session_state['tts_voice']:
        st.session_state['tts_voice'] = voice
        st.rerun()
    if fmt.lower() != st.session_state['audio_format']:
        st.session_state['audio_format'] = fmt.lower()
        st.rerun()
    mode = st.radio("Mode:", ["🎤 Voice", "🔍 ArXiv", "📝 Editor"], horizontal=True)
    if mode == "🔍 ArXiv":
        query = st.text_input("🔍 Search:")
        if query:
            perform_arxiv_search(query)
    elif mode == "🎤 Voice":
        text = st.text_area("Message:", height=100).strip()
        if st.button("Send"):
            process_with_gpt(text)
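        # Sketch (assumption): wire up the imported but otherwise unused audio_recorder so
        # recorded speech is transcribed by Whisper through process_audio_file(); the temp
        # filename is illustrative. process_audio_file() already appends the transcript to
        # the message history, so it is only displayed here rather than re-sent.
        audio_bytes = audio_recorder()
        if audio_bytes:
            with open("temp_recording.wav", "wb") as f:
                f.write(audio_bytes)
            transcribed = process_audio_file("temp_recording.wav")
            if transcribed:
                st.write(f"🎤 {transcribed}")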
        st.subheader("History")
        tab1, tab2 = st.tabs(["Claude", "GPT-4"])
        with tab1:
            for msg in st.session_state.chat_history:
                st.write("You:", msg["user"])
                st.write("Claude:", msg["claude"])
        with tab2:
            for msg in st.session_state.messages:
                with st.chat_message(msg["role"]):
                    st.markdown(msg["content"])
    elif mode == "📝 Editor":
        if st.session_state.current_file:
            st.subheader(f"Editing: {st.session_state.current_file}")
            new_content = st.text_area("Content:", st.session_state.file_content, height=300)
            if st.button("Save"):
                with open(st.session_state.current_file, 'w') as f:
                    f.write(new_content)
                st.success("Saved!")
                st.session_state.should_rerun = True
    groups = load_files_for_sidebar()
    display_file_manager_sidebar(groups)
    if st.session_state.should_rerun:
        st.session_state.should_rerun = False
        st.rerun()

sidebar_md = """# 📚 Research Papers
## AGI Levels
L0 ❌ No AI
L1 🌱 ChatGPT [2303.08774](https://arxiv.org/abs/2303.08774) | [PDF](https://arxiv.org/pdf/2303.08774.pdf)
L2 💪 Watson [2201.11903](https://arxiv.org/abs/2201.11903) | [PDF](https://arxiv.org/pdf/2201.11903.pdf)
L3 🎯 DALL·E [2204.06125](https://arxiv.org/abs/2204.06125) | [PDF](https://arxiv.org/pdf/2204.06125.pdf)
L4 🏆 AlphaGo [1712.01815](https://arxiv.org/abs/1712.01815) | [PDF](https://arxiv.org/pdf/1712.01815.pdf)
L5 🚀 AlphaFold [2203.15556](https://arxiv.org/abs/2203.15556) | [PDF](https://arxiv.org/pdf/2203.15556.pdf)
## 🧬 AlphaFold2
[2203.15556](https://arxiv.org/abs/2203.15556) | [PDF](https://arxiv.org/pdf/2203.15556.pdf)
1. 🧬 Input → 2. 🔍 Search → 3. 🧩 MSA
4. 📑 Templates → 5. 🔄 Evoformer → 6. 🧱 Structure
7. 🎯 3D Predict → 8. ♻️ Recycle"""

st.sidebar.markdown(sidebar_md)

if __name__ == "__main__":
    main()