# NOTE: the hosting page status ("Spaces: Sleeping") was captured with this file; it is not part of the program.
import asyncio
import base64
import glob
import html
import json
import math
import os
import random
import re
import time
import zipfile
from collections import Counter, defaultdict
from contextlib import suppress
from datetime import datetime

import anthropic
import cv2
import edge_tts
import openai
import pytz
import requests
import streamlit as st
from audio_recorder_streamlit import audio_recorder
from dotenv import load_dotenv
from gradio_client import Client
from huggingface_hub import InferenceClient
from openai import OpenAI
from PIL import Image
from streamlit_marquee import streamlit_marquee
# Page-level configuration; issued before any other Streamlit call in this file.
st.set_page_config(
    layout="wide",
    page_title="๐ฒTalkingAIResearcher๐",
    page_icon="๐ฒ๐",
)
# Voices offered for text-to-speech; the first entry is used as the session default.
EDGE_TTS_VOICES = [
    "en-US-AriaNeural",
    "en-US-GuyNeural",
    "en-US-JennyNeural",
    "en-GB-SoniaNeural",
]

# Icon shown next to a file in the sidebar, keyed by lower-case extension (no dot).
FILE_EMOJIS = dict(
    md="๐",
    mp3="๐ต",
    wav="๐",
    txt="๐",
    pdf="๐",
)
# Initialize session states: each key is seeded only when it is not already
# present, so values set during earlier reruns are left untouched.
for state_key, initial_value in (
    ('tts_voice', EDGE_TTS_VOICES[0]),
    ('audio_format', 'mp3'),
    ('messages', []),
    ('chat_history', []),
    ('viewing_prefix', None),
    ('should_rerun', False),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
# API Setup: both clients take their key from the process environment
# (the value is None when the variable is not set).
openai_client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
claude_client = anthropic.Anthropic(api_key=os.environ.get('ANTHROPIC_API_KEY'))
def get_cached_audio_b64(file_path):
    """Return the contents of *file_path* as a base64-encoded ASCII string.

    The file is read in binary mode on every call; nothing is memoised here
    despite the function's name.
    """
    with open(file_path, "rb") as audio_file:
        raw_bytes = audio_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode()
def beautify_filename(filename):
    """Return *filename* without its extension, with '_' and '.' shown as spaces."""
    stem, _extension = os.path.splitext(filename)
    # One translation pass replaces both separator characters.
    return stem.translate(str.maketrans({'_': ' ', '.': ' '}))
def create_zip_of_files(md_files, mp3_files, wav_files, query=''):
    """Bundle the given files into a timestamped zip in the working directory.

    Args:
        md_files: Paths of markdown files to include.
        mp3_files: Paths of MP3 files to include.
        wav_files: Paths of WAV files to include.
        query: Accepted for interface compatibility; not used by this function.

    Returns:
        The archive's file name (``<yymm_HHMM>_archive.zip``), or ``None``
        when there is nothing to archive.
    """
    members = md_files + mp3_files + wav_files
    if not members:
        return None

    archive_name = datetime.now().strftime("%y%m_%H%M") + "_archive.zip"
    with zipfile.ZipFile(archive_name, mode='w') as archive:
        for member in members:
            archive.write(member)
    return archive_name
def get_download_link(file_path, file_type="zip"):
    """Return an HTML anchor that downloads *file_path* through a base64 data URI.

    Args:
        file_path: Path of the file to embed; it is read fully into memory.
        file_type: Short type tag ('zip', 'mp3', 'wav', 'md', ...). It selects
            the icon and the MIME type of the data URI.

    Returns:
        An ``<a>`` element as a string, meant to be rendered with
        ``unsafe_allow_html=True``.
    """
    # Registered media types; the previous ``application/<file_type>`` form is
    # only correct for zip and remains the fallback for unknown tags.
    mime_types = {
        'zip': 'application/zip',
        'mp3': 'audio/mpeg',
        'wav': 'audio/wav',
        'md': 'text/markdown',
    }
    ext_map = {'zip': '๐ฆ', 'mp3': '๐ต', 'wav': '๐', 'md': '๐'}

    with open(file_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()

    mime = mime_types.get(file_type, f"application/{file_type}")
    emoji = ext_map.get(file_type, '')
    # The name lands in both an attribute and element text, so escape it once
    # (quotes included) to keep the markup well-formed for any file name.
    safe_name = html.escape(os.path.basename(file_path), quote=True)
    return f'<a href="data:{mime};base64,{b64}" download="{safe_name}">{emoji} Download {safe_name}</a>'
def load_files_for_sidebar():
    """Group files in the working directory by the first nine characters of their name.

    Every ``*.*`` entry except names ending in ``readme.md`` (case-insensitive)
    is considered. Names shorter than nine characters go to the ``'Other'`` group.

    Returns:
        A list of ``(group_name, [paths])`` tuples, ordered so the group holding
        the most recently modified file comes first.
    """
    grouped = defaultdict(list)
    for path in glob.glob("*.*"):
        if path.lower().endswith('readme.md'):
            continue
        leaf = os.path.basename(path)
        prefix = 'Other' if len(leaf) < 9 else leaf[:9]
        grouped[prefix].append(path)

    def newest_mtime(entry):
        # Sort key: modification time of the freshest file in the group.
        _, members = entry
        return max(os.path.getmtime(member) for member in members)

    return sorted(grouped.items(), key=newest_mtime, reverse=True)
def display_marquee_controls():
    """Render the marquee appearance widgets in the sidebar and return their values.

    Returns:
        A dict of style settings (background, text colour, font size,
        animation duration, width and line height) that callers in this file
        unpack into ``streamlit_marquee``.
    """
    st.sidebar.markdown("### ๐ฏ Marquee Settings")
    colour_column, sizing_column = st.sidebar.columns(2)

    with colour_column:
        background = st.color_picker("๐จ Background", "#1E1E1E")
        foreground = st.color_picker("โ๏ธ Text", "#FFFFFF")

    with sizing_column:
        size_px = st.slider("๐ Size", 10, 24, 14)
        seconds = st.slider("โฑ๏ธ Speed", 1, 20, 10)

    settings = {
        "background": background,
        "color": foreground,
        "font-size": f"{size_px}px",
        "animationDuration": f"{seconds}s",
        "width": "100%",
        "lineHeight": "35px",
    }
    return settings
def _remove_files(paths):
    """Delete every path in *paths*, ignoring files that are already gone."""
    for path in paths:
        with suppress(FileNotFoundError):
            os.remove(path)


def _format_group_label(group_name):
    """Return a readable date for a ``%y%m_%H%M`` prefix, else the name unchanged."""
    if len(group_name) != 9:
        return group_name
    try:
        parsed = datetime.strptime(group_name, "%y%m_%H%M")
    except ValueError:
        # Nine-character prefixes of ordinary file names (e.g. "requireme")
        # are not timestamps; show them as-is instead of crashing.
        return group_name
    return parsed.strftime("%Y-%m-%d %H:%M")


def display_file_manager_sidebar(groups_sorted):
    """Render the sidebar file manager: bulk delete, zip download and per-group listing.

    Args:
        groups_sorted: Iterable of ``(group_name, [paths])`` pairs, in the
            shape produced by ``load_files_for_sidebar``.

    Side effects:
        May delete files, sets ``st.session_state.should_rerun`` after a
        deletion and ``st.session_state.viewing_prefix`` when a group's view
        button is pressed.
    """
    st.sidebar.title("๐ File Manager")

    # Collect the md/mp3/wav files across all groups for the bulk actions.
    all_files = {'md': [], 'mp3': [], 'wav': []}
    for _, files in groups_sorted:
        for f in files:
            ext = os.path.splitext(f)[1].lower().strip('.')
            if ext in all_files:
                all_files[ext].append(f)

    cols = st.sidebar.columns(4)
    for col, (ext, files) in zip(cols, all_files.items()):
        with col:
            if st.button(f"๐๏ธ {ext.upper()}"):
                _remove_files(files)
                st.session_state.should_rerun = True

    if st.sidebar.button("๐ฆ Zip All"):
        zip_name = create_zip_of_files(
            all_files['md'], all_files['mp3'], all_files['wav']
        )
        if zip_name:
            st.sidebar.markdown(get_download_link(zip_name), unsafe_allow_html=True)

    # Registered media types for the looping <audio> element.
    audio_mime = {'mp3': 'audio/mpeg', 'wav': 'audio/wav'}

    for group_name, files in groups_sorted:
        label = _format_group_label(group_name)
        with st.sidebar.expander(f"๐ {label} ({len(files)})", expanded=True):
            c1, c2 = st.columns(2)
            with c1:
                if st.button("๐", key=f"view_{group_name}"):
                    st.session_state.viewing_prefix = group_name
            with c2:
                if st.button("๐๏ธ", key=f"del_{group_name}"):
                    _remove_files(files)
                    st.session_state.should_rerun = True

            for f in files:
                # A delete button pressed earlier in this run may have removed
                # the file; skip it rather than handing a dead path to
                # st.audio / open before the rerun happens.
                if not os.path.exists(f):
                    continue
                ext = os.path.splitext(f)[1].lower().strip('.')
                emoji = FILE_EMOJIS.get(ext, '๐')
                pretty_name = beautify_filename(os.path.basename(f))
                st.write(f"{emoji} **{pretty_name}**")
                if ext in audio_mime:
                    st.audio(f)
                    if st.button("๐", key=f"loop_{f}"):
                        audio_b64 = get_cached_audio_b64(f)
                        player_id = f"player_{f}"
                        # Escape the id for the attribute and emit it as a JS
                        # string literal so unusual file names cannot break
                        # the generated markup or script.
                        id_attr = html.escape(player_id, quote=True)
                        id_js = json.dumps(player_id)
                        st.components.v1.html(
                            f'''
                            <audio id="{id_attr}" loop>
                            <source src="data:{audio_mime[ext]};base64,{audio_b64}">
                            </audio>
                            <script>
                            document.getElementById({id_js}).play();
                            </script>
                            ''',
                            height=0
                        )
async def edge_tts_generate(text, voice, file_format="mp3"):
    """Synthesise *text* with an Edge TTS voice and save it to a timestamped file.

    Args:
        text: Text to speak; runs of whitespace are collapsed to single spaces.
        voice: Edge TTS voice name, also embedded in the output file name.
        file_format: Extension used for the output file name.

    Returns:
        The output file name (``<yymm_HHMM>_<voice>.<file_format>``), or
        ``None`` when the text is empty after whitespace normalisation.
    """
    cleaned = re.sub(r'\s+', ' ', text).strip()
    if cleaned == "":
        return None

    stamp = datetime.now().strftime('%y%m_%H%M')
    out_path = f"{stamp}_{voice}.{file_format}"
    speaker = edge_tts.Communicate(cleaned, voice)
    await speaker.save(out_path)
    return out_path
# arXiv identifiers are YYMM.NNNN (2007-2014) or YYMM.NNNNN (2015 onwards).
_ARXIV_ID_RE = re.compile(r'(\d{4}\.\d{4,5})')


def parse_arxiv_refs(text):
    """Parse a markdown listing of papers into a list of dicts.

    A line containing ``|`` starts a new paper: the first two ``|``-separated
    fields are its date and title, and the first arXiv-style identifier found
    anywhere on the line becomes its id. The next non-empty line is taken as
    the authors; all following lines up to the next header are the summary.

    Args:
        text: The listing, one logical item per line.

    Returns:
        A list of dicts with keys ``date``, ``title``, ``authors``,
        ``summary`` and ``id`` (``''`` when no identifier is found). Summary
        fragments are joined with single spaces and carry no surrounding
        whitespace.
    """
    papers = []
    current_paper = None
    for line in text.split('\n'):
        if '|' in line:
            if current_paper:
                papers.append(current_paper)
            parts = line.strip('* ').split('|')
            id_match = _ARXIV_ID_RE.search(line)
            current_paper = {
                'date': parts[0].strip(),
                'title': parts[1].strip(),
                'authors': '',
                'summary': '',
                'id': id_match.group(1) if id_match else '',
            }
        elif current_paper:
            if not current_paper['authors']:
                # A blank line leaves authors empty, so the next line is tried.
                current_paper['authors'] = line.strip('* ')
            else:
                fragment = line.strip()
                if fragment:
                    current_paper['summary'] = (
                        f"{current_paper['summary']} {fragment}".strip()
                    )
    if current_paper:
        papers.append(current_paper)
    return papers
def perform_ai_lookup(query, marquee_settings=None):
    """Search the remote arXiv RAG service for *query* and show each hit as a marquee.

    Args:
        query: Search text forwarded to the Gradio endpoint.
        marquee_settings: Style dict for ``streamlit_marquee``. When omitted,
            the sidebar controls are rendered to obtain one; callers that have
            already rendered those controls in the current run should pass
            their settings in, because rendering the same un-keyed widgets
            twice in one run is rejected by Streamlit as duplicates.

    Returns:
        The list of paper dicts produced by ``parse_arxiv_refs`` from the
        first element of the service response.
    """
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    response = client.predict(
        query, 20, "Semantic Search",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_name="/update_with_rag_md"
    )
    papers = parse_arxiv_refs(response[0])
    if marquee_settings is None:
        marquee_settings = display_marquee_controls()
    for index, paper in enumerate(papers):
        content = f"๐ {paper['title']} | ๐ค {paper['authors']} | ๐ {paper['summary']}"
        streamlit_marquee(
            content=content,
            **marquee_settings,
            # The position keeps keys unique and stable across reruns, even
            # when an id is missing or repeated.
            key=f"paper_{index}_{paper['id']}"
        )
        st.write("")  # Spacing
    return papers


def main():
    """Render the page: welcome marquee, action selector, file manager and sidebar notes."""
    marquee_settings = display_marquee_controls()
    streamlit_marquee(
        content="๐ Welcome to TalkingAIResearcher | ๐ค Your Research Assistant",
        **marquee_settings,
        key="welcome"
    )
    tab = st.radio("Action:", ["๐ค Voice", "๐ ArXiv", "๐ Editor"], horizontal=True)
    if tab == "๐ ArXiv":
        query = st.text_input("๐ Search:")
        if query:
            # Reuse the settings gathered above instead of rendering the
            # sidebar controls a second time in the same run.
            papers = perform_ai_lookup(query, marquee_settings=marquee_settings)
            st.write(f"Found {len(papers)} papers")
    groups = load_files_for_sidebar()
    display_file_manager_sidebar(groups)
    if st.session_state.should_rerun:
        # Clear the flag first so the rerun does not loop.
        st.session_state.should_rerun = False
        st.rerun()
    # Condensed sidebar markdown
    sidebar_md = """# ๐ Research Papers
## ๐ง AGI Levels
L0 โ No AI
L1 ๐ฑ ChatGPT/Bard [2303.08774v1](https://arxiv.org/abs/2303.08774) [PDF](https://arxiv.org/pdf/2303.08774.pdf)
L2 ๐ช Watson [2201.11903v1](https://arxiv.org/abs/2201.11903) [PDF](https://arxiv.org/pdf/2201.11903.pdf)
L3 ๐ฏ DALLยทE [2204.06125v1](https://arxiv.org/abs/2204.06125) [PDF](https://arxiv.org/pdf/2204.06125.pdf)
L4 ๐ AlphaGo [1712.01815v1](https://arxiv.org/abs/1712.01815) [PDF](https://arxiv.org/pdf/1712.01815.pdf)
L5 ๐ AlphaFold [2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)
## ๐งฌ AlphaFold2
[2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)
1. ๐งฌ Input Seq โ 2. ๐ DB Search โ 3. ๐งฉ MSA
4. ๐ Templates โ 5. ๐ Evoformer โ 6. ๐งฑ Structure
7. ๐ฏ 3D Predict โ 8. โป๏ธ Recycle x3"""
    st.sidebar.markdown(sidebar_md)
# Script entry point: build the page only when this file is executed directly.
if __name__ == "__main__":
    main()