Spaces:

awacke1
/

CodeCompetitionClaudeVsGPT

Sleeping

App Files Files Community

awacke1 committed on Dec 19, 2024

Commit

5d65381

verified ·

1 Parent(s): 7938082

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -66

app.py CHANGED Viewed

@@ -16,15 +16,8 @@ import requests
 from collections import defaultdict
 from audio_recorder_streamlit import audio_recorder
 import streamlit.components.v1 as components
-import openai
-from dotenv import load_dotenv
-# Load environment
-load_dotenv()
-openai.api_key = os.getenv('OPENAI_API_KEY')
-# Ensure edge_tts and other dependencies are installed
-# pip install edge-tts openai streamlit-audiorecorder
 # Initialize session state
 if 'search_history' not in st.session_state:
@@ -50,7 +43,7 @@ class VideoSearch:
         self.load_dataset()
     def fetch_dataset_rows(self):
-        """Fetch dataset from HF API"""
         try:
             url = "https://datasets-server.huggingface.co/first-rows?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train"
             response = requests.get(url, timeout=30)
@@ -70,12 +63,11 @@ class VideoSearch:
                         processed_rows.append(row)
                     df = pd.DataFrame(processed_rows)
-                    # Update search columns
                     st.session_state['search_columns'] = [col for col in df.columns
                                                         if col not in ['video_embed', 'description_embed', 'audio_embed']]
                     return df
             return self.load_example_data()
-        except Exception:
             return self.load_example_data()
     def prepare_features(self):
@@ -113,13 +105,14 @@ class VideoSearch:
             else:
                 self.text_embeds = self.video_embeds
-        except Exception:
             # Fallback to random embeddings
             num_rows = len(self.dataset)
             self.video_embeds = np.random.randn(num_rows, 384)
             self.text_embeds = np.random.randn(num_rows, 384)
     def load_example_data(self):
         example_data = [
             {
                 "video_id": "cd21da96-fcca-4c94-a60f-0b1e4e1e29fc",
@@ -162,10 +155,8 @@ class VideoSearch:
         return results
-# Use edge_tts for TTS
 @st.cache_resource
 def get_speech_model():
-    """Cache speech model initialization."""
     return edge_tts.Communicate
 async def generate_speech(text, voice=None):
@@ -183,14 +174,10 @@ async def generate_speech(text, voice=None):
         return None
 def transcribe_audio(audio_path):
-    """Transcribe audio using Whisper."""
-    try:
-        with open(audio_path, "rb") as f:
-            transcription = openai.Audio.transcribe("whisper-1", f)
-        return transcription["text"].strip()
-    except Exception as e:
-        st.error(f"Error transcribing audio: {e}")
-        return ""
 def show_file_manager():
     """Display file manager interface"""
@@ -225,34 +212,67 @@ def show_file_manager():
                     os.remove(f)
                     st.experimental_rerun()
-##########################
-# Arxiv Integration      #
-##########################
-# You need to implement or integrate perform_ai_lookup from your second app into this code.
-# This is a placeholder. Replace with your actual perform_ai_lookup function logic.
-# Ensure you have your Arxiv RAG model endpoint available.
-# Example placeholder implementation (replace with your actual second app code):
-def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False):
-    # Placeholder: In your real code, you'll call your Arxiv RAG endpoint and get results.
-    # Here we just simulate a response.
-    mock_answer = f"This is a mock Arxiv response for query: {q}.\nReferences:\n[Paper 1] Example Title"
-    st.markdown(f"**Arxiv Search Results for '{q}':**\n\n{mock_answer}")
     if vocal_summary:
-        audio_file = asyncio.run(generate_speech("This is a spoken summary of Arxiv results."))
         if audio_file:
             st.audio(audio_file)
-    # Add any other logic: extended_refs, titles_summary, etc.
-    return mock_answer
-############################
-# Main App Layout & Logic  #
-############################
 def main():
-    st.title("🎥 Video & Arxiv Search with Voice")
     # Initialize search class
     search = VideoSearch()
@@ -308,39 +328,32 @@ def main():
                             audio_file = asyncio.run(generate_speech(summary))
                             if audio_file:
                                 st.audio(audio_file)
-                                # Optionally delete after playing:
-                                # if os.path.exists(audio_file):
-                                #    os.remove(audio_file)
     # ---- Tab 2: Voice Input ----
     with tab2:
         st.subheader("Voice Input")
-        st.write("🎙️ Record your voice and automatically transcribe to text:")
         audio_bytes = audio_recorder()
         if audio_bytes:
-            # Save the recorded audio for transcription
             audio_path = f"temp_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
             with open(audio_path, "wb") as f:
                 f.write(audio_bytes)
             st.success("Audio recorded successfully!")
-            # Transcribe using Whisper
             voice_query = transcribe_audio(audio_path)
-            if voice_query:
-                st.markdown("**Transcribed Text:**")
-                st.write(voice_query)
-                st.session_state['last_voice_input'] = voice_query
-                if st.button("🔍 Search from Voice"):
-                    results = search.search(voice_query, None, 20)
-                    for i, result in enumerate(results, 1):
-                        with st.expander(f"Result {i}", expanded=(i==1)):
-                            st.write(result['description'])
-                            if result.get('youtube_id'):
-                                st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result.get('start_time', 0)}")
-            # Clean up
             if os.path.exists(audio_path):
                 os.remove(audio_path)
@@ -349,14 +362,12 @@ def main():
         st.subheader("Arxiv Search")
         q = st.text_input("Enter your Arxiv search query:", value=st.session_state['arxiv_last_query'])
         vocal_summary = st.checkbox("🎙 Short Audio Summary", value=True)
-        extended_refs = st.checkbox("📜 Extended References", value=False)
         titles_summary = st.checkbox("🔖 Titles Only", value=True)
         full_audio = st.checkbox("📚 Full Audio Results", value=False)
         if st.button("🔍 Arxiv Search"):
             st.session_state['arxiv_last_query'] = q
-            perform_ai_lookup(q, vocal_summary=vocal_summary, extended_refs=extended_refs,
-                              titles_summary=titles_summary, full_audio=full_audio)
     # ---- Tab 4: File Manager ----
     with tab4:
@@ -374,7 +385,7 @@ def main():
             with st.expander(f"{entry['timestamp']}: {entry['query']}"):
                 for i, result in enumerate(entry['results'], 1):
                     st.write(f"{i}. {result['description'][:100]}...")
         st.markdown("### Voice Settings")
         st.selectbox("TTS Voice:",
                      ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],

 from collections import defaultdict
 from audio_recorder_streamlit import audio_recorder
 import streamlit.components.v1 as components
+from urllib.parse import quote
+from xml.etree import ElementTree as ET
 # Initialize session state
 if 'search_history' not in st.session_state:
         self.load_dataset()
     def fetch_dataset_rows(self):
+        """Fetch dataset from Hugging Face API"""
         try:
             url = "https://datasets-server.huggingface.co/first-rows?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train"
             response = requests.get(url, timeout=30)
                         processed_rows.append(row)
                     df = pd.DataFrame(processed_rows)
                     st.session_state['search_columns'] = [col for col in df.columns
                                                         if col not in ['video_embed', 'description_embed', 'audio_embed']]
                     return df
             return self.load_example_data()
+        except:
             return self.load_example_data()
     def prepare_features(self):
             else:
                 self.text_embeds = self.video_embeds
+        except:
             # Fallback to random embeddings
             num_rows = len(self.dataset)
             self.video_embeds = np.random.randn(num_rows, 384)
             self.text_embeds = np.random.randn(num_rows, 384)
     def load_example_data(self):
+        """Load example data as fallback"""
         example_data = [
             {
                 "video_id": "cd21da96-fcca-4c94-a60f-0b1e4e1e29fc",
         return results
 @st.cache_resource
 def get_speech_model():
     return edge_tts.Communicate
 async def generate_speech(text, voice=None):
         return None
 def transcribe_audio(audio_path):
+    """Placeholder for ASR transcription (no OpenAI/Anthropic).
+       Integrate your own ASR model or API here."""
+    # For now, just return a message:
+    return "ASR not implemented. Integrate a local model or another service here."
 def show_file_manager():
     """Display file manager interface"""
                     os.remove(f)
                     st.experimental_rerun()
def arxiv_search(query, max_results=5, timeout=30):
    """Search the arXiv Atom API and return the top matching papers.

    Args:
        query: Free-text search string; it is URL-encoded before sending.
        max_results: Maximum number of entries to request from the API.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of ``(title, summary, link)`` tuples, one per feed entry.
        ``link`` is the ``href`` of the entry's ``text/html`` link, or
        ``None`` when the entry has no such link. An empty list is returned
        for a blank query, a non-200 response, a network failure, or a feed
        that cannot be parsed; the last two are also reported via
        ``st.error``.
    """
    # A blank query cannot match anything useful; skip the network round trip.
    if not query or not query.strip():
        return []

    search_url = (
        "https://export.arxiv.org/api/query?"
        f"search_query={quote(query)}&start=0&max_results={max_results}"
    )

    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.get(search_url, timeout=timeout)
    except requests.RequestException as exc:
        st.error(f"Error contacting Arxiv: {exc}")
        return []
    if response.status_code != 200:
        return []

    try:
        # Parse the raw bytes so the XML declaration decides the encoding.
        root = ET.fromstring(response.content)
    except ET.ParseError as exc:
        st.error(f"Error parsing Arxiv response: {exc}")
        return []

    ns = {'atom': 'http://www.w3.org/2005/Atom'}
    results = []
    for entry in root.findall('atom:entry', ns):
        # findtext() yields "" for a missing or empty element instead of
        # raising AttributeError the way ``.find(...).text.strip()`` does.
        raw_title = entry.findtext('atom:title', default="", namespaces=ns)
        raw_summary = entry.findtext('atom:summary', default="", namespaces=ns)
        # Collapse internal whitespace so a title that wraps across lines in
        # the feed stays on one line when rendered as bold markdown.
        title = " ".join(raw_title.split())
        summary = raw_summary.strip()
        link = next(
            (
                node.get('href')
                for node in entry.findall('atom:link', ns)
                if node.get('type') == 'text/html'
            ),
            None,
        )
        results.append((title, summary, link))
    return results
def perform_arxiv_lookup(q, vocal_summary=True, titles_summary=True, full_audio=False):
    """Run an Arxiv search for ``q``, render the hits, and optionally speak them.

    Args:
        q: Search query passed to ``arxiv_search`` (at most 5 results).
        vocal_summary: When true, synthesize and play a short spoken summary.
        titles_summary: With ``vocal_summary``, read out all titles; otherwise
            read the first 200 characters of the first result's summary.
        full_audio: When true, synthesize and play every title and summary.

    Returns:
        None. All output goes through Streamlit widgets.
    """
    papers = arxiv_search(q, max_results=5)
    if not papers:
        st.write("No Arxiv results found.")
        return

    # Written listing of every hit.
    st.markdown(f"**Arxiv Search Results for '{q}':**")
    for rank, paper in enumerate(papers, start=1):
        paper_title, abstract, url = paper
        st.markdown(f"**{rank}. {paper_title}**")
        st.write(abstract)
        if url:
            st.markdown(f"[View Paper]({url})")

    # Short spoken summary: either all titles or a snippet of the top abstract.
    if vocal_summary:
        if titles_summary:
            detail = " Titles: " + ", ".join(paper[0] for paper in papers)
        else:
            detail = " " + papers[0][1][:200]
        short_clip = asyncio.run(
            generate_speech(f"Here are some Arxiv results for {q}. " + detail)
        )
        if short_clip:
            st.audio(short_clip)

    # Full narration of every title and abstract.
    if full_audio:
        narration = "".join(
            f"Result {rank}: {paper_title}. {abstract} "
            for rank, (paper_title, abstract, _) in enumerate(papers, start=1)
        )
        long_clip = asyncio.run(generate_speech(narration))
        if long_clip:
            st.write("### Full Audio")
            st.audio(long_clip)
 def main():
+    st.title("🎥 Video & Arxiv Search with Voice (No OpenAI/Anthropic)")
     # Initialize search class
     search = VideoSearch()
                             audio_file = asyncio.run(generate_speech(summary))
                             if audio_file:
                                 st.audio(audio_file)
     # ---- Tab 2: Voice Input ----
     with tab2:
         st.subheader("Voice Input")
+        st.write("🎙️ Record your voice:")
         audio_bytes = audio_recorder()
         if audio_bytes:
             audio_path = f"temp_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
             with open(audio_path, "wb") as f:
                 f.write(audio_bytes)
             st.success("Audio recorded successfully!")
             voice_query = transcribe_audio(audio_path)
+            st.markdown("**Transcribed Text:**")
+            st.write(voice_query)
+            st.session_state['last_voice_input'] = voice_query
+            if st.button("🔍 Search from Voice"):
+                results = search.search(voice_query, None, 20)
+                for i, result in enumerate(results, 1):
+                    with st.expander(f"Result {i}", expanded=(i==1)):
+                        st.write(result['description'])
+                        if result.get('youtube_id'):
+                            st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result.get('start_time', 0)}")
             if os.path.exists(audio_path):
                 os.remove(audio_path)
         st.subheader("Arxiv Search")
         q = st.text_input("Enter your Arxiv search query:", value=st.session_state['arxiv_last_query'])
         vocal_summary = st.checkbox("🎙 Short Audio Summary", value=True)
         titles_summary = st.checkbox("🔖 Titles Only", value=True)
         full_audio = st.checkbox("📚 Full Audio Results", value=False)
         if st.button("🔍 Arxiv Search"):
             st.session_state['arxiv_last_query'] = q
+            perform_arxiv_lookup(q, vocal_summary=vocal_summary, titles_summary=titles_summary, full_audio=full_audio)
     # ---- Tab 4: File Manager ----
     with tab4:
             with st.expander(f"{entry['timestamp']}: {entry['query']}"):
                 for i, result in enumerate(entry['results'], 1):
                     st.write(f"{i}. {result['description'][:100]}...")
         st.markdown("### Voice Settings")
         st.selectbox("TTS Voice:",
                      ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],