Spaces:

ilsa15
/

chatbot

Sleeping

App Files Files Community

ilsa15 committed on Jul 26, 2025

Commit

f006201

verified ·

1 Parent(s): 1251c8d

Update app.py

Browse files

Files changed (1) hide show

app.py +131 -23

app.py CHANGED Viewed

@@ -1,72 +1,180 @@
 import nest_asyncio
 from youtube_transcript_api import YouTubeTranscriptApi
 import streamlit as st
 import os
 from groq import Groq
 nest_asyncio.apply()
 # --- CONFIGURATION ---
-YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")  # Set in your HuggingFace Secrets
 channel_id = "UCsv3kmQ5k1eIRG2R9mWN"  # @icodeguru0
-# Initialize Groq client once
 groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
-# --- FUNCTION: Fetch recent video IDs from YouTube channel ---
 def get_latest_video_ids(channel_id, max_results=5):
-    import requests
     url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
     response = requests.get(url)
     videos = response.json().get('items', [])
     return [v['id']['videoId'] for v in videos if v['id']['kind'] == 'youtube#video']
-# --- FUNCTION: Get video transcripts ---
 def get_video_transcripts(video_ids):
-    all_transcripts = []
     for vid in video_ids:
         try:
             transcript = YouTubeTranscriptApi.get_transcript(vid)
             text = " ".join([t['text'] for t in transcript])
-            all_transcripts.append(text)
         except:
             continue
-    return all_transcripts
-# --- FUNCTION: Ask Groq API using official client ---
-def ask_groq(context, question):
     messages = [
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}\nAnswer:"}
     ]
     chat_completion = groq_client.chat.completions.create(
-        model="llama-3.3-70b-versatile",  # Or the model you have access to
         messages=messages,
     )
     return chat_completion.choices[0].message.content.strip()
 # --- STREAMLIT APP ---
 def main():
-    st.set_page_config(page_title="EduBot - YouTube Channel QA", layout="wide")
-    st.title("🎓 EduBot for @icodeguru0")
-    st.markdown("Ask anything based on the channel’s recent videos.")
     question = st.text_input("💬 Ask your question here:")
     if question:
-        with st.spinner("🔍 Fetching videos and transcripts..."):
             video_ids = get_latest_video_ids(channel_id)
-            transcripts = get_video_transcripts(video_ids)
-            full_context = "\n\n".join(transcripts)
         with st.spinner("🧠 Thinking..."):
-            answer = ask_groq(full_context, question)
-        st.success(answer)
     st.markdown("---")
-    st.caption("Powered by YouTube + Groq | Built for @icodeguru0")
 if __name__ == "__main__":
     main()

+# import nest_asyncio
+# from youtube_transcript_api import YouTubeTranscriptApi
+# import streamlit as st
+# import os
+# from groq import Groq
+# nest_asyncio.apply()
+# # --- CONFIGURATION ---
+# YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")  # Set in your HuggingFace Secrets
+# channel_id = "UCsv3kmQ5k1eIRG2R9mWN"  # @icodeguru0
+# # Initialize Groq client once
+# groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+# # --- FUNCTION: Fetch recent video IDs from YouTube channel ---
+# def get_latest_video_ids(channel_id, max_results=5):
+#     import requests
+#     url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
+#     response = requests.get(url)
+#     videos = response.json().get('items', [])
+#     return [v['id']['videoId'] for v in videos if v['id']['kind'] == 'youtube#video']
+# # --- FUNCTION: Get video transcripts ---
+# def get_video_transcripts(video_ids):
+#     all_transcripts = []
+#     for vid in video_ids:
+#         try:
+#             transcript = YouTubeTranscriptApi.get_transcript(vid)
+#             text = " ".join([t['text'] for t in transcript])
+#             all_transcripts.append(text)
+#         except:
+#             continue
+#     return all_transcripts
+# # --- FUNCTION: Ask Groq API using official client ---
+# def ask_groq(context, question):
+#     messages = [
+#         {"role": "system", "content": "You are a helpful assistant."},
+#         {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}\nAnswer:"}
+#     ]
+#     chat_completion = groq_client.chat.completions.create(
+#         model="llama-3.3-70b-versatile",  # Or the model you have access to
+#         messages=messages,
+#     )
+#     return chat_completion.choices[0].message.content.strip()
+# # --- STREAMLIT APP ---
+# def main():
+#     st.set_page_config(page_title="EduBot - YouTube Channel QA", layout="wide")
+#     st.title("🎓 EduBot for @icodeguru0")
+#     st.markdown("Ask anything based on the channel’s recent videos.")
+#     question = st.text_input("💬 Ask your question here:")
+#     if question:
+#         with st.spinner("🔍 Fetching videos and transcripts..."):
+#             video_ids = get_latest_video_ids(channel_id)
+#             transcripts = get_video_transcripts(video_ids)
+#             full_context = "\n\n".join(transcripts)
+#         with st.spinner("🧠 Thinking..."):
+#             answer = ask_groq(full_context, question)
+#         st.success(answer)
+#     st.markdown("---")
+#     st.caption("Powered by YouTube + Groq | Built for @icodeguru0")
+# if __name__ == "__main__":
+#     main()
 import nest_asyncio
 from youtube_transcript_api import YouTubeTranscriptApi
 import streamlit as st
 import os
 from groq import Groq
+import requests
+from bs4 import BeautifulSoup
 nest_asyncio.apply()
 # --- CONFIGURATION ---
+YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")
 channel_id = "UCsv3kmQ5k1eIRG2R9mWN"  # @icodeguru0
+TARGET_WEBSITE = "https://icodeguru.com"
 groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+# --- FUNCTION: Fetch recent YouTube video IDs ---
 def get_latest_video_ids(channel_id, max_results=5):
     url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
     response = requests.get(url)
     videos = response.json().get('items', [])
     return [v['id']['videoId'] for v in videos if v['id']['kind'] == 'youtube#video']
+# --- FUNCTION: Get YouTube video transcripts ---
 def get_video_transcripts(video_ids):
+    transcripts = []
     for vid in video_ids:
         try:
             transcript = YouTubeTranscriptApi.get_transcript(vid)
             text = " ".join([t['text'] for t in transcript])
+            transcripts.append(f"(YouTube Video ID: {vid})\n{text}")
         except:
             continue
+    return transcripts
+# --- FUNCTION: Scrape textual content + URLs from a website ---
+def scrape_website_text_and_links(base_url, max_pages=5):
+    visited = set()
+    data = []
+    def scrape(url):
+        if url in visited or len(visited) >= max_pages:
+            return
+        visited.add(url)
+        try:
+            res = requests.get(url, timeout=10)
+            soup = BeautifulSoup(res.content, "html.parser")
+            text = soup.get_text(separator=" ", strip=True)
+            data.append({"url": url, "content": text[:2000]})  # Limit content per page
+            for link in soup.find_all("a", href=True):
+                href = link['href']
+                if href.startswith("/") and not href.startswith("//"):
+                    href = base_url + href
+                if href.startswith(base_url):
+                    scrape(href)
+        except:
+            pass
+    scrape(base_url)
+    return data
+# --- FUNCTION: Ask Groq with context ---
+def ask_groq(context_blocks, question):
+    formatted_context = "\n\n".join(
+        [f"[Source]({block['url']}):\n{block['content']}" for block in context_blocks]
+    )
     messages = [
+        {"role": "system", "content": "You are a helpful assistant that gives direct answers with reference links from the source."},
+        {"role": "user", "content": f"Context:\n{formatted_context}\n\nQuestion: {question}\nAnswer with clickable links:"}
     ]
     chat_completion = groq_client.chat.completions.create(
+        model="llama-3.3-70b-versatile",
         messages=messages,
     )
     return chat_completion.choices[0].message.content.strip()
 # --- STREAMLIT APP ---
 def main():
+    st.set_page_config(page_title="EduBot - YouTube + Website QA", layout="wide")
+    st.title("📚 EduBot for @icodeguru0")
+    st.markdown("Ask your question based on **latest YouTube videos** and **icodeguru.com website** content.")
     question = st.text_input("💬 Ask your question here:")
     if question:
+        with st.spinner("🎥 Fetching videos and transcripts..."):
             video_ids = get_latest_video_ids(channel_id)
+            yt_transcripts = get_video_transcripts(video_ids)
+            yt_blocks = [{"url": f"https://www.youtube.com/watch?v={vid}", "content": txt} for vid, txt in zip(video_ids, yt_transcripts)]
+        with st.spinner("🌐 Scraping website content..."):
+            website_blocks = scrape_website_text_and_links(TARGET_WEBSITE, max_pages=5)
+        context_blocks = yt_blocks + website_blocks
         with st.spinner("🧠 Thinking..."):
+            answer = ask_groq(context_blocks, question)
+        st.markdown("### ✅ Answer:")
+        st.markdown(answer, unsafe_allow_html=True)
     st.markdown("---")
+    st.caption("Powered by YouTube + iCodeGuru.com + Groq | Built for @icodeguru0")
 if __name__ == "__main__":
     main()