Spaces:

ilsa15
/

chatbot

Sleeping

App Files Files Community

ilsa15 committed on Jul 26, 2025

Commit

b4d166a

verified ·

1 Parent(s): f006201

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -69

app.py CHANGED Viewed

@@ -69,112 +69,85 @@
 # if __name__ == "__main__":
 #     main()
-import nest_asyncio
-from youtube_transcript_api import YouTubeTranscriptApi
 import streamlit as st
-import os
-from groq import Groq
 import requests
 from bs4 import BeautifulSoup
 nest_asyncio.apply()
 # --- CONFIGURATION ---
-YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")
-channel_id = "UCsv3kmQ5k1eIRG2R9mWN"  # @icodeguru0
-TARGET_WEBSITE = "https://icodeguru.com"
-groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
-# --- FUNCTION: Fetch recent YouTube video IDs ---
-def get_latest_video_ids(channel_id, max_results=5):
-    url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
-    response = requests.get(url)
-    videos = response.json().get('items', [])
-    return [v['id']['videoId'] for v in videos if v['id']['kind'] == 'youtube#video']
-# --- FUNCTION: Get YouTube video transcripts ---
-def get_video_transcripts(video_ids):
-    transcripts = []
-    for vid in video_ids:
-        try:
-            transcript = YouTubeTranscriptApi.get_transcript(vid)
-            text = " ".join([t['text'] for t in transcript])
-            transcripts.append(f"(YouTube Video ID: {vid})\n{text}")
-        except:
-            continue
-    return transcripts
-# --- FUNCTION: Scrape textual content + URLs from a website ---
-def scrape_website_text_and_links(base_url, max_pages=5):
     visited = set()
-    data = []
-    def scrape(url):
         if url in visited or len(visited) >= max_pages:
             return
         visited.add(url)
         try:
-            res = requests.get(url, timeout=10)
-            soup = BeautifulSoup(res.content, "html.parser")
-            text = soup.get_text(separator=" ", strip=True)
-            data.append({"url": url, "content": text[:2000]})  # Limit content per page
             for link in soup.find_all("a", href=True):
-                href = link['href']
-                if href.startswith("/") and not href.startswith("//"):
                     href = base_url + href
-                if href.startswith(base_url):
-                    scrape(href)
-        except:
             pass
-    scrape(base_url)
-    return data
-# --- FUNCTION: Ask Groq with context ---
-def ask_groq(context_blocks, question):
-    formatted_context = "\n\n".join(
         [f"[Source]({block['url']}):\n{block['content']}" for block in context_blocks]
     )
     messages = [
-        {"role": "system", "content": "You are a helpful assistant that gives direct answers with reference links from the source."},
-        {"role": "user", "content": f"Context:\n{formatted_context}\n\nQuestion: {question}\nAnswer with clickable links:"}
     ]
-    chat_completion = groq_client.chat.completions.create(
         model="llama-3.3-70b-versatile",
         messages=messages,
     )
-    return chat_completion.choices[0].message.content.strip()
-# --- STREAMLIT APP ---
 def main():
-    st.set_page_config(page_title="EduBot - YouTube + Website QA", layout="wide")
-    st.title("📚 EduBot for @icodeguru0")
-    st.markdown("Ask your question based on **latest YouTube videos** and **icodeguru.com website** content.")
     question = st.text_input("💬 Ask your question here:")
     if question:
-        with st.spinner("🎥 Fetching videos and transcripts..."):
-            video_ids = get_latest_video_ids(channel_id)
-            yt_transcripts = get_video_transcripts(video_ids)
-            yt_blocks = [{"url": f"https://www.youtube.com/watch?v={vid}", "content": txt} for vid, txt in zip(video_ids, yt_transcripts)]
-        with st.spinner("🌐 Scraping website content..."):
-            website_blocks = scrape_website_text_and_links(TARGET_WEBSITE, max_pages=5)
-        context_blocks = yt_blocks + website_blocks
         with st.spinner("🧠 Thinking..."):
-            answer = ask_groq(context_blocks, question)
         st.markdown("### ✅ Answer:")
         st.markdown(answer, unsafe_allow_html=True)
     st.markdown("---")
-    st.caption("Powered by YouTube + iCodeGuru.com + Groq | Built for @icodeguru0")
 if __name__ == "__main__":
     main()

 # if __name__ == "__main__":
 #     main()
 import streamlit as st
 import requests
 from bs4 import BeautifulSoup
+import os
+import nest_asyncio
+from groq import Groq
 nest_asyncio.apply()
 # --- CONFIGURATION ---
# Site root that main() hands to the crawler.
BASE_URL = "https://icode.guru"
# Groq credential taken from the process environment; None when the variable is unset.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# Module-level client used by ask_icodeguru_bot().
groq_client = Groq(api_key=GROQ_API_KEY)
+# --- FUNCTION: Scrape iCode.guru pages ---
def scrape_icodeguru(base_url, max_pages=5):
    """Crawl pages on the same host as *base_url* and collect their text.

    Starting at ``base_url``, each page is fetched, its visible text is
    stored, and its ``<a href>`` links are followed depth-first until
    ``max_pages`` URLs have been visited.  The crawl is best-effort: a page
    that cannot be fetched or parsed is logged and skipped.

    Args:
        base_url: Root URL of the site; only http(s) links on its host are
            followed.
        max_pages: Upper bound on the number of URLs visited.  Failed
            fetches count towards the bound.

    Returns:
        A list of ``{"url": ..., "content": ...}`` dicts, one per page whose
        text is longer than 100 characters.  ``content`` holds at most the
        first 2000 characters of the page text.
    """
    import logging
    from urllib.parse import urldefrag, urljoin, urlparse

    log = logging.getLogger(__name__)
    site_host = urlparse(base_url).netloc
    visited = set()
    content_blocks = []

    def crawl(url):
        if url in visited or len(visited) >= max_pages:
            return
        visited.add(url)

        try:
            response = requests.get(url, timeout=10)
            # Do not feed 4xx/5xx error pages to the model as site content.
            response.raise_for_status()
            soup = BeautifulSoup(response.content, "html.parser")
            page_text = soup.get_text(separator=" ", strip=True)
        except Exception as exc:
            # Deliberately broad: one bad page must not abort the whole
            # crawl, but the failure is recorded instead of being hidden.
            log.warning("Skipping %s: %s", url, exc)
            return

        page_text = page_text.replace("\n", " ").strip()
        if len(page_text) > 100:
            content_blocks.append({
                "url": url,
                "content": page_text[:2000],  # Limit to 2000 characters
            })

        for link in soup.find_all("a", href=True):
            # urljoin resolves "/path", "path" and "//host/path" correctly;
            # urldefrag makes "#section" variants map to the same page.
            target, _fragment = urldefrag(urljoin(url, link["href"]))
            parts = urlparse(target)
            if parts.scheme not in ("http", "https"):
                continue  # mailto:, javascript:, tel:, ...
            # Compare hosts rather than string prefixes so that
            # "https://icode.guru.example.com" is not treated as internal.
            if parts.netloc == site_host and target not in visited:
                crawl(target)

    crawl(base_url)
    return content_blocks
+# --- FUNCTION: Ask Groq with website-only context ---
def ask_icodeguru_bot(context_blocks, question):
    """Answer *question* with Groq, using the scraped page text as context.

    Args:
        context_blocks: List of dicts with ``"url"`` and ``"content"`` keys.
        question: The user's question.

    Returns:
        The model's reply with surrounding whitespace removed.  A fixed
        explanatory string is returned instead when ``context_blocks`` is
        empty or the reply carries no text.
    """
    if not context_blocks:
        # The system prompt restricts the model to the provided content, so
        # with nothing scraped there is nothing to answer from; skip the call.
        return (
            "I couldn't retrieve any content from the website, "
            "so I can't answer that right now. Please try again later."
        )

    full_context = "\n\n".join(
        f"[Source]({block['url']}):\n{block['content']}" for block in context_blocks
    )
    messages = [
        {"role": "system", "content": "You are a helpful assistant. Answer only from the provided website content. Include clickable source links."},
        {"role": "user", "content": f"Context:\n{full_context}\n\nQuestion: {question}\nAnswer:"}
    ]
    response = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=messages,
    )
    reply = response.choices[0].message.content
    # NOTE(review): guards against a reply without text so .strip() cannot
    # raise on None - confirm whether the Groq client can return None here.
    if not reply:
        return "The model returned an empty answer. Please try rephrasing your question."
    return reply.strip()
# --- STREAMLIT UI ---
def main():
    """Render the chat page and answer the question typed by the user.

    Reads a question from a text box, gathers site text with
    ``scrape_icodeguru`` and passes it, together with the question, to
    ``ask_icodeguru_bot``.  The reply is rendered as Markdown.
    """
    st.set_page_config(page_title="iCodeGuru ChatBot", layout="wide")
    st.title("🤖 Ask iCodeGuru Bot")
    st.markdown("Ask questions and get answers **only from [icode.guru](https://icode.guru/)**.")

    # Treat whitespace-only input as "no question yet".
    question = (st.text_input("💬 Ask your question here:") or "").strip()
    if question:
        # Streamlit re-executes the script on every interaction; keeping the
        # crawl result in the session avoids re-fetching the site per question.
        website_blocks = st.session_state.get("website_blocks")
        if not website_blocks:
            with st.spinner("🌐 Scraping icode.guru..."):
                website_blocks = scrape_icodeguru(BASE_URL, max_pages=5)
            # An empty result is not stored, so the next question retries.
            if website_blocks:
                st.session_state["website_blocks"] = website_blocks

        with st.spinner("🧠 Thinking..."):
            answer = ask_icodeguru_bot(website_blocks, question)

        st.markdown("### ✅ Answer:")
        # The answer is model output built from scraped pages, i.e. untrusted
        # text: render it as Markdown only (links stay clickable), never as
        # raw HTML.
        st.markdown(answer)

    st.markdown("---")
    st.caption("Powered by Groq + iCodeGuru | Built by @ilsa")


if __name__ == "__main__":
    main()