ilsa15 committed on
Commit
9156401
·
verified ·
1 Parent(s): b4d166a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -40
app.py CHANGED
@@ -68,86 +68,107 @@
68
 
69
  # if __name__ == "__main__":
70
  # main()
71
-
 
72
  import streamlit as st
73
- import requests
74
- from bs4 import BeautifulSoup
75
  import os
76
- import nest_asyncio
77
  from groq import Groq
 
 
78
 
79
  nest_asyncio.apply()
80
 
81
  # --- CONFIGURATION ---
 
 
82
  BASE_URL = "https://icode.guru"
83
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
84
- groq_client = Groq(api_key=GROQ_API_KEY)
85
 
86
- # --- FUNCTION: Scrape iCode.guru pages ---
87
def scrape_icodeguru(base_url, max_pages=5):
    """Crawl pages of one site and collect their visible text.

    Starting at ``base_url``, follows ``<a href>`` links depth-first and
    stops once ``max_pages`` distinct pages have been requested.

    Args:
        base_url: Absolute URL where the crawl starts; only links on the
            same host are followed.
        max_pages: Upper bound on the number of pages requested.

    Returns:
        A list of ``{"url": ..., "content": ...}`` dicts, one per page whose
        text is longer than 100 characters. ``content`` is capped at 2000
        characters.
    """
    # Local import so this function does not rely on a file-level import.
    from urllib.parse import urldefrag, urljoin, urlparse

    site_host = urlparse(base_url).netloc
    visited = set()
    content_blocks = []

    def crawl(url):
        # "https://host" and "https://host/" are the same page; count it once
        # so the small page budget is not spent on duplicates.
        page_key = url.rstrip("/")
        if page_key in visited or len(visited) >= max_pages:
            return
        visited.add(page_key)

        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.RequestException:
            # Best-effort crawl: skip unreachable pages and HTTP error pages.
            return

        soup = BeautifulSoup(response.content, "html.parser")
        page_text = soup.get_text(separator=" ", strip=True)
        page_text = page_text.replace("\n", " ").strip()
        if len(page_text) > 100:
            content_blocks.append({
                "url": url,
                "content": page_text[:2000],  # Limit to 2000 characters
            })

        for link in soup.find_all("a", href=True):
            try:
                # Resolve relative and protocol-relative links against the
                # current page and drop "#fragment" variants of the same URL.
                target = urldefrag(urljoin(url, link["href"])).url
                parts = urlparse(target)
            except ValueError:
                continue  # malformed href
            # Compare hosts instead of string prefixes so that a look-alike
            # such as "https://icode.guru.example.com" is not followed.
            if parts.scheme in ("http", "https") and parts.netloc == site_host:
                crawl(target)

    crawl(base_url)
    return content_blocks
116
 
117
- # --- FUNCTION: Ask Groq with website-only context ---
118
def ask_icodeguru_bot(context_blocks, question):
    """Ask the Groq chat model a question using scraped pages as context.

    Args:
        context_blocks: Iterable of dicts with ``"url"`` and ``"content"``
            keys.
        question: The user's question.

    Returns:
        The model's reply with surrounding whitespace removed, or an empty
        string when the reply carries no text.
    """
    full_context = "\n\n".join(
        f"[Source]({block['url']}):\n{block['content']}" for block in context_blocks
    )
    messages = [
        {"role": "system", "content": "You are a helpful assistant. Answer only from the provided website content. Include clickable source links."},
        {"role": "user", "content": f"Context:\n{full_context}\n\nQuestion: {question}\nAnswer:"},
    ]
    response = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=messages,
    )
    # The message content may be None; guard so .strip() cannot raise.
    reply = response.choices[0].message.content
    return (reply or "").strip()
131
 
132
- # --- STREAMLIT UI ---
133
def main():
    """Render the Streamlit page and answer questions from icode.guru content."""
    st.set_page_config(page_title="iCodeGuru ChatBot", layout="wide")
    st.title("🤖 Ask iCodeGuru Bot")
    st.markdown("Ask questions and get answers **only from [icode.guru](https://icode.guru/)**.")

    question = st.text_input("💬 Ask your question here:")
    # Whitespace-only input should not trigger a crawl and a model call.
    if question and question.strip():
        try:
            with st.spinner("🌐 Scraping icode.guru..."):
                website_blocks = scrape_icodeguru(BASE_URL, max_pages=5)

            with st.spinner("🧠 Thinking..."):
                answer = ask_icodeguru_bot(website_blocks, question)
        except Exception as exc:
            # UI boundary: report the failure on the page instead of crashing.
            st.error(f"Could not answer the question: {exc}")
        else:
            st.markdown("### ✅ Answer:")
            # The answer is model output derived from scraped web pages, so it
            # is rendered as plain Markdown; raw HTML is not enabled.
            st.markdown(answer)

    st.markdown("---")
    st.caption("Powered by Groq + iCodeGuru | Built by @ilsa")


if __name__ == "__main__":
    main()
 
68
 
69
  # if __name__ == "__main__":
70
  # main()
71
+ import nest_asyncio
72
+ from youtube_transcript_api import YouTubeTranscriptApi
73
  import streamlit as st
 
 
74
  import os
 
75
  from groq import Groq
76
+ import requests
77
+ from bs4 import BeautifulSoup
78
 
79
  nest_asyncio.apply()
80
 
81
  # --- CONFIGURATION ---
82
+ YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY") # Set in your HuggingFace Secrets
83
+ channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # @icodeguru0
84
  BASE_URL = "https://icode.guru"
 
 
85
 
86
+ # Initialize Groq client once
87
+ groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
88
+
89
+ # --- FUNCTION: Fetch recent video IDs from YouTube channel ---
90
def get_latest_video_ids(channel_id, max_results=5):
    """Return the IDs of the most recent videos on a YouTube channel.

    Calls the YouTube Data API v3 ``search`` endpoint, ordered by date, using
    the module-level ``YOUTUBE_API_KEY``.

    Args:
        channel_id: ID of the channel to query.
        max_results: Maximum number of search results to request.

    Returns:
        A list of video ID strings. The list is empty when the request fails,
        the API returns an error, or no videos are found.
    """
    # Passing params lets requests URL-encode the values instead of
    # splicing them into the URL by hand.
    params = {
        "key": YOUTUBE_API_KEY,
        "channelId": channel_id,
        "part": "snippet,id",
        "order": "date",
        "maxResults": max_results,
    }
    try:
        response = requests.get(
            "https://www.googleapis.com/youtube/v3/search",
            params=params,
            timeout=10,  # never hang the UI on a stalled connection
        )
        response.raise_for_status()
        items = response.json().get("items", [])
    except (requests.RequestException, ValueError):
        # Best-effort: an unreachable or failing API yields no videos.
        return []

    # Search results can also be channels or playlists; keep videos only.
    return [
        item["id"]["videoId"]
        for item in items
        if item.get("id", {}).get("kind") == "youtube#video" and "videoId" in item["id"]
    ]
95
+
96
+ # --- FUNCTION: Get video transcripts ---
97
def get_video_transcripts(video_ids):
    """Fetch transcripts for the given videos and flatten each to one string.

    Args:
        video_ids: Iterable of YouTube video ID strings.

    Returns:
        A list with one space-joined transcript string per video whose
        transcript could be fetched. Videos that fail are skipped, so the
        list may be shorter than ``video_ids``.
    """
    all_transcripts = []
    for vid in video_ids:
        try:
            # NOTE(review): newer youtube-transcript-api releases appear to
            # replace this static call with an instance ``fetch`` method.
            # Confirm the pinned version still provides ``get_transcript``.
            transcript = YouTubeTranscriptApi.get_transcript(vid)
            text = " ".join(segment["text"] for segment in transcript)
        except Exception:
            # Best-effort: skip videos without a usable transcript, but let
            # KeyboardInterrupt/SystemExit through (a bare except would not).
            continue
        all_transcripts.append(text)
    return all_transcripts
107
+
108
+ # --- NEW FUNCTION: Scrape icode.guru ---
109
def scrape_icodeguru(base_url="https://icode.guru", max_pages=5):
    """Crawl pages of one site and collect their visible text.

    Starting at ``base_url``, follows ``<a href>`` links depth-first and
    stops once ``max_pages`` distinct pages have been requested.

    Args:
        base_url: Absolute URL where the crawl starts; only links on the
            same host are followed.
        max_pages: Upper bound on the number of pages requested.

    Returns:
        A list of strings of the form ``"[Source](<url>):\\n<text>"``, one
        per page whose text is longer than 100 characters. The text is capped
        at 2000 characters.
    """
    # Local import so this function does not rely on a file-level import.
    from urllib.parse import urldefrag, urljoin, urlparse

    site_host = urlparse(base_url).netloc
    visited = set()
    blocks = []

    def crawl(url):
        # "https://host" and "https://host/" are the same page; count it once
        # so the small page budget is not spent on duplicates.
        page_key = url.rstrip("/")
        if page_key in visited or len(visited) >= max_pages:
            return
        visited.add(page_key)

        try:
            res = requests.get(url, timeout=10)
            res.raise_for_status()
        except requests.RequestException:
            # Best-effort crawl: skip unreachable pages and HTTP error pages.
            return

        soup = BeautifulSoup(res.content, "html.parser")
        page_text = soup.get_text(separator=" ", strip=True)
        if len(page_text) > 100:
            blocks.append(f"[Source]({url}):\n{page_text[:2000]}")

        for link in soup.find_all("a", href=True):
            try:
                # Resolve relative and protocol-relative links against the
                # current page and drop "#fragment" variants of the same URL.
                target = urldefrag(urljoin(url, link["href"])).url
                parts = urlparse(target)
            except ValueError:
                continue  # malformed href
            # Compare hosts instead of string prefixes so that a look-alike
            # such as "https://icode.guru.example.com" is not followed.
            if parts.scheme in ("http", "https") and parts.netloc == site_host:
                crawl(target)

    crawl(base_url)
    return blocks
134
 
135
+ # --- FUNCTION: Ask Groq API using official client ---
136
def ask_groq(context, question):
    """Ask the Groq chat model a question, restricted to the given context.

    Args:
        context: Text the model is told to answer from.
        question: The user's question.

    Returns:
        The model's reply with surrounding whitespace removed, or an empty
        string when the reply carries no text.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant. Only answer using the given context (YouTube + icode.guru). Provide links if possible."},
        {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"},
    ]
    chat_completion = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",  # Or the model you have access to
        messages=messages,
    )
    # The message content may be None; guard so .strip() cannot raise.
    reply = chat_completion.choices[0].message.content
    return (reply or "").strip()
146
 
147
+ # --- STREAMLIT APP ---
148
def main():
    """Render the Streamlit page and answer questions from YouTube + icode.guru."""
    st.set_page_config(page_title="EduBot - YouTube + iCodeGuru QA", layout="wide")
    st.title("🎓 EduBot for @icodeguru0")
    st.markdown("Ask anything based on the channel’s recent videos and website content from [icode.guru](https://icode.guru).")

    question = st.text_input("💬 Ask your question here:")
    # Whitespace-only input should not trigger fetching, crawling, and a
    # model call.
    if question and question.strip():
        try:
            with st.spinner("🔍 Fetching videos and transcripts..."):
                video_ids = get_latest_video_ids(channel_id)
                transcripts = get_video_transcripts(video_ids)
                yt_context = "\n\n".join(transcripts)

            with st.spinner("🌐 Scraping icode.guru..."):
                site_blocks = scrape_icodeguru(BASE_URL, max_pages=5)
                site_context = "\n\n".join(site_blocks)

            full_context = yt_context + "\n\n" + site_context

            with st.spinner("🧠 Thinking..."):
                answer = ask_groq(full_context, question)
        except Exception as exc:
            # UI boundary: report the failure on the page instead of crashing.
            st.error(f"Could not answer the question: {exc}")
        else:
            st.success(answer)

    st.markdown("---")
    st.caption("Powered by YouTube + iCodeGuru + Groq | Built for @icodeguru0")


if __name__ == "__main__":
    main()