ilsa15 commited on
Commit
b4d166a
·
verified ·
1 Parent(s): f006201

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -69
app.py CHANGED
@@ -69,112 +69,85 @@
69
  # if __name__ == "__main__":
70
  # main()
71
 
72
-
73
-
74
- import nest_asyncio
75
- from youtube_transcript_api import YouTubeTranscriptApi
76
  import streamlit as st
77
- import os
78
- from groq import Groq
79
  import requests
80
  from bs4 import BeautifulSoup
 
 
 
81
 
82
# Patch the already-running event loop so nested async calls work under Streamlit.
nest_asyncio.apply()

# --- CONFIGURATION ---
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # @icodeguru0
TARGET_WEBSITE = "https://icodeguru.com"

# Shared Groq client; reads GROQ_API_KEY from the environment.
groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
91
# --- FUNCTION: Fetch recent YouTube video IDs ---
def get_latest_video_ids(channel_id, max_results=5):
    """Return video IDs of the channel's most recent uploads.

    Args:
        channel_id: YouTube channel ID to query.
        max_results: Maximum number of search results to request.

    Returns:
        List of video ID strings; playlists/channels in the search results
        are filtered out. Empty list when the API returns no items.

    Raises:
        requests.HTTPError: if the YouTube Data API responds with an error
            status (bad/missing API key, quota exceeded, ...).
    """
    url = "https://www.googleapis.com/youtube/v3/search"
    # Pass query parameters via `params` so requests URL-encodes them,
    # instead of interpolating raw values into the URL string.
    params = {
        "key": YOUTUBE_API_KEY,
        "channelId": channel_id,
        "part": "snippet,id",
        "order": "date",
        "maxResults": max_results,
    }
    response = requests.get(url, params=params, timeout=10)
    response.raise_for_status()  # fail loudly instead of silently returning []
    videos = response.json().get('items', [])
    # Search results mix videos, playlists and channels; keep only videos.
    return [v['id']['videoId'] for v in videos if v['id']['kind'] == 'youtube#video']
97
-
98
# --- FUNCTION: Get YouTube video transcripts ---
def get_video_transcripts(video_ids):
    """Fetch plain-text transcripts for the given YouTube video IDs.

    Videos without an available transcript (disabled, not generated,
    region-blocked) are skipped rather than aborting the whole batch.

    Args:
        video_ids: Iterable of YouTube video ID strings.

    Returns:
        List of strings, one per video that had a transcript, each prefixed
        with "(YouTube Video ID: <id>)".
    """
    transcripts = []
    for vid in video_ids:
        try:
            transcript = YouTubeTranscriptApi.get_transcript(vid)
        # Narrowed from a bare `except:`, which would also swallow
        # KeyboardInterrupt/SystemExit; those should propagate.
        except Exception:
            continue
        text = " ".join(t['text'] for t in transcript)
        transcripts.append(f"(YouTube Video ID: {vid})\n{text}")
    return transcripts
109
-
110
# --- FUNCTION: Scrape textual content + URLs from a website ---
def scrape_website_text_and_links(base_url, max_pages=5):
    """Crawl up to `max_pages` pages under `base_url` and collect their text.

    Args:
        base_url: Root URL; only links with this prefix are followed.
        max_pages: Hard cap on the number of pages visited.

    Returns:
        List of {"url": ..., "content": ...} dicts, content truncated to
        2000 characters per page.
    """
    visited = set()
    data = []

    def scrape(url):
        # Depth-first crawl bounded by max_pages.
        if url in visited or len(visited) >= max_pages:
            return
        visited.add(url)
        try:
            res = requests.get(url, timeout=10)
            soup = BeautifulSoup(res.content, "html.parser")
            text = soup.get_text(separator=" ", strip=True)
            data.append({"url": url, "content": text[:2000]})  # Limit content per page
            for link in soup.find_all("a", href=True):
                href = link['href']
                # Only single-slash paths are site-relative; "//host/..." is
                # protocol-relative and must not be glued onto base_url.
                if href.startswith("/") and not href.startswith("//"):
                    href = base_url + href
                if href.startswith(base_url):
                    scrape(href)
        # Narrowed from a bare `except:` — best-effort crawl, but do not
        # swallow KeyboardInterrupt/SystemExit.
        except Exception:
            pass

    scrape(base_url)
    return data
135
 
136
# --- FUNCTION: Ask Groq with context ---
def ask_groq(context_blocks, question):
    """Query the Groq chat model with the collected context blocks.

    Each block is a {"url": ..., "content": ...} mapping; blocks are
    flattened into one markdown context string before being sent.
    Returns the model's answer with surrounding whitespace stripped.
    """
    sources = (f"[Source]({block['url']}):\n{block['content']}" for block in context_blocks)
    formatted_context = "\n\n".join(sources)
    system_msg = {
        "role": "system",
        "content": "You are a helpful assistant that gives direct answers with reference links from the source.",
    }
    user_msg = {
        "role": "user",
        "content": f"Context:\n{formatted_context}\n\nQuestion: {question}\nAnswer with clickable links:",
    }
    completion = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[system_msg, user_msg],
    )
    return completion.choices[0].message.content.strip()
150
 
151
# --- STREAMLIT APP ---
def main():
    """Streamlit entry point: gather YouTube + website context, answer questions."""
    st.set_page_config(page_title="EduBot - YouTube + Website QA", layout="wide")
    st.title("πŸ“š EduBot for @icodeguru0")
    st.markdown("Ask your question based on **latest YouTube videos** and **icodeguru.com website** content.")

    question = st.text_input("πŸ’¬ Ask your question here:")
    if question:
        with st.spinner("πŸŽ₯ Fetching videos and transcripts..."):
            video_ids = get_latest_video_ids(channel_id)
            yt_transcripts = get_video_transcripts(video_ids)
            # BUG FIX: get_video_transcripts() silently skips videos without a
            # transcript, so zip(video_ids, yt_transcripts) could pair a video
            # URL with the wrong transcript. Recover each video's ID from the
            # "(YouTube Video ID: <id>)" prefix the transcript carries instead.
            yt_blocks = []
            for txt in yt_transcripts:
                vid = txt.split(")", 1)[0][len("(YouTube Video ID: "):]
                yt_blocks.append({"url": f"https://www.youtube.com/watch?v={vid}", "content": txt})

        with st.spinner("🌐 Scraping website content..."):
            website_blocks = scrape_website_text_and_links(TARGET_WEBSITE, max_pages=5)

        context_blocks = yt_blocks + website_blocks

        with st.spinner("🧠 Thinking..."):
            answer = ask_groq(context_blocks, question)

        st.markdown("### βœ… Answer:")
        st.markdown(answer, unsafe_allow_html=True)

        st.markdown("---")
        st.caption("Powered by YouTube + iCodeGuru.com + Groq | Built for @icodeguru0")

if __name__ == "__main__":
    main()
180
-
 
69
  # if __name__ == "__main__":
70
  # main()
71
 
 
 
 
 
72
  import streamlit as st
 
 
73
  import requests
74
  from bs4 import BeautifulSoup
75
+ import os
76
+ import nest_asyncio
77
+ from groq import Groq
78
 
79
# Allow nested event loops (Streamlit already runs one).
nest_asyncio.apply()

# --- CONFIGURATION ---
BASE_URL = "https://icode.guru"
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# Shared Groq client used by the QA function below.
groq_client = Groq(api_key=GROQ_API_KEY)
85
+
86
# --- FUNCTION: Scrape iCode.guru pages ---
def scrape_icodeguru(base_url, max_pages=5):
    """Crawl up to `max_pages` pages of the site and collect their text.

    Args:
        base_url: Root URL; only links under this prefix are followed.
        max_pages: Hard cap on the number of pages visited.

    Returns:
        List of {"url": ..., "content": ...} dicts, content truncated to
        2000 characters. Pages with <= 100 characters of text are skipped.
    """
    visited = set()
    content_blocks = []

    def crawl(url):
        # Depth-first crawl bounded by max_pages.
        if url in visited or len(visited) >= max_pages:
            return
        visited.add(url)
        try:
            response = requests.get(url, timeout=10)
            soup = BeautifulSoup(response.content, "html.parser")
            page_text = soup.get_text(separator=" ", strip=True)
            page_text = page_text.replace("\n", " ").strip()
            if len(page_text) > 100:  # skip near-empty pages
                content_blocks.append({
                    "url": url,
                    "content": page_text[:2000]  # Limit to 2000 characters
                })
            for link in soup.find_all("a", href=True):
                href = link["href"]
                # BUG FIX: a protocol-relative href ("//other.site/x") also
                # starts with "/" and used to be glued onto base_url, yielding
                # a bogus URL. Only treat single-slash paths as site-relative.
                if href.startswith("/") and not href.startswith("//"):
                    href = base_url + href
                if href.startswith(base_url) and href not in visited:
                    crawl(href)
        except Exception:
            # Best-effort crawl: one unreachable page must not abort the rest.
            # (Dropped the unused `as e` binding.)
            pass

    crawl(base_url)
    return content_blocks
116
 
117
# --- FUNCTION: Ask Groq with website-only context ---
def ask_icodeguru_bot(context_blocks, question):
    """Answer `question` using only the scraped website content.

    `context_blocks` is a list of {"url": ..., "content": ...} dicts; they
    are joined into a single context string and sent to the Groq chat model.
    Returns the model's reply, stripped of surrounding whitespace.
    """
    pieces = [f"[Source]({block['url']}):\n{block['content']}" for block in context_blocks]
    full_context = "\n\n".join(pieces)
    reply = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {"role": "system", "content": "You are a helpful assistant. Answer only from the provided website content. Include clickable source links."},
            {"role": "user", "content": f"Context:\n{full_context}\n\nQuestion: {question}\nAnswer:"},
        ],
    )
    return reply.choices[0].message.content.strip()
131
 
132
# --- STREAMLIT UI ---
def main():
    """Streamlit entry point: scrape icode.guru on demand and answer questions."""
    st.set_page_config(page_title="iCodeGuru ChatBot", layout="wide")
    st.title("πŸ€– Ask iCodeGuru Bot")
    st.markdown("Ask questions and get answers **only from [icode.guru](https://icode.guru/)**.")

    question = st.text_input("πŸ’¬ Ask your question here:")
    if not question:
        return

    # Scrape fresh site content for every submitted question.
    with st.spinner("🌐 Scraping icode.guru..."):
        website_blocks = scrape_icodeguru(BASE_URL, max_pages=5)

    with st.spinner("🧠 Thinking..."):
        answer = ask_icodeguru_bot(website_blocks, question)

    st.markdown("### βœ… Answer:")
    st.markdown(answer, unsafe_allow_html=True)

    st.markdown("---")
    st.caption("Powered by Groq + iCodeGuru | Built by @ilsa")

if __name__ == "__main__":
    main()