ilsa15 committed on
Commit
122e63b
·
verified ·
1 Parent(s): 9156401

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -111
app.py CHANGED
@@ -1,93 +1,23 @@
1
 
2
 
3
- # import nest_asyncio
4
- # from youtube_transcript_api import YouTubeTranscriptApi
5
- # import streamlit as st
6
- # import os
7
- # from groq import Groq
8
-
9
- # nest_asyncio.apply()
10
-
11
- # # --- CONFIGURATION ---
12
- # YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY") # Set in your HuggingFace Secrets
13
- # channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # @icodeguru0
14
-
15
- # # Initialize Groq client once
16
- # groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
17
-
18
- # # --- FUNCTION: Fetch recent video IDs from YouTube channel ---
19
- # def get_latest_video_ids(channel_id, max_results=5):
20
- # import requests
21
- # url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
22
- # response = requests.get(url)
23
- # videos = response.json().get('items', [])
24
- # return [v['id']['videoId'] for v in videos if v['id']['kind'] == 'youtube#video']
25
-
26
- # # --- FUNCTION: Get video transcripts ---
27
- # def get_video_transcripts(video_ids):
28
- # all_transcripts = []
29
- # for vid in video_ids:
30
- # try:
31
- # transcript = YouTubeTranscriptApi.get_transcript(vid)
32
- # text = " ".join([t['text'] for t in transcript])
33
- # all_transcripts.append(text)
34
- # except:
35
- # continue
36
- # return all_transcripts
37
-
38
- # # --- FUNCTION: Ask Groq API using official client ---
39
- # def ask_groq(context, question):
40
- # messages = [
41
- # {"role": "system", "content": "You are a helpful assistant."},
42
- # {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}\nAnswer:"}
43
- # ]
44
- # chat_completion = groq_client.chat.completions.create(
45
- # model="llama-3.3-70b-versatile", # Or the model you have access to
46
- # messages=messages,
47
- # )
48
- # return chat_completion.choices[0].message.content.strip()
49
-
50
- # # --- STREAMLIT APP ---
51
- # def main():
52
- # st.set_page_config(page_title="EduBot - YouTube Channel QA", layout="wide")
53
- # st.title("🎓 EduBot for @icodeguru0")
54
- # st.markdown("Ask anything based on the channel’s recent videos.")
55
-
56
- # question = st.text_input("💬 Ask your question here:")
57
- # if question:
58
- # with st.spinner("🔍 Fetching videos and transcripts..."):
59
- # video_ids = get_latest_video_ids(channel_id)
60
- # transcripts = get_video_transcripts(video_ids)
61
- # full_context = "\n\n".join(transcripts)
62
- # with st.spinner("🧠 Thinking..."):
63
- # answer = ask_groq(full_context, question)
64
- # st.success(answer)
65
-
66
- # st.markdown("---")
67
- # st.caption("Powered by YouTube + Groq | Built for @icodeguru0")
68
-
69
- # if __name__ == "__main__":
70
- # main()
71
  import nest_asyncio
72
  from youtube_transcript_api import YouTubeTranscriptApi
73
  import streamlit as st
74
  import os
75
  from groq import Groq
76
- import requests
77
- from bs4 import BeautifulSoup
78
 
79
  nest_asyncio.apply()
80
 
81
  # --- CONFIGURATION ---
82
  YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY") # Set in your HuggingFace Secrets
83
  channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # @icodeguru0
84
- BASE_URL = "https://icode.guru"
85
 
86
  # Initialize Groq client once
87
  groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
88
 
89
  # --- FUNCTION: Fetch recent video IDs from YouTube channel ---
90
  def get_latest_video_ids(channel_id, max_results=5):
 
91
  url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
92
  response = requests.get(url)
93
  videos = response.json().get('items', [])
@@ -105,38 +35,11 @@ def get_video_transcripts(video_ids):
105
  continue
106
  return all_transcripts
107
 
108
- # --- NEW FUNCTION: Scrape icode.guru ---
109
- def scrape_icodeguru(base_url="https://icode.guru", max_pages=5):
110
- visited = set()
111
- blocks = []
112
-
113
- def crawl(url):
114
- if url in visited or len(visited) >= max_pages:
115
- return
116
- visited.add(url)
117
- try:
118
- res = requests.get(url, timeout=10)
119
- soup = BeautifulSoup(res.content, "html.parser")
120
- page_text = soup.get_text(separator=" ", strip=True)
121
- if len(page_text) > 100:
122
- blocks.append(f"[Source]({url}):\n{page_text[:2000]}")
123
- for link in soup.find_all("a", href=True):
124
- href = link['href']
125
- if href.startswith("/"):
126
- href = base_url + href
127
- if href.startswith(base_url):
128
- crawl(href)
129
- except:
130
- pass
131
-
132
- crawl(base_url)
133
- return blocks
134
-
135
  # --- FUNCTION: Ask Groq API using official client ---
136
  def ask_groq(context, question):
137
  messages = [
138
- {"role": "system", "content": "You are a helpful assistant. Only answer using the given context (YouTube + icode.guru). Provide links if possible."},
139
- {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"}
140
  ]
141
  chat_completion = groq_client.chat.completions.create(
142
  model="llama-3.3-70b-versatile", # Or the model you have access to
@@ -146,29 +49,128 @@ def ask_groq(context, question):
146
 
147
  # --- STREAMLIT APP ---
148
  def main():
149
- st.set_page_config(page_title="EduBot - YouTube + iCodeGuru QA", layout="wide")
150
  st.title("🎓 EduBot for @icodeguru0")
151
- st.markdown("Ask anything based on the channel’s recent videos and website content from [icode.guru](https://icode.guru).")
152
 
153
  question = st.text_input("💬 Ask your question here:")
154
  if question:
155
  with st.spinner("🔍 Fetching videos and transcripts..."):
156
  video_ids = get_latest_video_ids(channel_id)
157
  transcripts = get_video_transcripts(video_ids)
158
- yt_context = "\n\n".join(transcripts)
159
-
160
- with st.spinner("🌐 Scraping icode.guru..."):
161
- site_blocks = scrape_icodeguru(BASE_URL, max_pages=5)
162
- site_context = "\n\n".join(site_blocks)
163
-
164
- full_context = yt_context + "\n\n" + site_context
165
-
166
  with st.spinner("🧠 Thinking..."):
167
  answer = ask_groq(full_context, question)
168
  st.success(answer)
169
 
170
  st.markdown("---")
171
- st.caption("Powered by YouTube + iCodeGuru + Groq | Built for @icodeguru0")
172
 
173
  if __name__ == "__main__":
174
  main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import nest_asyncio
4
  from youtube_transcript_api import YouTubeTranscriptApi
5
  import streamlit as st
6
  import os
7
  from groq import Groq
 
 
8
 
9
  nest_asyncio.apply()
10
 
11
  # --- CONFIGURATION ---
12
  YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY") # Set in your HuggingFace Secrets
13
  channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # @icodeguru0
 
14
 
15
  # Initialize Groq client once
16
  groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
17
 
18
  # --- FUNCTION: Fetch recent video IDs from YouTube channel ---
19
  def get_latest_video_ids(channel_id, max_results=5):
20
+ import requests
21
  url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
22
  response = requests.get(url)
23
  videos = response.json().get('items', [])
 
35
  continue
36
  return all_transcripts
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  # --- FUNCTION: Ask Groq API using official client ---
39
  def ask_groq(context, question):
40
  messages = [
41
+ {"role": "system", "content": "You are a helpful assistant."},
42
+ {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}\nAnswer:"}
43
  ]
44
  chat_completion = groq_client.chat.completions.create(
45
  model="llama-3.3-70b-versatile", # Or the model you have access to
 
49
 
50
  # --- STREAMLIT APP ---
51
  def main():
52
+ st.set_page_config(page_title="EduBot - YouTube Channel QA", layout="wide")
53
  st.title("🎓 EduBot for @icodeguru0")
54
+ st.markdown("Ask anything based on the channel’s recent videos.")
55
 
56
  question = st.text_input("💬 Ask your question here:")
57
  if question:
58
  with st.spinner("🔍 Fetching videos and transcripts..."):
59
  video_ids = get_latest_video_ids(channel_id)
60
  transcripts = get_video_transcripts(video_ids)
61
+ full_context = "\n\n".join(transcripts)
 
 
 
 
 
 
 
62
  with st.spinner("🧠 Thinking..."):
63
  answer = ask_groq(full_context, question)
64
  st.success(answer)
65
 
66
  st.markdown("---")
67
+ st.caption("Powered by YouTube + Groq | Built for @icodeguru0")
68
 
69
  if __name__ == "__main__":
70
  main()
71
+
72
+
73
+ # import nest_asyncio
74
+ # from youtube_transcript_api import YouTubeTranscriptApi
75
+ # import streamlit as st
76
+ # import os
77
+ # from groq import Groq
78
+ # import requests
79
+ # from bs4 import BeautifulSoup
80
+
81
+ # nest_asyncio.apply()
82
+
83
+ # # --- CONFIGURATION ---
84
+ # YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY") # Set in your HuggingFace Secrets
85
+ # channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # @icodeguru0
86
+ # BASE_URL = "https://icode.guru"
87
+
88
+ # # Initialize Groq client once
89
+ # groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
90
+
91
+ # # --- FUNCTION: Fetch recent video IDs from YouTube channel ---
92
+ # def get_latest_video_ids(channel_id, max_results=5):
93
+ # url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
94
+ # response = requests.get(url)
95
+ # videos = response.json().get('items', [])
96
+ # return [v['id']['videoId'] for v in videos if v['id']['kind'] == 'youtube#video']
97
+
98
+ # # --- FUNCTION: Get video transcripts ---
99
+ # def get_video_transcripts(video_ids):
100
+ # all_transcripts = []
101
+ # for vid in video_ids:
102
+ # try:
103
+ # transcript = YouTubeTranscriptApi.get_transcript(vid)
104
+ # text = " ".join([t['text'] for t in transcript])
105
+ # all_transcripts.append(text)
106
+ # except:
107
+ # continue
108
+ # return all_transcripts
109
+
110
+ # # --- NEW FUNCTION: Scrape icode.guru ---
111
+ # def scrape_icodeguru(base_url="https://icode.guru", max_pages=5):
112
+ # visited = set()
113
+ # blocks = []
114
+
115
+ # def crawl(url):
116
+ # if url in visited or len(visited) >= max_pages:
117
+ # return
118
+ # visited.add(url)
119
+ # try:
120
+ # res = requests.get(url, timeout=10)
121
+ # soup = BeautifulSoup(res.content, "html.parser")
122
+ # page_text = soup.get_text(separator=" ", strip=True)
123
+ # if len(page_text) > 100:
124
+ # blocks.append(f"[Source]({url}):\n{page_text[:2000]}")
125
+ # for link in soup.find_all("a", href=True):
126
+ # href = link['href']
127
+ # if href.startswith("/"):
128
+ # href = base_url + href
129
+ # if href.startswith(base_url):
130
+ # crawl(href)
131
+ # except:
132
+ # pass
133
+
134
+ # crawl(base_url)
135
+ # return blocks
136
+
137
+ # # --- FUNCTION: Ask Groq API using official client ---
138
+ # def ask_groq(context, question):
139
+ # messages = [
140
+ # {"role": "system", "content": "You are a helpful assistant. Only answer using the given context (YouTube + icode.guru). Provide links if possible."},
141
+ # {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"}
142
+ # ]
143
+ # chat_completion = groq_client.chat.completions.create(
144
+ # model="llama-3.3-70b-versatile", # Or the model you have access to
145
+ # messages=messages,
146
+ # )
147
+ # return chat_completion.choices[0].message.content.strip()
148
+
149
+ # # --- STREAMLIT APP ---
150
+ # def main():
151
+ # st.set_page_config(page_title="EduBot - YouTube + iCodeGuru QA", layout="wide")
152
+ # st.title("🎓 EduBot for @icodeguru0")
153
+ # st.markdown("Ask anything based on the channel’s recent videos and website content from [icode.guru](https://icode.guru).")
154
+
155
+ # question = st.text_input("💬 Ask your question here:")
156
+ # if question:
157
+ # with st.spinner("🔍 Fetching videos and transcripts..."):
158
+ # video_ids = get_latest_video_ids(channel_id)
159
+ # transcripts = get_video_transcripts(video_ids)
160
+ # yt_context = "\n\n".join(transcripts)
161
+
162
+ # with st.spinner("🌐 Scraping icode.guru..."):
163
+ # site_blocks = scrape_icodeguru(BASE_URL, max_pages=5)
164
+ # site_context = "\n\n".join(site_blocks)
165
+
166
+ # full_context = yt_context + "\n\n" + site_context
167
+
168
+ # with st.spinner("🧠 Thinking..."):
169
+ # answer = ask_groq(full_context, question)
170
+ # st.success(answer)
171
+
172
+ # st.markdown("---")
173
+ # st.caption("Powered by YouTube + iCodeGuru + Groq | Built for @icodeguru0")
174
+
175
+ # if __name__ == "__main__":
176
+ # main()