ilsa15 committed on
Commit
f006201
·
verified ·
1 Parent(s): 1251c8d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +131 -23
app.py CHANGED
@@ -1,72 +1,180 @@
1
 
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import nest_asyncio
4
  from youtube_transcript_api import YouTubeTranscriptApi
5
  import streamlit as st
6
  import os
7
  from groq import Groq
 
 
8
 
9
  nest_asyncio.apply()
10
 
11
  # --- CONFIGURATION ---
12
- YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY") # Set in your HuggingFace Secrets
13
  channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # @icodeguru0
 
14
 
15
- # Initialize Groq client once
16
  groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
17
 
18
- # --- FUNCTION: Fetch recent video IDs from YouTube channel ---
19
  def get_latest_video_ids(channel_id, max_results=5):
20
- import requests
21
  url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
22
  response = requests.get(url)
23
  videos = response.json().get('items', [])
24
  return [v['id']['videoId'] for v in videos if v['id']['kind'] == 'youtube#video']
25
 
26
- # --- FUNCTION: Get video transcripts ---
27
  def get_video_transcripts(video_ids):
28
- all_transcripts = []
29
  for vid in video_ids:
30
  try:
31
  transcript = YouTubeTranscriptApi.get_transcript(vid)
32
  text = " ".join([t['text'] for t in transcript])
33
- all_transcripts.append(text)
34
  except:
35
  continue
36
- return all_transcripts
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
- # --- FUNCTION: Ask Groq API using official client ---
39
- def ask_groq(context, question):
 
 
 
40
  messages = [
41
- {"role": "system", "content": "You are a helpful assistant."},
42
- {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}\nAnswer:"}
43
  ]
44
  chat_completion = groq_client.chat.completions.create(
45
- model="llama-3.3-70b-versatile", # Or the model you have access to
46
  messages=messages,
47
  )
48
  return chat_completion.choices[0].message.content.strip()
49
 
50
  # --- STREAMLIT APP ---
51
  def main():
52
- st.set_page_config(page_title="EduBot - YouTube Channel QA", layout="wide")
53
- st.title("๐ŸŽ“ EduBot for @icodeguru0")
54
- st.markdown("Ask anything based on the channelโ€™s recent videos.")
55
 
56
  question = st.text_input("๐Ÿ’ฌ Ask your question here:")
57
  if question:
58
- with st.spinner("๐Ÿ” Fetching videos and transcripts..."):
59
  video_ids = get_latest_video_ids(channel_id)
60
- transcripts = get_video_transcripts(video_ids)
61
- full_context = "\n\n".join(transcripts)
 
 
 
 
 
 
62
  with st.spinner("๐Ÿง  Thinking..."):
63
- answer = ask_groq(full_context, question)
64
- st.success(answer)
 
 
65
 
66
  st.markdown("---")
67
- st.caption("Powered by YouTube + Groq | Built for @icodeguru0")
68
 
69
  if __name__ == "__main__":
70
  main()
71
 
72
-
 
1
 
2
 
3
+ # import nest_asyncio
4
+ # from youtube_transcript_api import YouTubeTranscriptApi
5
+ # import streamlit as st
6
+ # import os
7
+ # from groq import Groq
8
+
9
+ # nest_asyncio.apply()
10
+
11
+ # # --- CONFIGURATION ---
12
+ # YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY") # Set in your HuggingFace Secrets
13
+ # channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # @icodeguru0
14
+
15
+ # # Initialize Groq client once
16
+ # groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
17
+
18
+ # # --- FUNCTION: Fetch recent video IDs from YouTube channel ---
19
+ # def get_latest_video_ids(channel_id, max_results=5):
20
+ # import requests
21
+ # url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
22
+ # response = requests.get(url)
23
+ # videos = response.json().get('items', [])
24
+ # return [v['id']['videoId'] for v in videos if v['id']['kind'] == 'youtube#video']
25
+
26
+ # # --- FUNCTION: Get video transcripts ---
27
+ # def get_video_transcripts(video_ids):
28
+ # all_transcripts = []
29
+ # for vid in video_ids:
30
+ # try:
31
+ # transcript = YouTubeTranscriptApi.get_transcript(vid)
32
+ # text = " ".join([t['text'] for t in transcript])
33
+ # all_transcripts.append(text)
34
+ # except:
35
+ # continue
36
+ # return all_transcripts
37
+
38
+ # # --- FUNCTION: Ask Groq API using official client ---
39
+ # def ask_groq(context, question):
40
+ # messages = [
41
+ # {"role": "system", "content": "You are a helpful assistant."},
42
+ # {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}\nAnswer:"}
43
+ # ]
44
+ # chat_completion = groq_client.chat.completions.create(
45
+ # model="llama-3.3-70b-versatile", # Or the model you have access to
46
+ # messages=messages,
47
+ # )
48
+ # return chat_completion.choices[0].message.content.strip()
49
+
50
+ # # --- STREAMLIT APP ---
51
+ # def main():
52
+ # st.set_page_config(page_title="EduBot - YouTube Channel QA", layout="wide")
53
+ # st.title("๐ŸŽ“ EduBot for @icodeguru0")
54
+ # st.markdown("Ask anything based on the channelโ€™s recent videos.")
55
+
56
+ # question = st.text_input("๐Ÿ’ฌ Ask your question here:")
57
+ # if question:
58
+ # with st.spinner("๐Ÿ” Fetching videos and transcripts..."):
59
+ # video_ids = get_latest_video_ids(channel_id)
60
+ # transcripts = get_video_transcripts(video_ids)
61
+ # full_context = "\n\n".join(transcripts)
62
+ # with st.spinner("๐Ÿง  Thinking..."):
63
+ # answer = ask_groq(full_context, question)
64
+ # st.success(answer)
65
+
66
+ # st.markdown("---")
67
+ # st.caption("Powered by YouTube + Groq | Built for @icodeguru0")
68
+
69
+ # if __name__ == "__main__":
70
+ # main()
71
+
72
+
73
+
74
  import nest_asyncio
75
  from youtube_transcript_api import YouTubeTranscriptApi
76
  import streamlit as st
77
  import os
78
  from groq import Groq
79
+ import requests
80
+ from bs4 import BeautifulSoup
81
 
82
  nest_asyncio.apply()
83
 
84
  # --- CONFIGURATION ---
85
# YouTube Data API key, read from the environment (None when unset).
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
# Channel whose latest videos are used as answer context.
channel_id = "UCsv3kmQ5k1eIRG2R9mWN"  # @icodeguru0
# Website crawled for additional answer context.
TARGET_WEBSITE = "https://icodeguru.com"

# Single Groq client shared by every request, created at import time.
groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
90
 
91
# --- FUNCTION: Fetch recent YouTube video IDs ---
def get_latest_video_ids(channel_id, max_results=5):
    """Return the IDs of the most recently published videos of a channel.

    Queries the YouTube Data API v3 ``search`` endpoint ordered by date and
    keeps only the results whose kind is ``youtube#video``.

    Args:
        channel_id: ID of the YouTube channel to search within.
        max_results: Maximum number of search results to request.

    Returns:
        A list of video ID strings. Empty when the response carries no
        ``items`` key (for example an API error payload).

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
            An undecodable JSON body also raises from ``response.json()``.
    """
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/search",
        # Let requests build the query string so every value is URL-encoded.
        params={
            "key": YOUTUBE_API_KEY,
            "channelId": channel_id,
            "part": "snippet,id",
            "order": "date",
            "maxResults": max_results,
        },
        # Without a timeout a stalled connection would block the app forever.
        timeout=10,
    )
    items = response.json().get("items", [])
    return [
        item["id"]["videoId"]
        for item in items
        # .get() tolerates an item that is missing the "id" object.
        if item.get("id", {}).get("kind") == "youtube#video"
    ]
97
 
98
# --- FUNCTION: Get YouTube video transcripts ---
def get_video_transcripts(video_ids):
    """Fetch the transcripts of the given videos, skipping any that fail.

    Args:
        video_ids: Iterable of YouTube video ID strings.

    Returns:
        A list with one string per video whose transcript could be fetched:
        a ``(YouTube Video ID: <id>)`` header line followed by the transcript
        text joined with spaces. Videos that fail are omitted, so the list
        can be shorter than ``video_ids``.
    """
    transcripts = []
    for vid in video_ids:
        try:
            segments = YouTubeTranscriptApi.get_transcript(vid)
            text = " ".join(segment["text"] for segment in segments)
        except Exception:
            # Best effort: a video without a usable transcript is skipped.
            # Catching Exception rather than using a bare except keeps
            # KeyboardInterrupt and SystemExit propagating.
            continue
        transcripts.append(f"(YouTube Video ID: {vid})\n{text}")
    return transcripts
109
+
110
# --- FUNCTION: Scrape textual content + URLs from a website ---
def scrape_website_text_and_links(base_url, max_pages=5):
    """Crawl a website depth-first and collect the text of its pages.

    Starting at ``base_url``, each page is fetched, its text is extracted and
    its links are followed while they stay on the same host and under the
    path of ``base_url``. Pages that fail to load are skipped but still count
    toward ``max_pages``, so that value bounds the number of requests made.

    Args:
        base_url: Absolute URL where the crawl starts.
        max_pages: Maximum number of pages to attempt.

    Returns:
        A list of ``{"url": ..., "content": ...}`` dicts in visit order, where
        ``content`` is the first 2000 characters of the page text.
    """
    from urllib.parse import urldefrag, urljoin, urlparse

    root = urlparse(base_url)
    visited = set()
    data = []

    def is_internal(url):
        """Return True when ``url`` belongs to the site being crawled."""
        parts = urlparse(url)
        # Compare the parsed host instead of a raw string prefix; a prefix
        # test would also accept hosts like "https://site.com.evil.org".
        return (
            parts.scheme in ("http", "https")
            and parts.netloc == root.netloc
            and parts.path.startswith(root.path)
        )

    def scrape(url):
        # "/page" and "/page/" are treated as the same page.
        key = url.rstrip("/")
        if key in visited or len(visited) >= max_pages:
            return
        visited.add(key)
        try:
            res = requests.get(url, timeout=10)
            # Do not index the body of 4xx/5xx error pages as site content.
            res.raise_for_status()
            soup = BeautifulSoup(res.content, "html.parser")
            text = soup.get_text(separator=" ", strip=True)
            hrefs = [link["href"] for link in soup.find_all("a", href=True)]
        except Exception:
            # Best effort: a page that cannot be fetched or parsed is skipped.
            return
        data.append({"url": url, "content": text[:2000]})  # Limit content per page
        for href in hrefs:
            # Resolve relative links against the current page and drop the
            # "#fragment", which points into the same document.
            target, _fragment = urldefrag(urljoin(url, href))
            if is_internal(target):
                scrape(target)

    scrape(base_url)
    return data
135
 
136
# --- FUNCTION: Ask Groq with context ---
def ask_groq(context_blocks, question):
    """Ask the Groq chat model a question grounded in the given sources.

    Args:
        context_blocks: Iterable of dicts with ``"url"`` and ``"content"``
            keys; each one becomes a labelled section of the prompt.
        question: The user's question.

    Returns:
        The text of the first completion choice, stripped of surrounding
        whitespace.
    """
    sections = []
    for entry in context_blocks:
        sections.append(f"[Source]({entry['url']}):\n{entry['content']}")
    formatted_context = "\n\n".join(sections)

    system_message = {
        "role": "system",
        "content": "You are a helpful assistant that gives direct answers with reference links from the source.",
    }
    user_message = {
        "role": "user",
        "content": f"Context:\n{formatted_context}\n\nQuestion: {question}\nAnswer with clickable links:",
    }

    reply = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[system_message, user_message],
    )
    first_choice = reply.choices[0]
    return first_choice.message.content.strip()
150
 
151
  # --- STREAMLIT APP ---
152
  def main():
153
+ st.set_page_config(page_title="EduBot - YouTube + Website QA", layout="wide")
154
+ st.title("๐Ÿ“š EduBot for @icodeguru0")
155
+ st.markdown("Ask your question based on **latest YouTube videos** and **icodeguru.com website** content.")
156
 
157
  question = st.text_input("๐Ÿ’ฌ Ask your question here:")
158
  if question:
159
+ with st.spinner("๐ŸŽฅ Fetching videos and transcripts..."):
160
  video_ids = get_latest_video_ids(channel_id)
161
+ yt_transcripts = get_video_transcripts(video_ids)
162
+ yt_blocks = [{"url": f"https://www.youtube.com/watch?v={vid}", "content": txt} for vid, txt in zip(video_ids, yt_transcripts)]
163
+
164
+ with st.spinner("๐ŸŒ Scraping website content..."):
165
+ website_blocks = scrape_website_text_and_links(TARGET_WEBSITE, max_pages=5)
166
+
167
+ context_blocks = yt_blocks + website_blocks
168
+
169
  with st.spinner("๐Ÿง  Thinking..."):
170
+ answer = ask_groq(context_blocks, question)
171
+
172
+ st.markdown("### โœ… Answer:")
173
+ st.markdown(answer, unsafe_allow_html=True)
174
 
175
  st.markdown("---")
176
+ st.caption("Powered by YouTube + iCodeGuru.com + Groq | Built for @icodeguru0")
177
 
178
  if __name__ == "__main__":
179
  main()
180