Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -311,6 +311,10 @@
|
|
| 311 |
|
| 312 |
# if __name__ == "__main__":
|
| 313 |
# main()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
import nest_asyncio
|
| 315 |
import streamlit as st
|
| 316 |
import os
|
|
@@ -321,6 +325,7 @@ from bs4 import BeautifulSoup
|
|
| 321 |
from sentence_transformers import SentenceTransformer
|
| 322 |
import chromadb
|
| 323 |
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
|
|
|
|
| 324 |
import json
|
| 325 |
|
| 326 |
nest_asyncio.apply()
|
|
@@ -332,21 +337,36 @@ channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # iCodeGuru
|
|
| 332 |
BASE_URL = "https://icode.guru"
|
| 333 |
|
| 334 |
groq_client = Groq(api_key=GROQ_API_KEY)
|
| 335 |
-
|
|
|
|
| 336 |
|
| 337 |
chroma_client = chromadb.Client()
|
| 338 |
-
collection = chroma_client.get_or_create_collection(
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
results = collection.query(query_texts=[query], n_results=3)
|
| 345 |
if results and results["documents"]:
|
| 346 |
-
return "\n\n".join(results["documents"][0])
|
| 347 |
return None
|
| 348 |
|
| 349 |
-
# --- Fetch recent
|
| 350 |
def get_latest_video_ids(channel_id, max_results=5):
|
| 351 |
url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
|
| 352 |
response = requests.get(url)
|
|
@@ -425,19 +445,22 @@ def main():
|
|
| 425 |
st.title("π EduBot for @icodeguru0")
|
| 426 |
st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).")
|
| 427 |
|
|
|
|
| 428 |
user_question = st.text_input("π¬ Ask your question:")
|
| 429 |
|
| 430 |
if user_question:
|
| 431 |
-
|
| 432 |
-
|
|
|
|
|
|
|
| 433 |
if vector_context:
|
| 434 |
-
with st.spinner("π§ Answering from
|
| 435 |
answer = ask_groq(vector_context, user_question)
|
| 436 |
st.success(answer)
|
| 437 |
else:
|
| 438 |
-
#
|
| 439 |
-
with st.spinner("πΊ Fetching
|
| 440 |
-
video_info = get_latest_video_ids(channel_id)
|
| 441 |
transcripts = get_video_transcripts(video_info)
|
| 442 |
|
| 443 |
yt_context = ""
|
|
@@ -447,8 +470,8 @@ def main():
|
|
| 447 |
if user_question.lower() in vid['transcript'].lower():
|
| 448 |
relevant_links.append(vid['link'])
|
| 449 |
|
| 450 |
-
with st.spinner("π Scraping
|
| 451 |
-
site_blocks = scrape_icodeguru()
|
| 452 |
site_context = "\n\n".join(site_blocks)
|
| 453 |
|
| 454 |
full_context = yt_context + "\n\n" + site_context
|
|
@@ -467,4 +490,3 @@ def main():
|
|
| 467 |
|
| 468 |
if __name__ == "__main__":
|
| 469 |
main()
|
| 470 |
-
|
|
|
|
| 311 |
|
| 312 |
# if __name__ == "__main__":
|
| 313 |
# main()
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
|
| 318 |
import nest_asyncio
|
| 319 |
import streamlit as st
|
| 320 |
import os
|
|
|
|
| 325 |
from sentence_transformers import SentenceTransformer
|
| 326 |
import chromadb
|
| 327 |
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
|
| 328 |
+
|
| 329 |
import json
|
| 330 |
|
| 331 |
nest_asyncio.apply()
|
|
|
|
| 337 |
BASE_URL = "https://icode.guru"
|
| 338 |
|
| 339 |
groq_client = Groq(api_key=GROQ_API_KEY)
|
| 340 |
+
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
| 341 |
+
embedding_function = SentenceTransformerEmbeddingFunction(embed_model)
|
| 342 |
|
| 343 |
chroma_client = chromadb.Client()
|
| 344 |
+
collection = chroma_client.get_or_create_collection("icodeguru_knowledge", embedding_function=embedding_function)
|
| 345 |
+
|
| 346 |
+
# --- Upload + load files as vector DB ---
|
| 347 |
+
def load_uploaded_vectors(uploaded_files):
    """Convert uploaded ``.txt`` / ``.json`` files into vector-store records.

    Args:
        uploaded_files: Iterable of file-like objects exposing a ``name``
            attribute and a ``read()`` method returning bytes (``main``
            passes the result of ``st.file_uploader``). ``None`` or an empty
            iterable yields an empty list.

    Returns:
        A list of ``{"id": str, "content": str}`` dicts:

        * ``.txt`` file  -> one record, ``id`` is the file name.
        * ``.json`` file -> one record per top-level list item, with ``id``
          ``"<file name>-<index>"``. A top-level value that is not a list is
          treated as a single chunk. Non-string chunks are serialized back
          to JSON text so every ``content`` is a string.

        Files with any other extension are ignored.

    Raises:
        json.JSONDecodeError: If a ``.json`` file does not contain valid JSON.
    """
    data = []
    for file in uploaded_files or []:
        name = file.name
        # Compare case-insensitively so "NOTES.TXT" is not silently skipped.
        lowered = name.lower()
        if lowered.endswith(".txt"):
            # "utf-8-sig" drops a leading BOM; undecodable bytes are replaced
            # instead of aborting the whole upload.
            text = file.read().decode("utf-8-sig", errors="replace")
            data.append({"id": name, "content": text})
        elif lowered.endswith(".json"):
            content = json.load(file)
            # Iterating a dict would yield only its keys, so wrap any
            # non-list top-level value as a single chunk.
            chunks = content if isinstance(content, list) else [content]
            for i, chunk in enumerate(chunks):
                if not isinstance(chunk, str):
                    chunk = json.dumps(chunk, ensure_ascii=False)
                data.append({"id": f"{name}-{i}", "content": chunk})
    return data
|
| 358 |
+
|
| 359 |
+
def search_vector_data(query, data):
    """Return the uploaded documents most similar to ``query``.

    The records are stored in the ``"temp_query"`` collection of the
    module-level ``chroma_client`` (embedded with the module-level
    ``embedding_function``) and the collection is then queried.

    Args:
        query: The question text to search for.
        data: List of ``{"id": str, "content": str}`` records, as produced by
            ``load_uploaded_vectors``. Falsy input short-circuits.

    Returns:
        Up to three matching documents joined by blank lines, or ``None``
        when ``data`` is empty or the query yields no documents.
    """
    if not data:
        return None

    collection = chroma_client.get_or_create_collection(
        "temp_query", embedding_function=embedding_function
    )

    ids = [record["id"] for record in data]
    documents = [record["content"] for record in data]

    # "temp_query" is reused across calls, so drop records left over from
    # earlier uploads; otherwise files the user has since removed would still
    # be returned as context.
    current_ids = set(ids)
    stale_ids = [
        doc_id for doc_id in collection.get()["ids"] if doc_id not in current_ids
    ]
    if stale_ids:
        collection.delete(ids=stale_ids)

    # upsert (not add) so a re-uploaded file with the same name has its
    # content refreshed rather than kept at the old version.
    collection.upsert(documents=documents, ids=ids)

    # Never request more neighbours than there are documents.
    results = collection.query(query_texts=[query], n_results=min(3, len(ids)))
    if not results or not results["documents"]:
        return None

    top_documents = results["documents"][0]
    return "\n\n".join(top_documents) if top_documents else None
|
| 368 |
|
| 369 |
+
# --- Fetch recent video IDs from YouTube channel ---
|
| 370 |
def get_latest_video_ids(channel_id, max_results=5):
|
| 371 |
url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
|
| 372 |
response = requests.get(url)
|
|
|
|
| 445 |
st.title("π EduBot for @icodeguru0")
|
| 446 |
st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).")
|
| 447 |
|
| 448 |
+
uploaded_files = st.file_uploader("π Optionally upload your knowledge files (txt or json)", type=['txt', 'json'], accept_multiple_files=True)
|
| 449 |
user_question = st.text_input("π¬ Ask your question:")
|
| 450 |
|
| 451 |
if user_question:
|
| 452 |
+
vector_data = load_uploaded_vectors(uploaded_files) if uploaded_files else []
|
| 453 |
+
|
| 454 |
+
# Try vector DB first
|
| 455 |
+
vector_context = search_vector_data(user_question, vector_data)
|
| 456 |
if vector_context:
|
| 457 |
+
with st.spinner("π§ Answering from uploaded knowledge..."):
|
| 458 |
answer = ask_groq(vector_context, user_question)
|
| 459 |
st.success(answer)
|
| 460 |
else:
|
| 461 |
+
# Fallback to real-time data
|
| 462 |
+
with st.spinner("πΊ Fetching YouTube videos..."):
|
| 463 |
+
video_info = get_latest_video_ids(channel_id, max_results=5)
|
| 464 |
transcripts = get_video_transcripts(video_info)
|
| 465 |
|
| 466 |
yt_context = ""
|
|
|
|
| 470 |
if user_question.lower() in vid['transcript'].lower():
|
| 471 |
relevant_links.append(vid['link'])
|
| 472 |
|
| 473 |
+
with st.spinner("π Scraping icode.guru..."):
|
| 474 |
+
site_blocks = scrape_icodeguru(BASE_URL, max_pages=5)
|
| 475 |
site_context = "\n\n".join(site_blocks)
|
| 476 |
|
| 477 |
full_context = yt_context + "\n\n" + site_context
|
|
|
|
| 490 |
|
| 491 |
if __name__ == "__main__":
|
| 492 |
main()
|
|
|