Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -312,6 +312,185 @@
|
|
| 312 |
# if __name__ == "__main__":
|
| 313 |
# main()
|
| 314 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
|
| 316 |
|
| 317 |
|
|
@@ -326,8 +505,6 @@ from sentence_transformers import SentenceTransformer
|
|
| 326 |
import chromadb
|
| 327 |
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
|
| 328 |
|
| 329 |
-
import json
|
| 330 |
-
|
| 331 |
nest_asyncio.apply()
|
| 332 |
|
| 333 |
# --- CONFIGURATION ---
|
|
@@ -337,31 +514,13 @@ channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # iCodeGuru
|
|
| 337 |
BASE_URL = "https://icode.guru"
|
| 338 |
|
| 339 |
groq_client = Groq(api_key=GROQ_API_KEY)
|
| 340 |
-
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
|
| 341 |
-
|
| 342 |
embedding_function = SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2")
|
| 343 |
|
| 344 |
chroma_client = chromadb.Client()
|
| 345 |
collection = chroma_client.get_or_create_collection("icodeguru_knowledge", embedding_function=embedding_function)
|
| 346 |
|
| 347 |
-
# --- Upload + load files as vector DB ---
|
| 348 |
-
def load_uploaded_vectors(uploaded_files):
|
| 349 |
-
data = []
|
| 350 |
-
for file in uploaded_files:
|
| 351 |
-
if file.name.endswith(".txt"):
|
| 352 |
-
text = file.read().decode()
|
| 353 |
-
data.append({"id": file.name, "content": text})
|
| 354 |
-
elif file.name.endswith(".json"):
|
| 355 |
-
content = json.load(file)
|
| 356 |
-
for i, chunk in enumerate(content):
|
| 357 |
-
data.append({"id": f"{file.name}-{i}", "content": chunk})
|
| 358 |
-
return data
|
| 359 |
-
|
| 360 |
-
def search_vector_data(query, data):
|
| 361 |
-
if not data:
|
| 362 |
-
return None
|
| 363 |
-
collection = chroma_client.get_or_create_collection("temp_query", embedding_function=embedding_function)
|
| 364 |
-
collection.add(documents=[d["content"] for d in data], ids=[d["id"] for d in data])
|
| 365 |
results = collection.query(query_texts=[query], n_results=3)
|
| 366 |
if results and results["documents"]:
|
| 367 |
return "\n\n".join([doc for doc in results["documents"][0]])
|
|
@@ -446,16 +605,13 @@ def main():
|
|
| 446 |
st.title("π EduBot for @icodeguru0")
|
| 447 |
st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).")
|
| 448 |
|
| 449 |
-
uploaded_files = st.file_uploader("π Optionally upload your knowledge files (txt or json)", type=['txt', 'json'], accept_multiple_files=True)
|
| 450 |
user_question = st.text_input("π¬ Ask your question:")
|
| 451 |
|
| 452 |
if user_question:
|
| 453 |
-
vector_data = load_uploaded_vectors(uploaded_files) if uploaded_files else []
|
| 454 |
-
|
| 455 |
# Try vector DB first
|
| 456 |
-
vector_context = search_vector_data(user_question, vector_data)
|
| 457 |
if vector_context:
|
| 458 |
-
with st.spinner("π§ Answering from uploaded knowledge..."):
|
| 459 |
answer = ask_groq(vector_context, user_question)
|
| 460 |
st.success(answer)
|
| 461 |
else:
|
|
@@ -491,3 +647,4 @@ def main():
|
|
| 491 |
|
| 492 |
if __name__ == "__main__":
|
| 493 |
main()
|
|
|
|
|
|
| 312 |
# if __name__ == "__main__":
|
| 313 |
# main()
|
| 314 |
|
| 315 |
+
#vectordb
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
# import nest_asyncio
|
| 319 |
+
# import streamlit as st
|
| 320 |
+
# import os
|
| 321 |
+
# import requests
|
| 322 |
+
# from youtube_transcript_api import YouTubeTranscriptApi
|
| 323 |
+
# from groq import Groq
|
| 324 |
+
# from bs4 import BeautifulSoup
|
| 325 |
+
# from sentence_transformers import SentenceTransformer
|
| 326 |
+
# import chromadb
|
| 327 |
+
# from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
|
| 328 |
+
|
| 329 |
+
# import json
|
| 330 |
+
|
| 331 |
+
# nest_asyncio.apply()
|
| 332 |
+
|
| 333 |
+
# # --- CONFIGURATION ---
|
| 334 |
+
# YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")
|
| 335 |
+
# GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
|
| 336 |
+
# channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # iCodeGuru
|
| 337 |
+
# BASE_URL = "https://icode.guru"
|
| 338 |
+
|
| 339 |
+
# groq_client = Groq(api_key=GROQ_API_KEY)
|
| 340 |
+
# from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
|
| 341 |
+
|
| 342 |
+
# embedding_function = SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2")
|
| 343 |
+
|
| 344 |
+
# chroma_client = chromadb.Client()
|
| 345 |
+
# collection = chroma_client.get_or_create_collection("icodeguru_knowledge", embedding_function=embedding_function)
|
| 346 |
+
|
| 347 |
+
# # --- Upload + load files as vector DB ---
|
| 348 |
+
# def load_uploaded_vectors(uploaded_files):
|
| 349 |
+
# data = []
|
| 350 |
+
# for file in uploaded_files:
|
| 351 |
+
# if file.name.endswith(".txt"):
|
| 352 |
+
# text = file.read().decode()
|
| 353 |
+
# data.append({"id": file.name, "content": text})
|
| 354 |
+
# elif file.name.endswith(".json"):
|
| 355 |
+
# content = json.load(file)
|
| 356 |
+
# for i, chunk in enumerate(content):
|
| 357 |
+
# data.append({"id": f"{file.name}-{i}", "content": chunk})
|
| 358 |
+
# return data
|
| 359 |
+
|
| 360 |
+
# def search_vector_data(query, data):
|
| 361 |
+
# if not data:
|
| 362 |
+
# return None
|
| 363 |
+
# collection = chroma_client.get_or_create_collection("temp_query", embedding_function=embedding_function)
|
| 364 |
+
# collection.add(documents=[d["content"] for d in data], ids=[d["id"] for d in data])
|
| 365 |
+
# results = collection.query(query_texts=[query], n_results=3)
|
| 366 |
+
# if results and results["documents"]:
|
| 367 |
+
# return "\n\n".join([doc for doc in results["documents"][0]])
|
| 368 |
+
# return None
|
| 369 |
+
|
| 370 |
+
# # --- Fetch recent video IDs from YouTube channel ---
|
| 371 |
+
# def get_latest_video_ids(channel_id, max_results=5):
|
| 372 |
+
# url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
|
| 373 |
+
# response = requests.get(url)
|
| 374 |
+
# videos = response.json().get('items', [])
|
| 375 |
+
|
| 376 |
+
# valid_videos = []
|
| 377 |
+
# for v in videos:
|
| 378 |
+
# if v['id']['kind'] == 'youtube#video':
|
| 379 |
+
# title = v['snippet']['title']
|
| 380 |
+
# channel_title = v['snippet']['channelTitle']
|
| 381 |
+
# video_id = v['id']['videoId']
|
| 382 |
+
# if "icodeguru" in channel_title.lower():
|
| 383 |
+
# valid_videos.append((video_id, title))
|
| 384 |
+
# return valid_videos
|
| 385 |
+
|
| 386 |
+
# # --- Get video transcripts ---
|
| 387 |
+
# def get_video_transcripts(video_info):
|
| 388 |
+
# results = []
|
| 389 |
+
# for vid, title in video_info:
|
| 390 |
+
# try:
|
| 391 |
+
# transcript = YouTubeTranscriptApi.get_transcript(vid)
|
| 392 |
+
# text = " ".join([t['text'] for t in transcript])
|
| 393 |
+
# video_link = f"https://www.youtube.com/watch?v={vid}"
|
| 394 |
+
# results.append({
|
| 395 |
+
# "video_id": vid,
|
| 396 |
+
# "title": title,
|
| 397 |
+
# "link": video_link,
|
| 398 |
+
# "transcript": text
|
| 399 |
+
# })
|
| 400 |
+
# except:
|
| 401 |
+
# continue
|
| 402 |
+
# return results
|
| 403 |
+
|
| 404 |
+
# # --- Scrape icode.guru ---
|
| 405 |
+
# def scrape_icodeguru(base_url=BASE_URL, max_pages=5):
|
| 406 |
+
# visited = set()
|
| 407 |
+
# blocks = []
|
| 408 |
+
|
| 409 |
+
# def crawl(url):
|
| 410 |
+
# if url in visited or len(visited) >= max_pages:
|
| 411 |
+
# return
|
| 412 |
+
# visited.add(url)
|
| 413 |
+
# try:
|
| 414 |
+
# res = requests.get(url, timeout=10)
|
| 415 |
+
# soup = BeautifulSoup(res.content, "html.parser")
|
| 416 |
+
# page_text = soup.get_text(separator=" ", strip=True)
|
| 417 |
+
# if len(page_text) > 100:
|
| 418 |
+
# blocks.append(f"[{url}]({url}):\n{page_text[:1500]}")
|
| 419 |
+
# for link in soup.find_all("a", href=True):
|
| 420 |
+
# href = link['href']
|
| 421 |
+
# if href.startswith("/"):
|
| 422 |
+
# href = base_url + href
|
| 423 |
+
# if href.startswith(base_url):
|
| 424 |
+
# crawl(href)
|
| 425 |
+
# except:
|
| 426 |
+
# pass
|
| 427 |
+
|
| 428 |
+
# crawl(base_url)
|
| 429 |
+
# return blocks
|
| 430 |
+
|
| 431 |
+
# # --- Ask Groq ---
|
| 432 |
+
# def ask_groq(context, question):
|
| 433 |
+
# messages = [
|
| 434 |
+
# {"role": "system", "content": "You are a helpful assistant. Always provide relevant video and website links if possible."},
|
| 435 |
+
# {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):"}
|
| 436 |
+
# ]
|
| 437 |
+
# chat_completion = groq_client.chat.completions.create(
|
| 438 |
+
# model="llama3-8b-8192",
|
| 439 |
+
# messages=messages,
|
| 440 |
+
# )
|
| 441 |
+
# return chat_completion.choices[0].message.content.strip()
|
| 442 |
+
|
| 443 |
+
# #--- STREAMLIT APP ---
|
| 444 |
+
# def main():
|
| 445 |
+
# st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide")
|
| 446 |
+
# st.title("π EduBot for @icodeguru0")
|
| 447 |
+
# st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).")
|
| 448 |
+
|
| 449 |
+
# uploaded_files = st.file_uploader("π Optionally upload your knowledge files (txt or json)", type=['txt', 'json'], accept_multiple_files=True)
|
| 450 |
+
# user_question = st.text_input("π¬ Ask your question:")
|
| 451 |
+
|
| 452 |
+
# if user_question:
|
| 453 |
+
# vector_data = load_uploaded_vectors(uploaded_files) if uploaded_files else []
|
| 454 |
+
|
| 455 |
+
# # Try vector DB first
|
| 456 |
+
# vector_context = search_vector_data(user_question, vector_data)
|
| 457 |
+
# if vector_context:
|
| 458 |
+
# with st.spinner("π§ Answering from uploaded knowledge..."):
|
| 459 |
+
# answer = ask_groq(vector_context, user_question)
|
| 460 |
+
# st.success(answer)
|
| 461 |
+
# else:
|
| 462 |
+
# # Fallback to real-time data
|
| 463 |
+
# with st.spinner("πΊ Fetching YouTube videos..."):
|
| 464 |
+
# video_info = get_latest_video_ids(channel_id, max_results=5)
|
| 465 |
+
# transcripts = get_video_transcripts(video_info)
|
| 466 |
+
|
| 467 |
+
# yt_context = ""
|
| 468 |
+
# relevant_links = []
|
| 469 |
+
# for vid in transcripts:
|
| 470 |
+
# yt_context += f"\n\n[Video: {vid['title']}]({vid['link']}):\n{vid['transcript'][:1500]}"
|
| 471 |
+
# if user_question.lower() in vid['transcript'].lower():
|
| 472 |
+
# relevant_links.append(vid['link'])
|
| 473 |
+
|
| 474 |
+
# with st.spinner("π Scraping icode.guru..."):
|
| 475 |
+
# site_blocks = scrape_icodeguru(BASE_URL, max_pages=5)
|
| 476 |
+
# site_context = "\n\n".join(site_blocks)
|
| 477 |
+
|
| 478 |
+
# full_context = yt_context + "\n\n" + site_context
|
| 479 |
+
|
| 480 |
+
# with st.spinner("π§ Thinking..."):
|
| 481 |
+
# answer = ask_groq(full_context, user_question)
|
| 482 |
+
# st.success(answer)
|
| 483 |
+
|
| 484 |
+
# if relevant_links:
|
| 485 |
+
# st.markdown("### π Related YouTube Links")
|
| 486 |
+
# for link in relevant_links:
|
| 487 |
+
# st.markdown(f"- [Watch Video]({link})")
|
| 488 |
+
|
| 489 |
+
# st.markdown("---")
|
| 490 |
+
# st.caption("Powered by YouTube, iCodeGuru, and Groq")
|
| 491 |
+
|
| 492 |
+
# if __name__ == "__main__":
|
| 493 |
+
# main()
|
| 494 |
|
| 495 |
|
| 496 |
|
|
|
|
| 505 |
import chromadb
|
| 506 |
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
|
| 507 |
|
|
|
|
|
|
|
| 508 |
nest_asyncio.apply()
|
| 509 |
|
| 510 |
# --- CONFIGURATION ---
|
|
|
|
| 514 |
BASE_URL = "https://icode.guru"
|
| 515 |
|
| 516 |
groq_client = Groq(api_key=GROQ_API_KEY)
|
|
|
|
|
|
|
| 517 |
embedding_function = SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2")
|
| 518 |
|
| 519 |
chroma_client = chromadb.Client()
|
| 520 |
collection = chroma_client.get_or_create_collection("icodeguru_knowledge", embedding_function=embedding_function)
|
| 521 |
|
| 522 |
+
# --- Search persistent vector DB ---
|
| 523 |
+
def search_vector_data(query):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 524 |
results = collection.query(query_texts=[query], n_results=3)
|
| 525 |
if results and results["documents"]:
|
| 526 |
return "\n\n".join([doc for doc in results["documents"][0]])
|
|
|
|
| 605 |
st.title("π EduBot for @icodeguru0")
|
| 606 |
st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).")
|
| 607 |
|
|
|
|
| 608 |
user_question = st.text_input("π¬ Ask your question:")
|
| 609 |
|
| 610 |
if user_question:
|
|
|
|
|
|
|
| 611 |
# Try vector DB first
|
| 612 |
+
vector_context = search_vector_data(user_question)
|
| 613 |
if vector_context:
|
| 614 |
+
with st.spinner("π§ Answering from knowledge base..."):
|
| 615 |
answer = ask_groq(vector_context, user_question)
|
| 616 |
st.success(answer)
|
| 617 |
else:
|
|
|
|
| 647 |
|
| 648 |
if __name__ == "__main__":
|
| 649 |
main()
|
| 650 |
+
|