# ytbbot-ai / app.py
# simar007's picture
# Update app.py
# f916d1b verified
import os
import gradio as gr
from langchain_community.document_loaders import YoutubeLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
# --- CONFIGURATION ---
# Hugging Face API token, read once at import time; None when HF_TOKEN is unset.
hf_token = os.environ.get("HF_TOKEN")
# Hugging Face Hub model repository handed to the endpoint built in get_llm().
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
# --- HELPER: LLM SETUP ---
def get_llm():
    """Build the Hugging Face endpoint client used for all generation.

    Returns:
        A ``HuggingFaceEndpoint`` configured with the module-level
        ``repo_id`` and ``hf_token``, 512 max new tokens and temperature 0.3.

    Raises:
        ValueError: If ``hf_token`` is empty or ``None``.
    """
    if hf_token:
        endpoint_options = {
            "repo_id": repo_id,
            "max_new_tokens": 512,
            "temperature": 0.3,
            "huggingfacehub_api_token": hf_token,
        }
        return HuggingFaceEndpoint(**endpoint_options)
    raise ValueError("HF_TOKEN not found in secrets.")
# --- STATE ---
# Module-level vector store shared by every handler in this file.
# process_content() assigns the FAISS index it builds here; chat_engine() and
# the generate_* functions read it and treat None as "nothing processed yet".
vector_db_state = None
# --- 1. PROCESSING ENGINE ---
def process_content(url, file_obj):
    """Load a YouTube transcript or a PDF and index it for retrieval.

    The loaded documents are split into overlapping chunks, embedded, and
    stored in a FAISS index that is published through the module-level
    ``vector_db_state`` so the chat and generator functions can query it.
    The previous index is left untouched when nothing new could be indexed.

    Args:
        url: YouTube video URL containing ``youtube.com`` or ``youtu.be``;
            may be empty or ``None``. Takes precedence over ``file_obj``.
        file_obj: Uploaded PDF, either an object exposing its path as
            ``.name`` or a plain path string; may be ``None``.

    Returns:
        A ``(status_message, vector_store)`` tuple. ``vector_store`` is
        ``None`` whenever nothing was indexed (no usable input, no
        extractable text, or an error while loading/embedding).
    """
    global vector_db_state
    url = (url or "").strip()
    try:
        if any(host in url for host in ("youtube.com", "youtu.be")):
            docs = YoutubeLoader.from_youtube_url(url, add_video_info=True).load()
            # The title is optional metadata; fall back to the URL itself.
            title = docs[0].metadata.get("title", url) if docs else url
            msg = f"✅ Loaded YouTube: {title}"
        elif file_obj is not None:
            # Accept both file-like upload objects and bare path strings.
            pdf_path = getattr(file_obj, "name", file_obj)
            docs = PyPDFLoader(pdf_path).load()
            msg = f"✅ Loaded PDF: {os.path.basename(pdf_path)}"
        else:
            return "⚠️ Please provide a YouTube URL or upload a PDF.", None

        # A loader can legitimately come back empty (e.g. a video without a
        # transcript); report that instead of failing on docs[0] or in FAISS.
        if not docs:
            return "⚠️ No text could be extracted from the provided source.", None

        # Split text
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        splits = text_splitter.split_documents(docs)
        if not splits:
            return "⚠️ No text could be extracted from the provided source.", None

        # Create Vector Store
        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vector_db_state = FAISS.from_documents(splits, embeddings)
        return msg, vector_db_state
    except Exception as e:
        # UI boundary: surface any loader/embedding failure as a status string.
        return f"❌ Error: {str(e)}", None
# --- 2. CORE FEATURES ---
def chat_engine(message, history):
    """Answer a chat message with retrieval-backed QA over the indexed content.

    Args:
        message: The user's question.
        history: Earlier conversation turns passed in by the chat UI; unused.

    Returns:
        The chain's ``result`` text, a reminder to upload content when no
        index exists yet, or an ``Error: ...`` string if anything fails.
    """
    store = vector_db_state
    if store is None:
        return "Please upload content first."
    try:
        model = get_llm()
        chain = RetrievalQA.from_chain_type(
            llm=model,
            chain_type="stuff",
            retriever=store.as_retriever(),
        )
        answer = chain.invoke(message)
        return answer["result"]
    except Exception as err:
        return f"Error: {err!s}"
def _generate_from_context(query, k, instruction):
    """Retrieve the top-``k`` chunks for ``query`` and apply ``instruction`` to them.

    Args:
        query: Search text sent to the retriever to pick context chunks.
        k: Number of chunks to retrieve.
        instruction: Task sentence placed at the start of the ``[INST]`` prompt.

    Returns:
        The LLM's output, a reminder to upload content when no index exists
        yet, or an ``Error: ...`` string if anything fails.
    """
    if vector_db_state is None:
        return "Please upload content first."
    try:
        llm = get_llm()
        retriever = vector_db_state.as_retriever(search_kwargs={"k": k})
        docs = retriever.invoke(query)
        context = "\n\n".join(d.page_content for d in docs)
        prompt = f"[INST] {instruction}\n{context}\n[/INST]"
        return llm.invoke(prompt)
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Error: {str(e)}"


def generate_summary():
    """Return a bullet-point summary built from the 5 chunks matching "Summary"."""
    return _generate_from_context(
        "Summary", 5, "Summarize this content into bullet points:"
    )


def generate_quiz():
    """Return 3 multiple-choice questions built from the 3 chunks matching "Key Concepts"."""
    return _generate_from_context(
        "Key Concepts",
        3,
        "Create 3 multiple choice questions (with answers) based on this:",
    )


def generate_mindmap():
    """Return Mermaid.js mindmap code built from the 4 chunks matching "Structure"."""
    return _generate_from_context(
        "Structure",
        4,
        "Create a Mermaid.js mindmap (graph TD) code block based on this:",
    )
# --- 3. UI ---
# Layout: a controls column beside the chat, with a results row underneath.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧠 OpenLearn AI (Stable Build)")
    with gr.Row():
        with gr.Column(scale=1):
            yt_input = gr.Textbox(label="YouTube URL")
            pdf_input = gr.File(label="Upload PDF")
            process_btn = gr.Button("🚀 Process", variant="primary")
            status_output = gr.Textbox(label="Status", interactive=False)
            summ_btn = gr.Button("📝 Summary")
            quiz_btn = gr.Button("❓ Quiz")
            map_btn = gr.Button("🗺️ Mind Map")
        with gr.Column(scale=2):
            chatbot = gr.ChatInterface(fn=chat_engine)
    with gr.Row():
        summ_output = gr.Markdown(label="Summary")
        quiz_output = gr.Markdown(label="Quiz")
        map_output = gr.Code(label="Mind Map Code")

    def _process_and_report(url, file_obj):
        """Run process_content and keep only its status message.

        process_content returns ``(status, vector_store)`` but the click
        below has a single output component, so the store is dropped here.
        """
        return process_content(url, file_obj)[0]

    process_btn.click(
        _process_and_report,
        inputs=[yt_input, pdf_input],
        outputs=[status_output],
        # Keep the endpoint name the handler had when it was wired directly.
        api_name="process_content",
    )
    summ_btn.click(generate_summary, inputs=None, outputs=summ_output)
    quiz_btn.click(generate_quiz, inputs=None, outputs=quiz_output)
    map_btn.click(generate_mindmap, inputs=None, outputs=map_output)

if __name__ == "__main__":
    demo.launch()