"""Gradio study assistant: index a YouTube transcript or a PDF, then chat,
summarize, quiz, or build a mind map from the indexed content."""

import os
from functools import lru_cache
from typing import Any, Optional, Tuple

import gradio as gr
from langchain_community.document_loaders import YoutubeLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA

# --- CONFIGURATION ---
hf_token = os.getenv("HF_TOKEN")
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"

# Substrings that mark a URL as a YouTube link (full and short form).
_YOUTUBE_HOSTS = ("youtube.com", "youtu.be")


# --- HELPER: LLM SETUP ---
def get_llm():
    """Build the Hugging Face endpoint client used by every feature.

    Raises:
        ValueError: if the ``HF_TOKEN`` environment variable was not set
            when this module was imported.
    """
    if not hf_token:
        raise ValueError("HF_TOKEN not found in secrets.")
    return HuggingFaceEndpoint(
        repo_id=repo_id,
        max_new_tokens=512,
        temperature=0.3,
        huggingfacehub_api_token=hf_token,
    )


@lru_cache(maxsize=1)
def _get_embeddings():
    """Create the sentence-embedding model once and reuse it afterwards."""
    return HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


# --- STATE ---
# NOTE: module-level, so every caller in this process reads and overwrites
# the same index; the most recently processed source wins.
vector_db_state = None


# --- 1. PROCESSING ENGINE ---
def process_content(url: Optional[str], file_obj: Any) -> Tuple[str, Any]:
    """Load a YouTube transcript or a PDF and index it in ``vector_db_state``.

    A YouTube URL takes priority over an uploaded file when both are given.

    Args:
        url: Text from the URL box; may be empty or ``None``.
        file_obj: The uploaded file as delivered by ``gr.File`` (an object
            exposing ``.name``, or a plain path string), or ``None``.

    Returns:
        ``(status_message, vector_store)``. ``vector_store`` is ``None``
        whenever nothing was indexed; in that case the previously stored
        index (if any) is left untouched.
    """
    global vector_db_state
    url = (url or "").strip()
    try:
        if url and any(host in url for host in _YOUTUBE_HOSTS):
            docs = YoutubeLoader.from_youtube_url(url, add_video_info=True).load()
            if not docs:
                # Previously surfaced as "list index out of range".
                return "⚠️ No transcript could be loaded for this video.", None
            # Fall back to the URL when the loader supplies no title.
            title = docs[0].metadata.get("title", url)
            msg = f"✅ Loaded YouTube: {title}"
        elif file_obj is not None:
            # Accept both an object with .name and a bare path string.
            pdf_path = getattr(file_obj, "name", file_obj)
            docs = PyPDFLoader(pdf_path).load()
            msg = f"✅ Loaded PDF: {os.path.basename(pdf_path)}"
        else:
            return "⚠️ Please provide a YouTube URL or upload a PDF.", None

        # Split text
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        splits = text_splitter.split_documents(docs)
        if not splits:
            # Building an index from zero chunks would fail with an opaque error.
            return "⚠️ No text could be extracted from this source.", None

        # Create Vector Store
        vector_db_state = FAISS.from_documents(splits, _get_embeddings())
        return msg, vector_db_state
    except Exception as e:  # UI boundary: report the failure instead of crashing
        return f"❌ Error: {str(e)}", None


def _process_for_ui(url, file_obj):
    """Run ``process_content`` and return only the status text.

    The Process button is wired to a single output component, so the
    vector store half of the result must not be handed to Gradio.
    """
    status, _ = process_content(url, file_obj)
    return status


# --- 2. CORE FEATURES ---
def chat_engine(message, history):
    """Answer ``message`` using retrieval over the indexed content.

    ``history`` is accepted because ``gr.ChatInterface`` passes it, but it
    is not used. Returns the answer text, or an error/help string.
    """
    if vector_db_state is None:
        return "Please upload content first."
    try:
        llm = get_llm()
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vector_db_state.as_retriever(),
        )
        response = qa_chain.invoke(message)
        return response['result']
    except Exception as e:
        return f"Error: {str(e)}"


def _generate_from_context(query, k, instruction):
    """Retrieve ``k`` chunks for ``query`` and send them with ``instruction``.

    Shared implementation of the summary, quiz and mind-map features.
    Returns the model output, or an error/help string.
    """
    if vector_db_state is None:
        return "Please upload content first."
    try:
        llm = get_llm()
        retriever = vector_db_state.as_retriever(search_kwargs={"k": k})
        docs = retriever.invoke(query)
        context = "\n\n".join(d.page_content for d in docs)
        prompt = f"[INST] {instruction} {context} [/INST]"
        return llm.invoke(prompt)
    except Exception as e:
        return f"Error: {str(e)}"


def generate_summary():
    """Return a bullet-point summary built from the top 5 retrieved chunks."""
    return _generate_from_context(
        "Summary", 5, "Summarize this content into bullet points:"
    )


def generate_quiz():
    """Return 3 multiple-choice questions built from the top 3 retrieved chunks."""
    return _generate_from_context(
        "Key Concepts",
        3,
        "Create 3 multiple choice questions (with answers) based on this:",
    )


def generate_mindmap():
    """Return Mermaid mind-map code built from the top 4 retrieved chunks."""
    return _generate_from_context(
        "Structure",
        4,
        "Create a Mermaid.js mindmap (graph TD) code block based on this:",
    )


# --- 3. UI ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧠 OpenLearn AI (Stable Build)")

    with gr.Row():
        with gr.Column(scale=1):
            yt_input = gr.Textbox(label="YouTube URL")
            pdf_input = gr.File(label="Upload PDF")
            process_btn = gr.Button("🚀 Process", variant="primary")
            status_output = gr.Textbox(label="Status", interactive=False)
            summ_btn = gr.Button("📝 Summary")
            quiz_btn = gr.Button("❓ Quiz")
            map_btn = gr.Button("🗺️ Mind Map")
        with gr.Column(scale=2):
            chatbot = gr.ChatInterface(fn=chat_engine)

    with gr.Row():
        summ_output = gr.Markdown(label="Summary")
        quiz_output = gr.Markdown(label="Quiz")
        map_output = gr.Code(label="Mind Map Code")

    # One output component, so the handler must return exactly one value.
    process_btn.click(
        _process_for_ui, inputs=[yt_input, pdf_input], outputs=[status_output]
    )
    summ_btn.click(generate_summary, inputs=None, outputs=summ_output)
    quiz_btn.click(generate_quiz, inputs=None, outputs=quiz_output)
    map_btn.click(generate_mindmap, inputs=None, outputs=map_output)

if __name__ == "__main__":
    demo.launch()