|
|
import os |
|
|
import gradio as gr |
|
|
from langchain_community.document_loaders import YoutubeLoader, PyPDFLoader |
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
|
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint |
|
|
from langchain_community.vectorstores import FAISS |
|
|
from langchain.chains import RetrievalQA |
|
|
|
|
|
|
|
|
# Hugging Face Inference API token, supplied via the environment
# (e.g. a Space secret); get_llm() fails fast if it is missing.
hf_token = os.getenv("HF_TOKEN")

# Hosted instruct model used for every LLM call in this app.
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
|
|
|
|
|
|
|
|
def get_llm():
    """Build the HuggingFaceEndpoint client for the configured model.

    Returns:
        A ``HuggingFaceEndpoint`` pointed at ``repo_id``.

    Raises:
        ValueError: if the HF_TOKEN secret was not provided.
    """
    if not hf_token:
        raise ValueError("HF_TOKEN not found in secrets.")

    endpoint_config = {
        "repo_id": repo_id,
        "max_new_tokens": 512,
        "temperature": 0.3,
        "huggingfacehub_api_token": hf_token,
    }
    return HuggingFaceEndpoint(**endpoint_config)
|
|
|
|
|
|
|
|
# Module-level FAISS index shared by all Gradio callbacks.
# None until process_content() has successfully indexed some content.
vector_db_state = None
|
|
|
|
|
|
|
|
def process_content(url, file_obj):
    """Load a YouTube transcript or a PDF, split it, and build the FAISS index.

    The index is stored in the module-level ``vector_db_state`` so the chat,
    summary, quiz, and mind-map handlers can reach it.

    Args:
        url: Optional YouTube URL (accepts both youtube.com and youtu.be links).
        file_obj: Optional uploaded PDF (Gradio File object with a ``.name`` path).

    Returns:
        A single status string. (The previous version also returned the vector
        store, but the click handler declares only one output component, so the
        extra value caused a Gradio output-count mismatch; the store is shared
        via the global instead.)
    """
    global vector_db_state

    try:
        # Accept both full and short-form YouTube links.
        if url and ("youtube.com" in url or "youtu.be" in url):
            loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
            docs = loader.load()
            # 'title' may be absent depending on the loader backend; fall back
            # to the URL instead of raising KeyError.
            title = docs[0].metadata.get("title", url) if docs else url
            msg = f"✅ Loaded YouTube: {title}"
        elif file_obj is not None:
            loader = PyPDFLoader(file_obj.name)
            docs = loader.load()
            msg = f"✅ Loaded PDF: {os.path.basename(file_obj.name)}"
        else:
            return "⚠️ Please provide a YouTube URL or upload a PDF."

        # Guard: an empty transcript/PDF would produce an empty (useless) index.
        if not docs:
            return "⚠️ No readable content found in the source."

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        splits = text_splitter.split_documents(docs)

        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vector_db_state = FAISS.from_documents(splits, embeddings)

        return msg

    except Exception as e:
        # Surface loader/embedding failures to the UI rather than crashing.
        return f"❌ Error: {str(e)}"
|
|
|
|
|
|
|
|
def chat_engine(message, history):
    """Answer a user question with retrieval-augmented QA over the indexed content.

    Args:
        message: The user's question.
        history: Prior chat turns (required by gr.ChatInterface; unused here).

    Returns:
        The model's answer string, or a human-readable error message.
    """
    if vector_db_state is None:
        return "Please upload content first."

    try:
        chain = RetrievalQA.from_chain_type(
            llm=get_llm(),
            chain_type="stuff",
            retriever=vector_db_state.as_retriever(),
        )
        result = chain.invoke(message)
        return result['result']
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
def generate_summary():
    """Summarize the indexed content as bullet points using the LLM.

    Returns:
        The model's bullet-point summary, or an error/status string.
    """
    if vector_db_state is None:
        return "Please upload content first."

    try:
        llm = get_llm()
        # Fetch the 5 chunks most relevant to a generic "Summary" query.
        top_chunks = vector_db_state.as_retriever(search_kwargs={"k": 5}).invoke("Summary")
        ctx = "\n\n".join(chunk.page_content for chunk in top_chunks)

        prompt = f"""[INST] Summarize this content into bullet points:
{ctx}
[/INST]"""
        return llm.invoke(prompt)
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
def generate_quiz():
    """Generate three multiple-choice questions (with answers) from the content.

    Returns:
        The model's quiz text, or an error/status string.
    """
    if vector_db_state is None:
        return "Please upload content first."

    try:
        llm = get_llm()
        # Retrieve the 3 chunks most relevant to the material's key concepts.
        top_chunks = vector_db_state.as_retriever(search_kwargs={"k": 3}).invoke("Key Concepts")
        ctx = "\n\n".join(chunk.page_content for chunk in top_chunks)

        prompt = f"""[INST] Create 3 multiple choice questions (with answers) based on this:
{ctx}
[/INST]"""
        return llm.invoke(prompt)
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
def generate_mindmap():
    """Ask the LLM for a Mermaid.js mind-map code block describing the content.

    Returns:
        The model's Mermaid code block, or an error/status string.
    """
    if vector_db_state is None:
        return "Please upload content first."

    try:
        llm = get_llm()
        # Retrieve the 4 chunks most relevant to the document's structure.
        top_chunks = vector_db_state.as_retriever(search_kwargs={"k": 4}).invoke("Structure")
        ctx = "\n\n".join(chunk.page_content for chunk in top_chunks)

        prompt = f"""[INST] Create a Mermaid.js mindmap (graph TD) code block based on this:
{ctx}
[/INST]"""
        return llm.invoke(prompt)
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
# --- Gradio UI: left column = inputs and action buttons, right column = chat,
# bottom row = generated study aids. Component creation order defines layout. ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧠 OpenLearn AI (Stable Build)")

    with gr.Row():
        with gr.Column(scale=1):
            # Content sources: a YouTube URL or an uploaded PDF.
            yt_input = gr.Textbox(label="YouTube URL")
            pdf_input = gr.File(label="Upload PDF")
            process_btn = gr.Button("🚀 Process", variant="primary")
            # Read-only status line fed by process_content().
            status_output = gr.Textbox(label="Status", interactive=False)

            # Study-aid generators; all read the shared vector index.
            summ_btn = gr.Button("📝 Summary")
            quiz_btn = gr.Button("❓ Quiz")
            map_btn = gr.Button("🗺️ Mind Map")

        with gr.Column(scale=2):
            # RAG chat over the indexed content; ChatInterface supplies history.
            chatbot = gr.ChatInterface(fn=chat_engine)

    with gr.Row():
        summ_output = gr.Markdown(label="Summary")
        quiz_output = gr.Markdown(label="Quiz")
        map_output = gr.Code(label="Mind Map Code")

    # Event wiring: each button maps to one handler; outputs must match the
    # handler's return arity.
    process_btn.click(process_content, inputs=[yt_input, pdf_input], outputs=[status_output])
    summ_btn.click(generate_summary, inputs=None, outputs=summ_output)
    quiz_btn.click(generate_quiz, inputs=None, outputs=quiz_output)
    map_btn.click(generate_mindmap, inputs=None, outputs=map_output)
|
|
|
|
|
# Start the Gradio server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()