Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_groq import ChatGroq | |
| from langchain_core.prompts import ChatPromptTemplate | |
| from langchain_core.runnables import RunnablePassthrough | |
| from langchain_core.output_parsers import StrOutputParser | |
# ───────────────────────── CONFIG ─────────────────────────
# Model name handed to HuggingFaceEmbeddings for chunk embedding.
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Model name handed to ChatGroq for answer generation.
GROQ_MODEL = "llama-3.1-8b-instant"
# Number of chunks the retriever returns per question (the `k` search kwarg).
TOP_K = 3

# Do not write os.getenv() back into os.environ: that is a no-op when the key
# exists and raises TypeError (environ values must be str) when it does not.
# Surface a missing key explicitly so the cause is obvious in the logs.
if not os.getenv("GROQ_API_KEY"):
    print(
        "WARNING: GROQ_API_KEY is not set; requests to Groq will fail "
        "until it is provided in the environment.",
        flush=True,
    )
# ───────────────────────── INIT MODELS ─────────────────────────
# Shared embedding model, created once when the module loads and reused for
# every FAISS index build/update. It is configured to run on CPU and to
# normalise the vectors it produces.
embeddings = HuggingFaceEmbeddings(
    model_name=EMBED_MODEL,
    model_kwargs=dict(device="cpu"),
    encode_kwargs=dict(normalize_embeddings=True),
)
def create_llm():
    """Build the Groq chat client used to generate answers.

    Returns:
        A ``ChatGroq`` instance for ``GROQ_MODEL`` with ``temperature=0.2``
        and ``max_tokens=1024``.

    Raises:
        KeyError: If ``GROQ_API_KEY`` is missing from the environment.
    """
    settings = {
        "model": GROQ_MODEL,
        "temperature": 0.2,
        "max_tokens": 1024,
        "groq_api_key": os.environ["GROQ_API_KEY"],
    }
    return ChatGroq(**settings)
# Prompt template for the answer-generation step. It exposes two template
# variables, `{context}` and `{question}`, and instructs the model to answer
# only from the supplied context and to say so when the answer is not there.
# NOTE(review): the whitespace inside the literal is part of the prompt sent
# to the model — keep it exactly as written.
RAG_PROMPT = ChatPromptTemplate.from_template("""
You are a helpful assistant.
Answer ONLY using the context below.
If not found, say you don't have enough information.
Context:
{context}
Question: {question}
Answer:
""")
def format_docs(docs):
    """Concatenate the text of *docs* into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line; an empty string when *docs* is empty.
    """
    separator = "\n\n"
    texts = [doc.page_content for doc in docs]
    return separator.join(texts)
# ───────────────────────── GLOBAL STATE ─────────────────────────
# Module-level state written by process_pdf() and read by chat(). Being
# module globals, both objects are shared by every caller in this process.
# `vectorstore` holds the FAISS index and `rag_chain` the runnable QA
# pipeline; both stay None until a PDF has been processed.
vectorstore = rag_chain = None
# ───────────────────────── PROCESS PDF ─────────────────────────
def _uploaded_path(file):
    """Return the filesystem path of an upload given as a path or a file-like object."""
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return file.name


def _build_rag_chain(store):
    """Assemble the retrieve -> prompt -> LLM -> plain-text pipeline over *store*."""
    retriever = store.as_retriever(search_kwargs={"k": TOP_K})
    llm = create_llm()
    return (
        {
            "context": retriever | format_docs,
            "question": RunnablePassthrough(),
        }
        | RAG_PROMPT
        | llm
        | StrOutputParser()
    )


def process_pdf(file):
    """Index an uploaded PDF and (re)build the question-answering chain.

    The PDF is loaded, split into 500-character chunks with a 50-character
    overlap, and embedded into the module-level FAISS ``vectorstore``. The
    first upload creates the index; later uploads add to it. ``rag_chain`` is
    then rebuilt over the updated index.

    Args:
        file: The value delivered by the ``gr.File`` input: ``None`` when
            nothing is selected, otherwise either a path (``str`` /
            ``os.PathLike``) or an object whose ``name`` attribute is the path.

    Returns:
        A status message for the UI: a prompt to upload when *file* is
        ``None``, a notice when the PDF yields no text, or a success line
        with the number of chunks indexed.

    Raises:
        Whatever the PDF loader, embedding model, FAISS or ``create_llm``
        raise; those errors are not handled here.
    """
    global vectorstore, rag_chain

    if file is None:
        return "Upload a PDF first."

    pages = PyPDFLoader(_uploaded_path(file)).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(pages)

    # A PDF without a text layer (e.g. scanned pages) produces no chunks, and
    # an index cannot be built from zero documents, so report it instead of
    # failing inside FAISS.
    if not chunks:
        return "No extractable text found in this PDF."

    if vectorstore is None:
        vectorstore = FAISS.from_documents(chunks, embeddings)
    else:
        vectorstore.add_documents(chunks)

    rag_chain = _build_rag_chain(vectorstore)
    return f"β PDF processed successfully!\nChunks: {len(chunks)}"
# ───────────────────────── CHAT FUNCTION ─────────────────────────
def chat(message, history):
    """Answer *message* with the RAG chain and append the exchange to *history*.

    Args:
        message: The text submitted from the question box.
        history: The chatbot's current messages as a list of
            ``{"role": ..., "content": ...}`` dicts; ``None`` is treated as
            an empty conversation.

    Returns:
        A ``("", history)`` tuple: the empty string clears the input box and
        *history* carries the user message followed by the assistant reply.
        Blank submissions return the history unchanged.
    """
    history = [] if history is None else history

    # A blank submission carries no question: skip the LLM call and do not
    # add an empty bubble to the conversation.
    if not message or not message.strip():
        return "", history

    if rag_chain is None:
        reply = "Please upload a PDF first."
    else:
        reply = rag_chain.invoke(message)

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": reply})
    return "", history
# ───────────────────────── UI ─────────────────────────
# Single-page UI: PDF upload controls, a status box, and the chat area.
# NOTE(review): the nesting below is reconstructed — the Row is assumed to
# contain only the file picker and the button; confirm against the intended
# layout.
with gr.Blocks(title="RAG Chatbot") as demo:
    gr.Markdown("## π PDF RAG Chatbot (Groq + FAISS + LangChain)")
    with gr.Row():
        file = gr.File(label="Upload PDF")
        upload_btn = gr.Button("Process PDF")
    status = gr.Textbox(label="Status")
    # chat() returns the history as role/content dicts.
    # NOTE(review): confirm the installed Gradio version's Chatbot accepts
    # that message format with default arguments.
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Ask a question")
    # Clicking the button runs process_pdf on the selected file and shows the
    # returned message in the status box.
    upload_btn.click(process_pdf, inputs=file, outputs=status)
    # Submitting the textbox runs chat(); its first return value ("") is
    # written back to the textbox and the second to the chatbot.
    msg.submit(chat, inputs=[msg, chatbot], outputs=[msg, chatbot])
# Executed at module level, so the app starts whenever this file is run.
demo.launch()