# NOTE: "Spaces: Sleeping" below the original file header is a Hugging Face
# Space status banner captured by the page scrape — it is not part of the code.
| import gradio as gr | |
| import os | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_core.prompts import ChatPromptTemplate | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain_core.runnables import RunnablePassthrough | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from groq import Groq # Using Groq's direct client instead of LangChain wrapper | |
| from dotenv import load_dotenv | |
| # Initialize Groq client with your direct API key | |
| GROQ_API_KEY = "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" # Paste your actual Groq API key here | |
| groq_client = Groq(api_key=GROQ_API_KEY) | |
def process_pdf(file):
    """Load an uploaded PDF, split it into chunks, and build a FAISS index.

    The index is persisted to the local directory ``faiss_index`` so that
    ``answer_question`` can reload it later.

    Args:
        file: Gradio upload object for the PDF; its ``.name`` attribute is the
            path of the temp file Gradio wrote to disk.

    Returns:
        tuple[str, str]: (preview text of the first chunk, summary line with
        chunk/page counts). On any failure, ("Error: ...", "").
    """
    try:
        # Guard: the user may click "Process" before uploading anything.
        if file is None:
            return "Error: no file uploaded.", ""

        filepath = file.name

        # Load the PDF — PyPDFLoader yields one Document per page.
        loader = PyPDFLoader(filepath)
        documents = loader.load()

        # Split pages into overlapping chunks so retrieval returns passages
        # small enough to fit comfortably in the LLM context window.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separators=["\n\n", "\n", " ", ""],
        )
        docs = text_splitter.split_documents(documents)

        # Guard: a scanned or image-only PDF can produce zero text chunks;
        # fail with a clear message instead of an IndexError on docs[0].
        if not docs:
            return "Error: no extractable text found in the PDF.", ""

        # Embed every chunk and build the similarity-search index.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        vectorstore = FAISS.from_documents(docs, embeddings)

        # Persist the index for the question-answering tab.
        vectorstore.save_local("faiss_index")

        doc_info = f"Processed {len(docs)} chunks from {len(documents)} pages."
        return docs[0].page_content, doc_info
    except Exception as e:
        # Surface the failure in the UI textbox rather than crashing the app.
        return f"Error: {str(e)}", ""
def answer_question(question):
    """Answer a question about the previously processed PDF via RAG.

    Reloads the FAISS index saved by ``process_pdf``, retrieves the three most
    relevant chunks, and asks LLaMA 3 (via Groq) to answer from that context.

    Args:
        question: Free-text question typed by the user.

    Returns:
        str: The model's answer, or an "Error: ..." message on failure
        (including the case where no PDF has been processed yet).
    """
    try:
        # Guard: empty input would just waste a retrieval + API round trip.
        if not question or not question.strip():
            return "Error: please enter a question."

        # Reload the persisted index with the SAME embedding model used to
        # build it (vector spaces must match for retrieval to be meaningful).
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        vectorstore = FAISS.load_local(
            "faiss_index", embeddings, allow_dangerous_deserialization=True
        )

        # Retrieve the top-3 most similar chunks for the question.
        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
        docs = retriever.invoke(question)
        context = "\n\n".join(doc.page_content for doc in docs)

        # BUG FIX: the original embedded raw LLaMA-3 chat-template tokens
        # (<|begin_of_text|>, <|start_header_id|>, <|eot_id|>, ...) inside a
        # chat-completions user message. The chat API applies the model's chat
        # template itself, so those tokens were sent as literal text. Use
        # proper system/user roles instead.
        system_prompt = (
            "You are a helpful AI assistant that answers questions based on "
            "the provided context.\n"
            "Use only the information from the context to answer the "
            "question. If you don't know the answer, say you don't know.\n"
            "Be concise and accurate in your responses."
        )
        response = groq_client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": f"Context: {context}\n\nQuestion: {question}",
                },
            ],
            model="llama3-70b-8192",
            temperature=0,  # deterministic, grounded answers
        )
        return response.choices[0].message.content
    except Exception as e:
        # Covers the common "faiss_index not found" case (no PDF processed).
        return f"Error: {str(e)}"
# ---------------------------------------------------------------------------
# Gradio interface: one tab ingests a PDF and builds the index, the other
# queries it. Widget labels and wiring are unchanged from the original.
# ---------------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## π PDF RAG System with Groq & LLaMA 3", elem_id="title")
    gr.Markdown("Upload a PDF file, process it, and ask questions using LLaMA 3. β¨")

    # --- Tab 1: upload a PDF and build the FAISS index ---
    with gr.Tab("π Upload & Process"):
        with gr.Row():
            with gr.Column():
                pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
                btn_process = gr.Button("π Process PDF", variant="primary")
                info_box = gr.Textbox(label="Document Info")
                preview_box = gr.Textbox(
                    label="First Chunk Preview",
                    lines=12,
                    placeholder="Processed PDF content will appear here...",
                    show_copy_button=True,
                )
        # Clicking "Process PDF" fills the preview and the info line.
        btn_process.click(
            fn=process_pdf,
            inputs=pdf_file,
            outputs=[preview_box, info_box],
        )

    # --- Tab 2: ask questions against the stored index ---
    with gr.Tab("β Ask Questions"):
        with gr.Row():
            with gr.Column():
                question_box = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask something about the PDF content...",
                )
                btn_ask = gr.Button("π Get Answer", variant="primary")
                answer_box = gr.Textbox(label="Answer", lines=8, interactive=False)
        # Clicking "Get Answer" runs retrieval + LLM and shows the reply.
        btn_ask.click(fn=answer_question, inputs=question_box, outputs=answer_box)

    gr.Markdown("<p style='text-align: center; color: gray;'>Made with β€ using Gradio + Groq + LLaMA 3</p>")

# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()