# Source: Hugging Face Space "asif-coder/RAG-based-Document-Chatbot"
# Space status when captured: Runtime error
# File size: 3,979 bytes

import os
# os.environ["GROQ_API_KEY"] = "YOUR_GROQ_API_KEY"
# from google.colab import userdata
# GROQ_API_KEY=userdata.get('rag-based')
import gradio as gr
from groq import Groq

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

# -----------------------------
# Environment Setup
# -----------------------------

# The Groq API key is read from the "Rag_based" environment variable.
# NOTE(review): os.environ.get returns None when that variable is unset, so
# api_key may be None here -- confirm the secret is configured under exactly
# this name (an earlier revision of this file used "Rag-based").
client = Groq(api_key=os.environ.get("Rag_based"))

# -----------------------------
# Global Variables
# -----------------------------

# FAISS index for the most recently processed PDF. It is None until
# process_document() succeeds; ask_question() reads it to retrieve context.
# NOTE(review): being module-level, this single index is presumably shared by
# every user session of the app -- confirm that is acceptable.
vector_db = None

# -----------------------------
# Embedding Model
# -----------------------------

# Embedding model handed to FAISS.from_documents() when the index is built.
# It is constructed once, at import time.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# -----------------------------
# Document Processing Function
# -----------------------------

def process_document(pdf_file):
    """Load an uploaded PDF, split it into chunks, and build the FAISS index.

    On success the module-level ``vector_db`` is replaced with an index built
    from the new document. On any failure ``vector_db`` is left untouched, so
    a previously processed document (if any) stays queryable.

    Args:
        pdf_file: The value delivered by the upload component. Either a
            filesystem path (``str`` / ``os.PathLike``) or an object that
            exposes the path as ``.name``. ``None`` means nothing was
            uploaded.

    Returns:
        A human-readable status string: a success message including the
        number of chunks created, or a message describing what went wrong.
        Exceptions are never propagated to the caller.
    """
    global vector_db

    if pdf_file is None:
        return "Please upload a PDF Document first."

    try:
        # Accept both a plain path and a file-like wrapper. The isinstance
        # check comes first because path objects also have a ``.name``
        # attribute, but it only holds the base name, not the full path.
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = os.fspath(pdf_file)
        else:
            pdf_path = pdf_file.name

        # Load PDF
        documents = PyPDFLoader(pdf_path).load()

        # Chunking
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
        )
        chunks = text_splitter.split_documents(documents)

        # Nothing to index: report it clearly instead of handing an empty
        # list to the vector store and surfacing an opaque library error.
        if not chunks:
            return (
                "Error processing document: no extractable text was found "
                "in the PDF."
            )

        # Create FAISS vector database
        vector_db = FAISS.from_documents(chunks, embedding_model)

        return f"Document processed successfully. {len(chunks)} chunks of your document created. Now, proceed to ask your question ahead."

    except Exception as e:
        # UI boundary: turn any failure into a status message for the user.
        return f"Error processing document: {str(e)}"


# -----------------------------
# Question Answering Function
# -----------------------------

def ask_question(question):
    """Answer a question using the processed document as the only context.

    The four chunks most similar to the question are retrieved from the
    module-level ``vector_db`` and sent, together with the question, to the
    Groq chat-completions API.

    Args:
        question: The user's question. ``None``, empty, or whitespace-only
            input is rejected before any retrieval or LLM call is made.

    Returns:
        The model's answer text, or a human-readable message when the
        question is empty, no document has been processed yet, or the
        retrieval / LLM call fails. Exceptions are never propagated.
    """
    # Reject blank input up front so no search or API call is wasted on it.
    question = (question or "").strip()
    if not question:
        return "Please enter a question."

    if vector_db is None:
        return "Please upload and process a PDF document first."

    try:

        # Retrieve relevant chunks
        docs = vector_db.similarity_search(question, k=4)

        context = "\n\n".join(doc.page_content for doc in docs)

        prompt = f"""
You are a helpful assistant. Answer the question ONLY using the following context.
If the answer is not in the context, say "I could not find the answer in the provided context."

Context:
{context}

Question:
{question}

Answer clearly and based only on the provided context.
"""

        # Groq LLM call
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "user", "content": prompt}
            ],
            model="llama-3.3-70b-versatile",
        )

        return chat_completion.choices[0].message.content

    except Exception as e:
        # UI boundary: turn any failure into a message shown in the answer box.
        return f"Error generating answer: {str(e)}"


# -----------------------------
# Gradio Interface
# -----------------------------

# Build the UI. Components are created in the order they appear below, so the
# statement order in this block determines the page layout.
with gr.Blocks() as demo:

    # Page title and short usage hint.
    gr.Markdown("# 📄 PDF Document Assistant Developed by Asif Jamal")

    gr.Markdown(
        "Upload a PDF document and ask questions about its content."
    )

    # --- Step 1: upload and index a document ---
    pdf_input = gr.File(label="Upload PDF Document")

    process_button = gr.Button("Click to Process Document")

    process_output = gr.Textbox(label="Processing Status")

    # Clicking the button passes the uploaded file to process_document and
    # shows the status string it returns.
    process_button.click(
        process_document,
        inputs=pdf_input,
        outputs=process_output
    )

    # --- Step 2: ask questions about the indexed document ---
    gr.Markdown("## Ask Questions")

    question_input = gr.Textbox(
        label="Enter your question."
    )

    ask_button = gr.Button("Click to Proceed")

    answer_output = gr.Textbox(
        label="Answer",
        lines=10
    )

    # Clicking the button passes the question text to ask_question and shows
    # the string it returns in the answer box.
    ask_button.click(
        ask_question,
        inputs=question_input,
        outputs=answer_output
    )

    # Footer.
    # NOTE(review): the two trailing spaces after the copyright line are
    # presumably an intentional Markdown hard line break -- keep them.
    gr.Markdown(
        """
        ---
        © 2026 AI Document Assistant  
        Developed by Asif Jamal
        """
    )

# Start the app. This call is not behind a __main__ guard, so it runs whenever
# this module is executed or imported.
demo.launch()