# Hugging Face Space: asif-coder/ChatPDF-Upgraded
# (Space status when captured: Sleeping; file size: 4,060 bytes)

import os
import gradio as gr
from groq import Groq

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings


# ------------------------------
# API KEY
# ------------------------------

# Groq API client; the key is read from the GROQ_API environment variable.
client = Groq(api_key=os.environ.get("GROQ_API"))

# Vector index shared through a module-level global: it stays None until
# build_knowledge_base() assigns a FAISS store, and ask_question() reads it.
vector_db = None


# ------------------------------
# EMBEDDING MODEL
# ------------------------------

# Embedding model handed to FAISS when the index is built from the chunks.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)


# ------------------------------
# BUILD KNOWLEDGE BASE
# ------------------------------

def build_knowledge_base(files):
    """Build the global FAISS index from the uploaded PDF files.

    Each PDF is loaded page by page, tagged with its file name and a
    1-based page number, split into overlapping chunks and embedded into
    a new FAISS store that replaces the module-level ``vector_db``.

    Args:
        files: Uploaded files as delivered by the Gradio file widget.
            Each item may be a path string or an object exposing the
            path as ``.name``. ``None`` or an empty list is accepted.

    Returns:
        A human-readable status message for the UI. On any failure the
        previously built ``vector_db`` (if any) is left untouched.
    """
    global vector_db

    if not files:
        return "Please upload at least one PDF."

    all_docs = []

    for file in files:
        # Depending on the Gradio version the upload arrives either as a
        # path string or as a tempfile-like wrapper with a ``.name`` path.
        file_path = str(file) if isinstance(file, str) else file.name
        file_name = os.path.basename(file_path)

        try:
            pages = PyPDFLoader(file_path).load()
        except Exception as exc:
            # UI boundary: report which upload is unreadable instead of
            # surfacing an opaque traceback to the user.
            return f"Could not read {file_name}: {exc}"

        for page in pages:
            page.metadata["source"] = file_name
            # The loader numbers pages from 0; store a 1-based number so
            # the cited sources match what a PDF viewer shows.
            page.metadata["page"] = page.metadata.get("page", 0) + 1

        all_docs.extend(pages)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200
    )

    chunks = splitter.split_documents(all_docs)

    # Scanned/image-only PDFs yield no text; building a FAISS index from an
    # empty list fails, so stop early with a clear message.
    if not chunks:
        return "No extractable text was found in the uploaded PDFs."

    vector_db = FAISS.from_documents(
        chunks,
        embedding_model
    )

    return f"Knowledge base created with {len(chunks)} chunks."


# ------------------------------
# CONTEXT BUILDER
# ------------------------------

def build_context(docs):
    """Assemble the prompt context and the list of cited sources.

    Args:
        docs: Iterable of retrieved documents. Each must expose
            ``page_content`` (str) and ``metadata`` (a mapping that
            normally holds ``"source"`` and ``"page"``).

    Returns:
        A ``(context, sources)`` tuple. ``context`` is every document's
        text followed by a blank line, concatenated in order. ``sources``
        is a list of unique ``"<source> (Page <page>)"`` labels in
        first-seen order. A document without a ``"source"`` is labelled
        ``"Unknown source"``; one without a ``"page"`` omits the page part.
    """
    parts = []
    # A dict keeps insertion order and gives O(1) duplicate detection.
    labels = {}

    for doc in docs:
        parts.append(doc.page_content + "\n\n")

        meta = doc.metadata or {}
        source = meta.get("source", "Unknown source")
        page = meta.get("page")

        if page is None:
            label = f"{source}"
        else:
            label = f"{source} (Page {page})"

        labels.setdefault(label, None)

    return "".join(parts), list(labels)


# ------------------------------
# QUESTION ANSWERING
# ------------------------------

def ask_question(question):
    """Stream an answer to ``question`` grounded in the indexed documents.

    This is a generator used as a Gradio handler: every yielded value is
    the full text to display so far, so the answer grows as tokens arrive.

    Args:
        question: The user's question text. ``None``, empty or
            whitespace-only input is rejected with a prompt message.

    Yields:
        The accumulated response text after each received token, and
        finally the response followed by a "Sources:" list (when any
        sources were retrieved). If the knowledge base has not been
        built, or the model call fails, a single explanatory message is
        yielded instead (a failure keeps the partial answer so far).
    """
    # Reject blank input before spending a retrieval and an LLM call on it.
    if not question or not question.strip():
        yield "Please enter a question."
        return

    if vector_db is None:
        yield "Please upload and build the knowledge base first."
        return

    docs = vector_db.similarity_search(question, k=5)

    context, sources = build_context(docs)

    prompt = f"""
You are an expert document assistant.

Answer ONLY using the context below.

If the answer is not present, say:
"I could not find the answer in the documents."

Context:
{context}

Question:
{question}

Answer:
"""

    response = ""

    try:
        stream = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[{"role": "user", "content": prompt}],
            stream=True
        )

        for chunk in stream:
            # Some stream events carry no choices (e.g. metadata-only
            # chunks); skip them instead of indexing into an empty list.
            if not chunk.choices:
                continue

            token = chunk.choices[0].delta.content

            if token:
                response += token
                yield response
    except Exception as exc:
        # UI boundary: keep whatever was streamed and tell the user why
        # the answer stopped instead of failing with a bare traceback.
        yield f"{response}\n\n[Error while contacting the language model: {exc}]"
        return

    if sources:
        response += "\n\nSources:\n" + "".join(f"- {s}\n" for s in sources)

    yield response


# ------------------------------
# UI
# ------------------------------

# Declarative Gradio layout: an upload row that builds the knowledge base,
# followed by a question box whose answer is streamed into a text area.
with gr.Blocks(theme=gr.themes.Soft()) as app:

    gr.Markdown("# 📚 AI Knowledge Base Assistant")

    gr.Markdown("Upload PDFs and ask questions about them.")

    with gr.Row():

        # Restrict the picker to PDFs: the loader only understands PDF
        # files, so other uploads could never be indexed.
        file_input = gr.File(
            file_count="multiple",
            file_types=[".pdf"],
            label="Upload PDF Files"
        )

        build_btn = gr.Button("Build Knowledge Base")

    status = gr.Textbox(label="System Status")

    build_btn.click(
        build_knowledge_base,
        inputs=file_input,
        outputs=status
    )

    gr.Markdown("## Ask Questions")

    question = gr.Textbox(
        placeholder="Ask something about the documents..."
    )

    ask_btn = gr.Button("Ask AI")

    answer = gr.Textbox(
        label="AI Response",
        lines=15
    )

    ask_btn.click(
        ask_question,
        inputs=question,
        outputs=answer
    )

    # Pressing Enter in the question box behaves like clicking "Ask AI".
    question.submit(
        ask_question,
        inputs=question,
        outputs=answer
    )

    gr.Markdown(
        """
        ---
        © 2026 AI Document Assistant  
        Developed by **Asif Jamal**
        """
    )


# Start the web server when the module is executed.
app.launch()