asif-coder's picture
Update app.py
551b2f7 verified
Raw
History Blame Contribute Delete
4.06 kB
import os
import gradio as gr
from groq import Groq
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
# ------------------------------
# API KEY
# ------------------------------
# Groq chat client; the API key is read from the GROQ_API environment variable.
client = Groq(api_key=os.getenv("GROQ_API"))

# Shared FAISS index; remains None until build_knowledge_base assigns it.
vector_db = None

# ------------------------------
# EMBEDDING MODEL
# ------------------------------
# Sentence-transformer model used to embed document chunks for the index.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
# ------------------------------
# BUILD KNOWLEDGE BASE
# ------------------------------
def build_knowledge_base(files):
    """Index the uploaded PDFs into the module-level FAISS vector store.

    Each PDF is loaded page by page, every page is tagged with its file
    name and page index, the pages are split into overlapping chunks, and
    the chunks are embedded with ``embedding_model``. On success the global
    ``vector_db`` is replaced with the new index; on any early return the
    previous index (if any) is left untouched.

    Args:
        files: Uploaded files from the UI. Each item may be an object that
            exposes its path as ``.name`` or a plain path string.

    Returns:
        A human-readable status message.
    """
    global vector_db

    if not files:
        return "Please upload at least one PDF."

    all_docs = []
    for file in files:
        # Accept both upload objects (path in ``.name``) and bare path strings.
        file_path = getattr(file, "name", file)
        source_name = os.path.basename(file_path)

        pages = PyPDFLoader(file_path).load()
        for page in pages:
            # Store only the base name so citations don't expose temp paths.
            page.metadata["source"] = source_name
            page.metadata.setdefault("page", 0)
        all_docs.extend(pages)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    chunks = splitter.split_documents(all_docs)

    # PDFs without extractable text (e.g. scanned images) produce no chunks,
    # and an index cannot be built from an empty list, so report it instead
    # of letting the vector store constructor fail.
    if not chunks:
        return "No extractable text was found in the uploaded PDFs."

    vector_db = FAISS.from_documents(chunks, embedding_model)
    return f"Knowledge base created with {len(chunks)} chunks."
# ------------------------------
# CONTEXT BUILDER
# ------------------------------
def build_context(docs):
    """Assemble the prompt context and the citation list for retrieved docs.

    Args:
        docs: Retrieved documents, each with ``page_content`` and a
            ``metadata`` mapping containing ``"source"`` and ``"page"``.

    Returns:
        A ``(context, sources)`` tuple: ``context`` is every document's text
        followed by a blank line, concatenated in order; ``sources`` is the
        de-duplicated list of ``"<source> (Page <n>)"`` labels in first-seen
        order.
    """
    context = "".join(f"{doc.page_content}\n\n" for doc in docs)

    sources = []
    for doc in docs:
        page = doc.metadata["page"]
        # The stored page value is a zero-based index (the indexing step
        # defaults it to 0), so shift it to cite human-readable page numbers.
        # Non-integer values are shown unchanged.
        if isinstance(page, int):
            page += 1
        label = f"{doc.metadata['source']} (Page {page})"
        if label not in sources:
            sources.append(label)

    return context, sources
# ------------------------------
# QUESTION ANSWERING
# ------------------------------
def ask_question(question):
    """Stream an answer to ``question`` grounded in the indexed documents.

    Retrieves the five most similar chunks from the global ``vector_db``,
    sends them together with the question to the Groq chat model, and
    yields the growing answer as tokens arrive. The last value yielded is
    the full answer followed by the list of cited sources.

    Args:
        question: The user's question text.

    Yields:
        The accumulated response so far, or a single guidance message when
        the question is blank or no knowledge base has been built yet.
    """
    # Reject blank input up front so no retrieval or LLM call is wasted on it.
    if not question or not question.strip():
        yield "Please enter a question."
        return

    if vector_db is None:
        yield "Please upload and build the knowledge base first."
        return

    docs = vector_db.similarity_search(question, k=5)
    context, sources = build_context(docs)

    prompt = (
        "\n"
        "You are an expert document assistant.\n"
        "Answer ONLY using the context below.\n"
        "If the answer is not present, say:\n"
        '"I could not find the answer in the documents."\n'
        "Context:\n"
        f"{context}\n"
        "Question:\n"
        f"{question}\n"
        "Answer:\n"
    )

    stream = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )

    response = ""
    for chunk in stream:
        # Skip stream events that carry no choices instead of indexing into
        # an empty list.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response

    source_text = "\n\nSources:\n" + "".join(f"- {s}\n" for s in sources)
    yield response + source_text
# ------------------------------
# UI
# ------------------------------
# Gradio layout: upload/index controls on top, question answering below.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# 📚 AI Knowledge Base Assistant")
    gr.Markdown("Upload PDFs and ask questions about them.")

    # NOTE(review): the row is assumed to group the upload widget and the
    # build button; the original nesting was not recoverable — confirm.
    with gr.Row():
        file_input = gr.File(
            file_count="multiple",
            # The indexing step parses files with a PDF loader, so restrict
            # the picker to PDFs instead of failing later on other types.
            file_types=[".pdf"],
            label="Upload PDF Files",
        )
        build_btn = gr.Button("Build Knowledge Base")

    status = gr.Textbox(label="System Status")
    build_btn.click(
        build_knowledge_base,
        inputs=file_input,
        outputs=status,
    )

    gr.Markdown("## Ask Questions")
    question = gr.Textbox(
        placeholder="Ask something about the documents...",
    )
    ask_btn = gr.Button("Ask AI")
    answer = gr.Textbox(
        label="AI Response",
        lines=15,
    )
    # ask_question is a generator, so each yielded value updates the answer box.
    ask_btn.click(
        ask_question,
        inputs=question,
        outputs=answer,
    )

    gr.Markdown(
        "\n"
        "---\n"
        "© 2026 AI Document Assistant\n"
        "Developed by **Asif Jamal**\n"
    )

app.launch()