"""AI Knowledge Base Assistant.

A small Gradio app that:

1. loads uploaded PDF files and splits them into overlapping text chunks,
2. indexes the chunks in an in-memory FAISS vector store, and
3. answers questions by retrieving the most similar chunks and streaming a
   completion from a Groq-hosted chat model that is told to answer only from
   that retrieved context.
"""

import os

import gradio as gr
from groq import Groq
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

# ------------------------------
# API KEY
# ------------------------------
# The key is read from the GROQ_API environment variable.
client = Groq(api_key=os.environ.get("GROQ_API"))

# In-memory vector store; stays None until build_knowledge_base() succeeds.
vector_db = None

# ------------------------------
# EMBEDDING MODEL
# ------------------------------
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Footer shown at the bottom of the page. Built with explicit newlines so the
# markdown does not depend on source-code indentation.
_FOOTER_MARKDOWN = (
    "\n"
    "---\n"
    "© 2026 AI Document Assistant\n"
    "Developed by **Asif Jamal**\n"
)


# ------------------------------
# BUILD KNOWLEDGE BASE
# ------------------------------
def build_knowledge_base(files):
    """Index the uploaded PDFs and store the result in ``vector_db``.

    Args:
        files: The value of the multi-file upload component. Each item may be
            an object exposing the path as ``.name`` or a plain path string.

    Returns:
        A human-readable status message for the "System Status" textbox.
    """
    global vector_db

    if not files:
        return "Please upload at least one PDF."

    all_docs = []
    for file in files:
        # Accept both file-like objects (path in ``.name``) and bare paths.
        file_path = getattr(file, "name", file)
        source_name = os.path.basename(file_path)

        pages = PyPDFLoader(file_path).load()
        for page in pages:
            # Normalise metadata so build_context() can cite every chunk.
            page.metadata["source"] = source_name
            page.metadata["page"] = page.metadata.get("page", 0)
        all_docs.extend(pages)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    chunks = splitter.split_documents(all_docs)

    # PDFs without extractable text (e.g. scanned images) produce no chunks;
    # building an index from nothing would fail, so report it instead and
    # leave any previously built index untouched.
    if not chunks:
        return "No extractable text was found in the uploaded PDFs."

    vector_db = FAISS.from_documents(chunks, embedding_model)

    return f"Knowledge base created with {len(chunks)} chunks."


# ------------------------------
# CONTEXT BUILDER
# ------------------------------
def build_context(docs):
    """Turn retrieved documents into prompt context plus a citation list.

    Args:
        docs: Iterable of documents exposing ``page_content`` and a
            ``metadata`` mapping.

    Returns:
        A ``(context, sources)`` tuple: ``context`` is every document's text,
        each followed by a blank line; ``sources`` is the list of unique
        ``"<source> (Page <page>)"`` labels in first-seen order.
    """
    docs = list(docs)  # iterated twice below

    context = "".join(f"{d.page_content}\n\n" for d in docs)

    # NOTE(review): the page value is shown exactly as stored in metadata;
    # confirm whether the loader's numbering is 0- or 1-based before changing.
    labels = (
        f"{d.metadata.get('source', 'unknown')} "
        f"(Page {d.metadata.get('page', 0)})"
        for d in docs
    )
    # dict.fromkeys() removes duplicates while preserving insertion order.
    sources = list(dict.fromkeys(labels))

    return context, sources


def _build_prompt(context, question):
    """Return the grounded-answer prompt for *question* given *context*."""
    return (
        "\n"
        "You are an expert document assistant.\n"
        "\n"
        "Answer ONLY using the context below.\n"
        "\n"
        "If the answer is not present, say:\n"
        '"I could not find the answer in the documents."\n'
        "\n"
        "Context:\n"
        f"{context}\n"
        "\n"
        "Question:\n"
        f"{question}\n"
        "\n"
        "Answer:\n"
    )


# ------------------------------
# QUESTION ANSWERING
# ------------------------------
def ask_question(question):
    """Stream an answer to *question* grounded in the knowledge base.

    This is a generator: each yielded value is the full response text so far,
    so the output textbox is progressively replaced. The final yield appends
    the list of cited sources.

    Args:
        question: The user's question text.

    Yields:
        The accumulated answer, or a guidance message when the knowledge base
        has not been built or the question is blank.
    """
    if vector_db is None:
        yield "Please upload and build the knowledge base first."
        return

    # Avoid a pointless retrieval + LLM round trip for blank input.
    if not question or not question.strip():
        yield "Please enter a question."
        return

    docs = vector_db.similarity_search(question, k=5)
    context, sources = build_context(docs)

    stream = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": _build_prompt(context, question)}],
        stream=True,
    )

    response = ""
    for chunk in stream:
        # Defensive: skip chunks with an empty ``choices`` list instead of
        # raising IndexError mid-stream.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response

    source_text = "\n\nSources:\n" + "".join(f"- {s}\n" for s in sources)

    yield response + source_text


# ------------------------------
# UI
# ------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as app:

    gr.Markdown("# 📚 AI Knowledge Base Assistant")
    gr.Markdown("Upload PDFs and ask questions about them.")

    with gr.Row():
        file_input = gr.File(
            file_count="multiple",
            label="Upload PDF Files",
        )
        build_btn = gr.Button("Build Knowledge Base")

    status = gr.Textbox(label="System Status")

    build_btn.click(
        build_knowledge_base,
        inputs=file_input,
        outputs=status,
    )

    gr.Markdown("## Ask Questions")

    question = gr.Textbox(
        placeholder="Ask something about the documents..."
    )

    ask_btn = gr.Button("Ask AI")

    answer = gr.Textbox(
        label="AI Response",
        lines=15,
    )

    ask_btn.click(
        ask_question,
        inputs=question,
        outputs=answer,
    )

    gr.Markdown(_FOOTER_MARKDOWN)

app.launch()