Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from groq import Groq | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
# ------------------------------
# API KEY / SHARED STATE
# ------------------------------
# The Groq API key is read from the GROQ_API environment variable.
_groq_api_key = os.environ.get("GROQ_API")
client = Groq(api_key=_groq_api_key)

# Module-wide vector index; None until build_knowledge_base() assigns it.
vector_db = None
# ------------------------------
# EMBEDDING MODEL
# ------------------------------
# Embedding model handed to FAISS when the index is built.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
# ------------------------------
# BUILD KNOWLEDGE BASE
# ------------------------------
def build_knowledge_base(files):
    """Load the uploaded PDFs, split them into chunks and rebuild the index.

    On success the module-level ``vector_db`` is replaced with a new FAISS
    index built from all chunks. When nothing can be indexed, ``vector_db``
    is left untouched.

    Args:
        files: Uploads from the Gradio file component. Each item may be a
            filesystem path (``str`` / ``os.PathLike``) or an object that
            exposes the path as ``.name``.

    Returns:
        A status message for the UI: the chunk count on success, or an
        explanation of why no knowledge base was created.
    """
    global vector_db

    if not files:
        return "Please upload at least one PDF."

    all_docs = []
    for file in files:
        # Depending on the Gradio version / component `type`, uploads arrive
        # either as plain paths or as tempfile-like wrappers with `.name`.
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = file.name

        pages = PyPDFLoader(file_path).load()

        # Show the bare file name (not the temp upload path) in citations.
        source_name = os.path.basename(file_path)
        for page in pages:
            page.metadata["source"] = source_name
            # Guarantee the key exists so build_context() can rely on it.
            # NOTE(review): PyPDFLoader page numbers appear to be 0-based;
            # confirm before presenting them to users as page numbers.
            page.metadata["page"] = page.metadata.get("page", 0)
        all_docs.extend(pages)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    chunks = splitter.split_documents(all_docs)

    # PDFs without extractable text (e.g. scanned images) produce no chunks;
    # an index cannot be built from an empty list, so report it instead.
    if not chunks:
        return "No extractable text was found in the uploaded PDFs."

    vector_db = FAISS.from_documents(chunks, embedding_model)
    return f"Knowledge base created with {len(chunks)} chunks."
# ------------------------------
# CONTEXT BUILDER
# ------------------------------
def build_context(docs):
    """Turn retrieved documents into prompt context plus a citation list.

    Args:
        docs: Iterable of objects with a ``page_content`` string and a
            ``metadata`` mapping containing ``"source"`` and ``"page"``.

    Returns:
        A ``(context, sources)`` tuple: ``context`` is every page content
        followed by a blank line, in order; ``sources`` is the list of
        unique ``"<source> (Page <page>)"`` labels in first-seen order.
    """
    passages = []
    labels = []
    for doc in docs:
        passages.append(doc.page_content + "\n\n")
        meta = doc.metadata
        labels.append(f"{meta['source']} (Page {meta['page']})")

    # dict.fromkeys drops duplicates while keeping first-occurrence order.
    unique_sources = list(dict.fromkeys(labels))
    return "".join(passages), unique_sources
# ------------------------------
# QUESTION ANSWERING
# ------------------------------
def ask_question(question):
    """Stream an answer to ``question`` grounded in the indexed documents.

    Retrieves the 5 most similar chunks from the module-level ``vector_db``,
    sends them with the question to the Groq chat model, and yields the
    answer as it grows so the UI can render it incrementally.

    Args:
        question: The user's question text.

    Yields:
        The accumulated answer after each streamed token; the final value
        additionally carries a "Sources:" list. When the question is blank
        or no knowledge base exists, a single guidance message is yielded.
    """
    # Skip retrieval and the LLM call entirely for a blank question.
    if not question or not question.strip():
        yield "Please enter a question."
        return

    if vector_db is None:
        yield "Please upload and build the knowledge base first."
        return

    docs = vector_db.similarity_search(question, k=5)
    context, sources = build_context(docs)

    prompt = f"""
You are an expert document assistant.
Answer ONLY using the context below.
If the answer is not present, say:
"I could not find the answer in the documents."
Context:
{context}
Question:
{question}
Answer:
"""

    stream = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )

    response = ""
    for chunk in stream:
        # A streamed chunk may arrive without any choices (presumably
        # metadata-only chunks); indexing [0] on it would raise IndexError.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if token:
            response += token
            # Yield the whole text so far: the output box is replaced, not
            # appended to, on every update.
            yield response

    source_text = "\n\nSources:\n" + "".join(f"- {s}\n" for s in sources)
    yield response + source_text
# ------------------------------
# UI
# ------------------------------
# NOTE(review): the original indentation was lost; the grouping of the upload
# widget and the build button inside the Row is the presumed layout.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# 📚 AI Knowledge Base Assistant")
    gr.Markdown("Upload PDFs and ask questions about them.")

    with gr.Row():
        file_input = gr.File(
            file_count="multiple",
            # The backend only parses PDFs, so restrict uploads to them.
            file_types=[".pdf"],
            label="Upload PDF Files",
        )
        build_btn = gr.Button("Build Knowledge Base")

    status = gr.Textbox(label="System Status")

    build_btn.click(
        build_knowledge_base,
        inputs=file_input,
        outputs=status,
    )

    gr.Markdown("## Ask Questions")

    question = gr.Textbox(
        placeholder="Ask something about the documents..."
    )
    ask_btn = gr.Button("Ask AI")
    answer = gr.Textbox(
        label="AI Response",
        lines=15,
    )

    # ask_question is a generator, so the answer box updates as text streams.
    ask_btn.click(
        ask_question,
        inputs=question,
        outputs=answer,
    )

    gr.Markdown(
        """
---
© 2026 AI Document Assistant
Developed by **Asif Jamal**
"""
    )

app.launch()