"""DocuMind AI: a small Gradio app for chatting with an uploaded PDF.

Flow: the uploaded file is loaded, chunked, embedded and stored in a vector
store (``process_document``); each chat turn retrieves the most relevant
chunks and streams an answer from a hosted chat model (``rag_chat``).
"""

import os
from typing import Any, Dict, Iterator, List, Optional, Tuple

import gradio as gr
from huggingface_hub import InferenceClient

from chunker import Chunker
from embedder import Embedder
from loader import Loader
from retriever import Retriever
from vector import VectorStorage

MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"
TOP_K = 3  # number of chunks retrieved per question
MAX_NEW_TOKENS = 512  # generation cap passed to the chat completion call
SYSTEM_PROMPT = (
    "You are a research assistant. Use the provided context to answer. "
    "If the answer isn't there, say you don't know."
)

# Shared inference client; the token is read from the HF_TOKEN environment
# variable (None when the variable is unset).
client = InferenceClient(MODEL_ID, token=os.getenv("HF_TOKEN"))


def process_document(file: Any) -> Tuple[Any, Any, str]:
    """Index an uploaded PDF and return the state needed for chatting.

    Args:
        file: Value produced by the ``gr.File`` input. ``None`` when nothing
            was uploaded; otherwise either an object exposing ``.name`` or a
            plain path string.

    Returns:
        ``(store, embedder, status)``. ``store`` and ``embedder`` are ``None``
        when indexing was not possible; ``status`` is the message shown in the
        status panel.
    """
    if file is None:
        return None, None, "❌ Please upload a PDF first."

    # Accept both a temp-file wrapper (has .name) and a bare path string.
    path = getattr(file, "name", file)

    text = Loader(path).load()
    chunks = Chunker().chunker(text)
    if not chunks:
        # Nothing to embed; avoids indexing vectors[0] on an empty result.
        return None, None, "❌ No extractable text found in this PDF."

    embedder = Embedder()
    vectors = embedder.embed(chunks)
    if len(vectors) == 0:
        return None, None, "❌ No extractable text found in this PDF."

    # The store dimension is taken from the first embedding vector.
    store = VectorStorage(dimension=len(vectors[0]))
    store.add(vectors, chunks)
    return store, embedder, "✅ PDF Indexed. Ready to chat!"


def _content_to_text(content: Any) -> Optional[str]:
    """Return the plain text of a history item, or None if it has none.

    Strings are returned unchanged. A list/tuple of dict blocks carrying a
    string ``"text"`` field is joined with newlines. Anything else (files,
    components, ``None``) yields ``None``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        parts = [
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        ]
        return "\n".join(parts) if parts else None
    return None


def _history_to_messages(history: Any) -> List[Dict[str, str]]:
    """Convert chat history into ``{"role", "content"}`` message dicts.

    Handles role/content dict entries, objects with ``role``/``content``
    attributes, and ``[user, assistant]`` pairs, because the history shape
    handed to the callback depends on how the chat UI is configured.
    Entries without usable text are skipped.
    """
    messages: List[Dict[str, str]] = []
    for entry in history or []:
        if isinstance(entry, (list, tuple)):
            # Pair format: first item is the user turn, second the reply.
            if len(entry) != 2:
                continue
            for role, content in zip(("user", "assistant"), entry):
                text = _content_to_text(content)
                if text:
                    messages.append({"role": role, "content": text})
            continue

        if isinstance(entry, dict):
            role = entry.get("role")
            raw_content = entry.get("content")
        else:
            role = getattr(entry, "role", None)
            raw_content = getattr(entry, "content", None)

        text = _content_to_text(raw_content)
        if role and text:
            messages.append({"role": role, "content": text})
    return messages


def rag_chat(message: str, history: Any, store: Any, embedder: Any) -> Iterator[str]:
    """Answer ``message`` using retrieved PDF context, streaming the reply.

    Args:
        message: The user's current question.
        history: Previous turns as supplied by ``gr.ChatInterface``.
        store: Vector store produced by ``process_document`` (or ``None``).
        embedder: Embedder produced by ``process_document``.

    Yields:
        The accumulated response text after each streamed token, or a single
        notice/error string.
    """
    if store is None:
        yield "Please upload and process a PDF on the left first."
        return

    retriever = Retriever(store, embedder, k=TOP_K)
    context_chunks = retriever.retrieve(message)
    context_text = (
        "\n\n".join(context_chunks) if context_chunks else "No relevant context found."
    )

    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend(_history_to_messages(history))
    messages.append(
        {
            "role": "user",
            "content": f"Context:\n{context_text}\n\nQuestion: {message}",
        }
    )

    response = ""
    try:
        stream = client.chat_completion(
            messages=messages, max_tokens=MAX_NEW_TOKENS, stream=True
        )
        for chunk in stream:
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece:
                response += piece
                # Yield the running text so the UI updates in place.
                yield response
    except Exception as e:
        # UI boundary: surface any API failure in the chat instead of crashing.
        yield f"⚠️ API Error: {str(e)}"


with gr.Blocks(theme=gr.themes.Soft(primary_hue="slate")) as demo:
    # Per-session state filled in by process_document and read by rag_chat.
    store_state = gr.State()
    embedder_state = gr.State()

    gr.Markdown("# 📑 DocuMind AI")

    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(label="Source Document", file_types=[".pdf"])
            btn = gr.Button("Build Knowledge Base", variant="primary")
            status = gr.Markdown("Status: Waiting for upload...")

        with gr.Column(scale=3):
            gr.ChatInterface(
                fn=rag_chat,
                additional_inputs=[store_state, embedder_state],
                fill_height=True,
            )

    btn.click(
        fn=process_document,
        inputs=[file_input],
        outputs=[store_state, embedder_state, status],
    )


if __name__ == "__main__":
    demo.launch()