# =========================
# IMPORTS
# =========================
import os
import tempfile

import gradio as gr
from groq import Groq
from duckduckgo_search import DDGS

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# =========================
# CONFIG
# =========================
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # HF SECRET KEY

# Chat model id. Overridable through the GROQ_MODEL environment variable so
# the model can be swapped without editing the code.
# NOTE(review): confirm the default id is still served by Groq; hosted model
# ids get retired over time.
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama3-8b-8192")

EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Below this many characters of retrieved PDF text, web results are appended.
MIN_PDF_CONTEXT_CHARS = 50

client = Groq(api_key=GROQ_API_KEY)

vectorstore = None
retriever = None
_embedding_model = None  # created on first use by _get_embedding_model()


# =========================
# PROMPT
# =========================
def build_prompt(context, question):
    """Return the LLM prompt that wraps *context* and *question*."""
    return f"""
You are an expert AI assistant.

Use ONLY the context below.
If answer is not present, say "Not found in document".

CONTEXT:
{context}

QUESTION:
{question}

ANSWER:
"""


# =========================
# WEB SEARCH (FALLBACK)
# =========================
def web_search(query):
    """Return the text bodies of up to three DuckDuckGo results for *query*.

    The bodies are joined with blank lines. Results without a "body" entry
    are skipped instead of raising KeyError.
    """
    with DDGS() as ddgs:
        bodies = [r.get("body", "") for r in ddgs.text(query, max_results=3)]
    return "\n\n".join(body for body in bodies if body)


# =========================
# PROCESS PDF (HF SAFE)
# =========================
def _get_embedding_model():
    """Return the shared embedding model, creating it on first use."""
    global _embedding_model
    if _embedding_model is None:
        _embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
    return _embedding_model


def process_pdf(file):
    """Index an uploaded PDF and point the global retriever at it.

    The PDF is loaded, split into chunks (chunk_size=600, chunk_overlap=100),
    embedded and stored in a Chroma vector store. The module-level
    ``vectorstore`` and ``retriever`` are replaced on success.

    Args:
        file: The value delivered by the ``gr.File`` component: an object
            with a ``name`` attribute holding the path, a plain path string,
            or ``None`` when the upload was cleared.

    Returns:
        A status message for the UI.
    """
    global vectorstore, retriever

    # The change event also fires when the file is removed; the existing
    # index is left untouched in that case.
    if file is None:
        return "⚠️ No PDF uploaded."

    # safe file handling: accept both an upload object and a bare path
    file_path = getattr(file, "name", file)

    documents = PyPDFLoader(file_path).load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=600,
        chunk_overlap=100,
    )
    chunks = splitter.split_documents(documents)

    # Nothing to embed; report it instead of handing Chroma an empty list.
    # The existing index is left untouched.
    if not chunks:
        return "⚠️ No extractable text found in this PDF."

    # Drop the previous collection so chunks from an earlier upload do not
    # remain searchable next to the new document.
    # NOTE(review): this relies on the in-memory Chroma client reusing the
    # default collection between calls - confirm for the installed version.
    if vectorstore is not None:
        vectorstore.delete_collection()
        vectorstore = None
        retriever = None

    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=_get_embedding_model(),
    )
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

    return "✅ PDF successfully processed"


# =========================
# RAG ENGINE (HYBRID)
# =========================
def ask_rag(query):
    """Answer *query* from the indexed PDF, adding web results when needed.

    Retrieved chunks are joined into the context. When that context holds
    fewer than ``MIN_PDF_CONTEXT_CHARS`` non-blank characters, web search
    results are appended under a "WEB:" heading.

    Returns:
        The model's answer text, or a prompt to upload a PDF when nothing
        has been indexed yet.
    """
    if retriever is None:
        return "⚠️ Please upload a PDF first."

    docs = retriever.invoke(query)
    pdf_context = "\n\n".join(d.page_content for d in docs)

    # hybrid fallback
    if len(pdf_context.strip()) < MIN_PDF_CONTEXT_CHARS:
        context = pdf_context + "\n\nWEB:\n" + web_search(query)
    else:
        context = pdf_context

    response = client.chat.completions.create(
        messages=[{"role": "user", "content": build_prompt(context, query)}],
        model=GROQ_MODEL,
    )
    return response.choices[0].message.content


# =========================
# CHAT FUNCTION (FIXED FORMAT)
# =========================
def chat(user_message, history):
    """Append one question/answer turn to the chat history.

    Args:
        user_message: Text submitted from the message box.
        history: List of ``(user, assistant)`` tuples, or ``None``.

    Returns:
        The updated history twice: once for the chatbot display and once
        for the session state.
    """
    # Work on a copy so the caller's list is never mutated in place.
    history = list(history or [])

    # A blank submission is ignored rather than sent to the model.
    if not user_message or not user_message.strip():
        return history, history

    history.append((user_message, ask_rag(user_message)))
    return history, history


# =========================
# UI (HUGGING FACE SAFE)
# =========================
with gr.Blocks() as app:
    gr.Markdown("# 🧠 Hybrid RAG Chatbot (PDF + Web)")

    file = gr.File(label="Upload PDF")
    status = gr.Textbox(label="Status")

    chatbot = gr.Chatbot()  # IMPORTANT FIX

    msg = gr.Textbox(placeholder="Ask your question...")
    state = gr.State([])

    file.change(process_pdf, file, status)
    msg.submit(chat, [msg, state], [chatbot, state])

app.launch()