Spaces:
Sleeping
Sleeping
| # ========================= | |
| # IMPORTS | |
| # ========================= | |
| import os | |
| import tempfile | |
| import gradio as gr | |
| from groq import Groq | |
| from duckduckgo_search import DDGS | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import Chroma | |
| # ========================= | |
| # CONFIG | |
| # ========================= | |
# API key is read from the environment (configured as a Hugging Face secret);
# it is None when the variable is not set.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)

# Module-level index state; both stay None until process_pdf() has built
# a vector store from an uploaded document.
vectorstore = retriever = None
| # ========================= | |
| # PROMPT | |
| # ========================= | |
def build_prompt(context, question):
    """Assemble the grounded-answer prompt for the chat model.

    Args:
        context: Text the model is told to answer from.
        question: The user's question.

    Returns:
        A single string with the instructions, the context, the question
        and a trailing ``ANSWER:`` cue, one section per line.
    """
    sections = (
        "",  # the template opens with a newline
        "You are an expert AI assistant.",
        "Use ONLY the context below.",
        'If answer is not present, say "Not found in document".',
        "CONTEXT:",
        f"{context}",
        "QUESTION:",
        f"{question}",
        "ANSWER:",
        "",  # ...and closes with one
    )
    return "\n".join(sections)
| # ========================= | |
| # WEB SEARCH (FALLBACK) | |
| # ========================= | |
def web_search(query, max_results=3):
    """Fetch short web snippets for *query* via DuckDuckGo.

    Args:
        query: Search text. Blank or ``None`` short-circuits to ``""``
            without touching the network.
        max_results: Upper bound on the number of hits requested.

    Returns:
        The ``body`` snippets of the hits joined by blank lines, or ``""``
        when there is nothing to search for or no hit carries a body.
    """
    # Guard first: there is nothing meaningful to search for, and the
    # search client is not expected to accept empty keywords.
    if not query or not query.strip():
        return ""

    with DDGS() as ddgs:
        hits = ddgs.text(query, max_results=max_results) or []
        # Skip hits without a usable body instead of raising KeyError.
        snippets = [hit["body"] for hit in hits if hit.get("body")]

    return "\n\n".join(snippets)
| # ========================= | |
| # PROCESS PDF (HF SAFE) | |
| # ========================= | |
def process_pdf(file):
    """Index an uploaded PDF and publish a retriever over it.

    Loads the PDF, splits it into overlapping chunks, embeds them into a
    Chroma vector store and rebinds the module-level ``vectorstore`` and
    ``retriever``.

    Args:
        file: The value delivered by the upload widget: ``None`` when the
            widget is cleared, a filesystem path string, or an object whose
            ``name`` attribute holds the path.

    Returns:
        A human-readable status string for the UI.
    """
    global vectorstore, retriever

    # The change event also fires when the upload is cleared.
    # The existing index is left untouched in that case.
    if file is None:
        return "⚠️ Please upload a PDF first."

    # Accept both a plain path and a tempfile-like wrapper.
    file_path = file if isinstance(file, str) else file.name

    documents = PyPDFLoader(file_path).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=600,
        chunk_overlap=100,
    )
    chunks = splitter.split_documents(documents)

    # Scanned/image-only PDFs yield no text; building a store from an empty
    # list would fail, so report it and keep any previous index usable.
    if not chunks:
        return "⚠️ No extractable text found in this PDF."

    # Drop the previous collection so chunks from an earlier upload do not
    # bleed into answers about the new document.
    # NOTE(review): relies on the default in-memory Chroma collection being
    # shared across from_documents() calls in one process — confirm.
    if vectorstore is not None:
        vectorstore.delete_collection()
        vectorstore = None
        retriever = None

    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=embedding_model,
    )
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
    return "✅ PDF successfully processed"
| # ========================= | |
| # RAG ENGINE (HYBRID) | |
| # ========================= | |
def ask_rag(query, model="llama-3.1-8b-instant"):
    """Answer *query* from the indexed PDF, topping up with web snippets.

    Retrieves chunks through the module-level ``retriever``. When the
    retrieved text is shorter than 50 characters, web search snippets are
    appended under a ``WEB:`` heading. The resulting prompt is sent to the
    Groq chat completion endpoint.

    Args:
        query: The user's question.
        model: Groq model identifier used for the completion.
            NOTE(review): the previous hard-coded ``llama3-8b-8192`` appears
            on Groq's deprecation list; confirm the replacement there.

    Returns:
        The model's answer, or a warning string when the question is blank
        or no PDF has been indexed yet.
    """
    # Do not spend a retrieval and an LLM call on an empty question.
    if not query or not query.strip():
        return "⚠️ Please enter a question."

    if retriever is None:
        return "⚠️ Please upload a PDF first."

    docs = retriever.invoke(query)
    pdf_context = "\n\n".join(doc.page_content for doc in docs)

    # Hybrid fallback: too little PDF text, so add web snippets.
    if len(pdf_context.strip()) < 50:
        context = pdf_context + "\n\nWEB:\n" + web_search(query)
    else:
        context = pdf_context

    response = client.chat.completions.create(
        messages=[{"role": "user", "content": build_prompt(context, query)}],
        model=model,
    )
    # content may be None; always hand the UI a string.
    return response.choices[0].message.content or ""
| # ========================= | |
| # CHAT FUNCTION (FIXED FORMAT) | |
| # ========================= | |
def chat(user_message, history):
    """Run one chat turn and return the updated conversation.

    Args:
        user_message: Text submitted by the user.
        history: List of ``(user, assistant)`` tuples so far, or ``None``
            to start a fresh conversation. The list is extended in place.

    Returns:
        The same conversation list twice: once for the chatbot display and
        once for the session state.
    """
    turns = [] if history is None else history
    turns.append((user_message, ask_rag(user_message)))
    return turns, turns
| # ========================= | |
| # UI (HUGGING FACE SAFE) | |
| # ========================= | |
# Layout and event wiring for the Gradio app.
with gr.Blocks() as app:
    gr.Markdown("# 🧠 Hybrid RAG Chatbot (PDF + Web)")

    # Only offer PDFs in the picker; anything else would fail in the loader.
    file = gr.File(label="Upload PDF", file_types=[".pdf"])
    status = gr.Textbox(label="Status")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask your question...")
    state = gr.State([])

    # Re-index whenever the uploaded file changes and show the status text.
    file.change(process_pdf, file, status)

    # Answer on Enter, then clear the input box for the next question.
    msg.submit(chat, [msg, state], [chatbot, state]).then(
        lambda: "", None, msg
    )

app.launch()