File size: 3,415 Bytes
8a1b6fa
 
 
6fb0b86
 
 
9787256
6fb0b86
 
cce1fbb
6fb0b86
 
cce1fbb
 
8a1b6fa
 
 
 
 
9787256
8a1b6fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6fb0b86
8a1b6fa
 
 
 
 
 
 
 
 
 
9787256
8a1b6fa
 
 
 
 
9787256
 
6fb0b86
9787256
8a1b6fa
 
6fb0b86
8a1b6fa
 
 
 
6fb0b86
8a1b6fa
 
 
 
 
 
 
 
 
 
 
 
9787256
8a1b6fa
 
 
9787256
8a1b6fa
 
 
 
 
 
6fb0b86
8a1b6fa
 
 
 
9787256
8a1b6fa
 
9787256
8a1b6fa
 
 
 
 
 
 
 
 
 
 
 
 
 
9787256
8a1b6fa
 
 
 
6fb0b86
 
 
 
8cbfc80
8a1b6fa
 
 
 
 
9787256
8a1b6fa
 
 
9787256
8a1b6fa
 
 
6fb0b86
8cbfc80
8a1b6fa
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# =========================
# IMPORTS
# =========================
import os
import tempfile
import gradio as gr

from groq import Groq
from duckduckgo_search import DDGS

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma


# =========================
# CONFIG
# =========================
# Groq credential read from the process environment (None when unset).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")  # HF SECRET KEY

# Single Groq client shared by every chat-completion request in this module.
client = Groq(api_key=GROQ_API_KEY)

# Filled in by process_pdf(); both remain None until a PDF has been indexed.
vectorstore = retriever = None


# =========================
# PROMPT
# =========================
def build_prompt(context, question):
    """Assemble the single-message prompt sent to the chat model.

    Args:
        context: Text the model is told to rely on exclusively.
        question: The user's question.

    Returns:
        The prompt string: instructions, then the CONTEXT, QUESTION and
        ANSWER sections, with a leading and a trailing newline.
    """
    prompt_lines = (
        "",  # leading newline
        "You are an expert AI assistant.",
        "",
        "Use ONLY the context below.",
        'If answer is not present, say "Not found in document".',
        "",
        "CONTEXT:",
        f"{context}",
        "",
        "QUESTION:",
        f"{question}",
        "",
        "ANSWER:",
        "",  # trailing newline
    )
    return "\n".join(prompt_lines)


# =========================
# WEB SEARCH (FALLBACK)
# =========================
def web_search(query):
    """Return DuckDuckGo result snippets for *query* as one string.

    Requests at most three text results, keeps the ``body`` field of each
    hit, and joins the snippets with a blank line between them.
    """
    with DDGS() as search_client:
        snippets = [
            hit["body"] for hit in search_client.text(query, max_results=3)
        ]
    return "\n\n".join(snippets)


# =========================
# PROCESS PDF (HF SAFE)
# =========================
def process_pdf(file):
    """Index an uploaded PDF and publish a retriever for ``ask_rag``.

    The PDF is loaded, split into overlapping chunks, embedded, and stored
    in a Chroma vector store. On success the module-level ``vectorstore``
    and ``retriever`` are replaced.

    Args:
        file: The uploaded file as delivered by the UI: either an object
            exposing the path as ``.name`` or a plain path string. ``None``
            is accepted and reported as "no file".

    Returns:
        A short status message for display in the UI.
    """
    global vectorstore, retriever

    # The change handler may be invoked without a file (e.g. when the upload
    # is cleared); report that instead of failing on ``None.name``.
    if file is None:
        return "⚠️ No file uploaded."

    # Accept both a file-like wrapper (``.name``) and a bare path string.
    file_path = getattr(file, "name", file)

    documents = PyPDFLoader(file_path).load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=600,
        chunk_overlap=100,
    )
    chunks = splitter.split_documents(documents)

    # Nothing to embed (e.g. a PDF without extractable text): keep whatever
    # index already exists and tell the user, rather than handing the
    # vector store an empty document list.
    if not chunks:
        return "⚠️ No readable text found in this PDF."

    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # NOTE(review): in-process Chroma collections appear to be shared by
    # name, so chunks from a previously uploaded PDF could otherwise be
    # returned for the new one. Drop the old collection before re-indexing;
    # confirm against the Chroma version in use.
    if vectorstore is not None:
        vectorstore.delete_collection()
        vectorstore = None
        retriever = None

    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=embedding_model,
    )
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

    return "✅ PDF successfully processed"


# =========================
# RAG ENGINE (HYBRID)
# =========================
def ask_rag(query, model="llama-3.1-8b-instant"):
    """Answer *query* from the indexed PDF, falling back to web search.

    The retriever supplies the PDF context. When that context is shorter
    than 50 non-blank characters, web search snippets are appended under a
    ``WEB:`` heading. The combined context and the question are sent to the
    Groq chat model as a single user message.

    Args:
        query: The user's question.
        model: Groq model id used for the completion.
            NOTE(review): the previously hard-coded ``llama3-8b-8192`` is
            listed by Groq as decommissioned; confirm the replacement id
            suits this deployment.

    Returns:
        The model's answer text, or a warning message when the query is
        blank or no PDF has been processed yet.
    """
    # Do not spend a retrieval and an LLM call on an empty question.
    if not query or not query.strip():
        return "⚠️ Please enter a question."

    if retriever is None:
        return "⚠️ Please upload a PDF first."

    docs = retriever.invoke(query)
    pdf_context = "\n\n".join(d.page_content for d in docs)

    # hybrid fallback: too little PDF text, so add web snippets
    if len(pdf_context.strip()) < 50:
        context = pdf_context + "\n\nWEB:\n" + web_search(query)
    else:
        context = pdf_context

    response = client.chat.completions.create(
        messages=[{"role": "user", "content": build_prompt(context, query)}],
        model=model,
    )

    return response.choices[0].message.content


# =========================
# CHAT FUNCTION (FIXED FORMAT)
# =========================
def chat(user_message, history):
    """Run one chat turn and return the updated conversation.

    Args:
        user_message: The text the user submitted.
        history: List of ``(user_message, response)`` tuples from earlier
            turns, or ``None`` to start a new conversation.

    Returns:
        A ``(history, history)`` pair: the same updated list twice, one for
        each of the two outputs the submit handler is wired to.
    """
    response = ask_rag(user_message)

    if history is None:
        history = []

    # Record the turn unconditionally. Previously the append sat inside the
    # ``is None`` branch, so an existing history never received new turns.
    history.append((user_message, response))

    return history, history


# =========================
# UI (HUGGING FACE SAFE)
# =========================
with gr.Blocks() as app:

    # Page heading.
    gr.Markdown("# 🧠 Hybrid RAG Chatbot (PDF + Web)")

    # PDF upload plus a text field showing the result of indexing.
    file = gr.File(label="Upload PDF")
    status = gr.Textbox(label="Status")

    # Conversation display, question input, and the per-session history.
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask your question...")
    state = gr.State([])

    # Index the PDF whenever the upload changes; report into the status box.
    file.change(fn=process_pdf, inputs=file, outputs=status)

    # Submitting a question runs one chat turn and refreshes the chat
    # display and the stored history.
    msg.submit(fn=chat, inputs=[msg, state], outputs=[chatbot, state])

app.launch()