HuzaifaTech committed on
Commit
6fb0b86
·
verified ·
1 Parent(s): cce1fbb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -20
app.py CHANGED
@@ -1,22 +1,23 @@
1
  # =========================
2
  # IMPORTS
3
  # =========================
4
- from langchain_community.document_loaders import PyPDFLoader
5
- from langchain.text_splitter import RecursiveCharacterTextSplitter
6
 
 
 
 
7
 
 
 
8
  from langchain_community.embeddings import HuggingFaceEmbeddings
9
  from langchain_community.vectorstores import Chroma
10
- from groq import Groq
11
- from duckduckgo_search import DDGS
12
- import gradio as gr
13
 
14
 
15
  # =========================
16
  # CONFIG
17
  # =========================
18
- import os
19
- GROQ_API_KEY = os.getenv("Ai_tutor") # 🔥 set in HF secrets instead
20
 
21
  client = Groq(api_key=GROQ_API_KEY)
22
 
@@ -45,7 +46,7 @@ ANSWER:
45
 
46
 
47
  # =========================
48
- # WEB SEARCH
49
  # =========================
50
  def web_search(query):
51
  results = []
@@ -56,21 +57,26 @@ def web_search(query):
56
 
57
 
58
  # =========================
59
- # PROCESS PDF
60
  # =========================
61
  def process_pdf(file):
62
 
63
  global vectorstore, retriever
64
 
65
- loader = PyPDFLoader(file.name)
 
 
 
 
 
66
  documents = loader.load()
67
 
68
- text_splitter = RecursiveCharacterTextSplitter(
69
  chunk_size=600,
70
  chunk_overlap=100
71
  )
72
 
73
- chunks = text_splitter.split_documents(documents)
74
 
75
  embedding_model = HuggingFaceEmbeddings(
76
  model_name="sentence-transformers/all-MiniLM-L6-v2"
@@ -83,26 +89,26 @@ def process_pdf(file):
83
 
84
  retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
85
 
86
- return "✅ PDF processed. Ask questions."
87
 
88
 
89
  # =========================
90
- # RAG FUNCTION (HYBRID)
91
  # =========================
92
  def ask_rag(query):
93
 
94
  global retriever
95
 
96
  if retriever is None:
97
- return "⚠️ Upload a PDF first."
98
 
99
  docs = retriever.invoke(query)
100
  pdf_context = "\n\n".join([d.page_content for d in docs])
101
 
102
- # Hybrid fallback
103
  if len(pdf_context.strip()) < 50:
104
  web_context = web_search(query)
105
- context = pdf_context + "\n\nWEB:\n" + web_context
106
  else:
107
  context = pdf_context
108
 
@@ -117,25 +123,30 @@ def ask_rag(query):
117
 
118
 
119
  # =========================
120
- # CHAT
121
  # =========================
122
  def chat(user_message, history):
123
 
124
  response = ask_rag(user_message)
 
 
 
 
125
  history.append((user_message, response))
126
 
127
  return history, history
128
 
129
 
130
  # =========================
131
- # UI
132
  # =========================
133
  with gr.Blocks() as app:
134
 
135
- gr.Markdown("# 🧠 Hybrid RAG Chatbot (PDF + Web)")
136
 
137
  file = gr.File(label="Upload PDF")
138
  status = gr.Textbox(label="Status")
 
139
  chatbot = gr.Chatbot()
140
  msg = gr.Textbox(placeholder="Ask your question...")
141
  state = gr.State([])
 
1
  # =========================
2
  # IMPORTS
3
  # =========================
4
+ import os
5
+ import tempfile
6
 
7
+ import gradio as gr
8
+ from groq import Groq
9
+ from duckduckgo_search import DDGS
10
 
11
+ from langchain_community.document_loaders import PyPDFLoader
12
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
13
  from langchain_community.embeddings import HuggingFaceEmbeddings
14
  from langchain_community.vectorstores import Chroma
 
 
 
15
 
16
 
17
  # =========================
18
  # CONFIG
19
  # =========================
20
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY") # ✅ Hugging Face Secret
 
21
 
22
  client = Groq(api_key=GROQ_API_KEY)
23
 
 
46
 
47
 
48
  # =========================
49
+ # WEB SEARCH (FALLBACK)
50
  # =========================
51
  def web_search(query):
52
  results = []
 
57
 
58
 
59
  # =========================
60
+ # PROCESS PDF (FIXED FOR HF)
61
  # =========================
62
  def process_pdf(file):
63
 
64
  global vectorstore, retriever
65
 
66
+ # SAFE HF FILE HANDLING
67
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
68
+ tmp.write(file.read())
69
+ tmp_path = tmp.name
70
+
71
+ loader = PyPDFLoader(tmp_path)
72
  documents = loader.load()
73
 
74
+ splitter = RecursiveCharacterTextSplitter(
75
  chunk_size=600,
76
  chunk_overlap=100
77
  )
78
 
79
+ chunks = splitter.split_documents(documents)
80
 
81
  embedding_model = HuggingFaceEmbeddings(
82
  model_name="sentence-transformers/all-MiniLM-L6-v2"
 
89
 
90
  retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
91
 
92
+ return "✅ PDF processed successfully. You can now ask questions."
93
 
94
 
95
  # =========================
96
+ # HYBRID RAG FUNCTION
97
  # =========================
98
  def ask_rag(query):
99
 
100
  global retriever
101
 
102
  if retriever is None:
103
+ return "⚠️ Please upload a PDF first."
104
 
105
  docs = retriever.invoke(query)
106
  pdf_context = "\n\n".join([d.page_content for d in docs])
107
 
108
+ # fallback if weak retrieval
109
  if len(pdf_context.strip()) < 50:
110
  web_context = web_search(query)
111
+ context = pdf_context + "\n\nWEB CONTEXT:\n" + web_context
112
  else:
113
  context = pdf_context
114
 
 
123
 
124
 
125
  # =========================
126
+ # CHAT FUNCTION (SAFE)
127
  # =========================
128
  def chat(user_message, history):
129
 
130
  response = ask_rag(user_message)
131
+
132
+ if history is None:
133
+ history = []
134
+
135
  history.append((user_message, response))
136
 
137
  return history, history
138
 
139
 
140
  # =========================
141
+ # GRADIO UI (HF SAFE)
142
  # =========================
143
  with gr.Blocks() as app:
144
 
145
+ gr.Markdown("# 🧠 Hybrid RAG Chatbot (PDF + Web Search)")
146
 
147
  file = gr.File(label="Upload PDF")
148
  status = gr.Textbox(label="Status")
149
+
150
  chatbot = gr.Chatbot()
151
  msg = gr.Textbox(placeholder="Ask your question...")
152
  state = gr.State([])