faiz0983 commited on
Commit
e7c8f2f
·
verified ·
1 Parent(s): 3739703

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -95
app.py CHANGED
@@ -1,9 +1,9 @@
1
  import os
2
  import gradio as gr
3
 
4
- # LangChain Core
5
- from langchain.chains import ConversationalRetrievalChain
6
- from langchain.memory import ConversationBufferMemory
7
  from langchain.prompts import PromptTemplate
8
  from langchain.retrievers import EnsembleRetriever
9
 
@@ -20,20 +20,20 @@ from langchain_community.document_loaders import (
20
  )
21
  from langchain_community.retrievers import BM25Retriever
22
 
23
- # Text Splitters
24
  from langchain_text_splitters import RecursiveCharacterTextSplitter
25
 
26
  # --------------------------------------------------
27
- # 1. API KEY
28
  # --------------------------------------------------
29
  GROQ_API_KEY = os.getenv("GROQ_API")
30
 
31
  STRICT_PROMPT_TEMPLATE = """You are a strict document-based assistant.
32
- Use ONLY the information provided in the context.
33
 
34
- RULES:
35
  1. Do not use outside knowledge.
36
- 2. If the answer is not present, say:
37
  "I'm sorry, but the provided documents do not contain information to answer this question."
38
 
39
  Context:
@@ -50,96 +50,91 @@ STRICT_PROMPT = PromptTemplate(
50
  )
51
 
52
  # --------------------------------------------------
53
- # 2. FILE LOADER
54
  # --------------------------------------------------
55
  def load_any(path: str):
56
- p = path.lower()
57
- if p.endswith(".pdf"):
58
  return PyPDFLoader(path).load()
59
- if p.endswith(".txt"):
60
  return TextLoader(path, encoding="utf-8").load()
61
- if p.endswith(".docx"):
62
  return Docx2txtLoader(path).load()
63
  return []
64
 
65
  # --------------------------------------------------
66
- # 3. PROCESS FILES / BUILD CHAIN
67
  # --------------------------------------------------
68
  def process_files(files, response_length):
69
  if not files or not GROQ_API_KEY:
70
- return None, "⚠️ Missing documents or GROQ_API key."
71
-
72
- try:
73
- docs = []
74
- for f in files:
75
- docs.extend(load_any(f.name))
76
-
77
- splitter = RecursiveCharacterTextSplitter(
78
- chunk_size=800,
79
- chunk_overlap=100
80
- )
81
- chunks = splitter.split_documents(docs)
82
-
83
- # --- Hybrid Retrieval ---
84
- embeddings = HuggingFaceEmbeddings(
85
- model_name="sentence-transformers/all-MiniLM-L6-v2"
86
- )
87
-
88
- faiss_db = FAISS.from_documents(chunks, embeddings)
89
- faiss_retriever = faiss_db.as_retriever(search_kwargs={"k": 3})
90
-
91
- bm25_retriever = BM25Retriever.from_documents(chunks)
92
- bm25_retriever.k = 3
93
-
94
- retriever = EnsembleRetriever(
95
- retrievers=[faiss_retriever, bm25_retriever],
96
- weights=[0.5, 0.5]
97
- )
98
-
99
- llm = ChatGroq(
100
- groq_api_key=GROQ_API_KEY,
101
- model="llama-3.3-70b-versatile",
102
- temperature=0,
103
- max_tokens=int(response_length)
104
- )
105
-
106
- memory = ConversationBufferMemory(
107
- memory_key="chat_history",
108
- return_messages=True,
109
- output_key="answer"
110
- )
111
-
112
- chain = ConversationalRetrievalChain.from_llm(
113
- llm=llm,
114
- retriever=retriever,
115
- combine_docs_chain_kwargs={"prompt": STRICT_PROMPT},
116
- memory=memory,
117
- return_source_documents=True,
118
- output_key="answer"
119
- )
120
-
121
- return chain, f"✅ Chatbot ready (max {response_length} tokens)"
122
-
123
- except Exception as e:
124
- return None, f"❌ Error: {str(e)}"
125
 
126
  # --------------------------------------------------
127
- # 4. CHAT FUNCTION
128
  # --------------------------------------------------
129
  def chat_function(message, history, chain):
130
  if chain is None:
131
- return "⚠️ Please build the chatbot first."
132
 
133
- result = chain.invoke({
134
  "question": message,
135
  "chat_history": history
136
  })
137
 
138
- answer = result["answer"]
139
 
140
  sources = {
141
- os.path.basename(doc.metadata.get("source", "unknown"))
142
- for doc in result.get("source_documents", [])
143
  }
144
 
145
  if sources:
@@ -148,31 +143,19 @@ def chat_function(message, history, chain):
148
  return answer
149
 
150
  # --------------------------------------------------
151
- # 5. GRADIO UI
152
  # --------------------------------------------------
153
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
154
- gr.Markdown("# 🛡️ Strict Hybrid Multi-RAG (Groq + FAISS + BM25)")
155
 
156
  chain_state = gr.State(None)
157
 
158
  with gr.Row():
159
  with gr.Column(scale=1):
160
- file_input = gr.File(
161
- file_count="multiple",
162
- label="Upload Documents"
163
- )
164
- len_slider = gr.Slider(
165
- 100, 4000, value=1000, step=100,
166
- label="Max Answer Tokens"
167
- )
168
- build_btn = gr.Button(
169
- "Build Chatbot",
170
- variant="primary"
171
- )
172
- status = gr.Textbox(
173
- label="Status",
174
- interactive=False
175
- )
176
 
177
  with gr.Column(scale=2):
178
  gr.ChatInterface(
@@ -180,9 +163,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
180
  additional_inputs=[chain_state]
181
  )
182
 
183
- build_btn.click(
184
  process_files,
185
- inputs=[file_input, len_slider],
186
  outputs=[chain_state, status]
187
  )
188
 
 
1
  import os
2
  import gradio as gr
3
 
4
+ # LangChain (CLASSIC / STABLE)
5
+ from langchain_classic.chains import ConversationalRetrievalChain
6
+ from langchain_classic.memory import ConversationBufferMemory
7
  from langchain.prompts import PromptTemplate
8
  from langchain.retrievers import EnsembleRetriever
9
 
 
20
  )
21
  from langchain_community.retrievers import BM25Retriever
22
 
23
+ # Text splitters
24
  from langchain_text_splitters import RecursiveCharacterTextSplitter
25
 
26
  # --------------------------------------------------
27
+ # API KEY
28
  # --------------------------------------------------
29
  GROQ_API_KEY = os.getenv("GROQ_API")
30
 
31
  STRICT_PROMPT_TEMPLATE = """You are a strict document-based assistant.
32
+ Use ONLY the provided context.
33
 
34
+ Rules:
35
  1. Do not use outside knowledge.
36
+ 2. If answer not found, say:
37
  "I'm sorry, but the provided documents do not contain information to answer this question."
38
 
39
  Context:
 
50
  )
51
 
52
  # --------------------------------------------------
53
+ # LOAD FILES
54
  # --------------------------------------------------
55
def load_any(path: str):
    """Load one uploaded document into LangChain Documents by extension.

    Supports .pdf, .txt and .docx (extension match is case-insensitive).
    Any other extension returns an empty list so unsupported uploads are
    skipped instead of raising.
    """
    # Lowercase ONLY for the suffix test; the loaders must receive the
    # original path — passing a lowercased path breaks files whose names
    # contain uppercase characters on case-sensitive filesystems.
    suffix = path.lower()
    if suffix.endswith(".pdf"):
        return PyPDFLoader(path).load()
    if suffix.endswith(".txt"):
        return TextLoader(path, encoding="utf-8").load()
    if suffix.endswith(".docx"):
        return Docx2txtLoader(path).load()
    return []
64
 
65
  # --------------------------------------------------
66
+ # BUILD RAG
67
  # --------------------------------------------------
68
def process_files(files, response_length):
    """Build the hybrid (FAISS + BM25) conversational RAG chain.

    Args:
        files: Gradio file objects (each exposing a filesystem path via
            ``.name``), or None/empty when nothing was uploaded.
        response_length: maximum answer tokens for the LLM (slider value).

    Returns:
        ``(chain, status_message)`` — ``chain`` is ``None`` on any failure
        so the Gradio status box reports the problem instead of the
        callback crashing (``chat_function`` guards on ``chain is None``).
    """
    if not files or not GROQ_API_KEY:
        return None, "⚠️ Missing files or GROQ_API key"

    try:
        # Load every uploaded document and split into overlapping chunks.
        docs = []
        for f in files:
            docs.extend(load_any(f.name))

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=100
        )
        chunks = splitter.split_documents(docs)

        # Dense retriever: FAISS over sentence-transformer embeddings.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        faiss_db = FAISS.from_documents(chunks, embeddings)
        faiss_retriever = faiss_db.as_retriever(search_kwargs={"k": 3})

        # Sparse retriever: BM25, weighted 50/50 with the dense one.
        bm25 = BM25Retriever.from_documents(chunks)
        bm25.k = 3

        retriever = EnsembleRetriever(
            retrievers=[faiss_retriever, bm25],
            weights=[0.5, 0.5]
        )

        llm = ChatGroq(
            groq_api_key=GROQ_API_KEY,
            model="llama-3.3-70b-versatile",
            temperature=0,  # deterministic, document-grounded answers
            max_tokens=int(response_length)
        )

        memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
            output_key="answer"  # chain returns multiple keys; pick one
        )

        chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=retriever,
            memory=memory,
            combine_docs_chain_kwargs={"prompt": STRICT_PROMPT},
            return_source_documents=True,
            output_key="answer"
        )

        return chain, "✅ Chatbot built successfully"
    except Exception as e:
        # Surface build failures (bad PDF, empty docs, model errors) in
        # the status box instead of crashing the Gradio callback.
        return None, f"❌ Error: {e}"
 
 
 
 
 
120
 
121
  # --------------------------------------------------
122
+ # CHAT
123
  # --------------------------------------------------
124
  def chat_function(message, history, chain):
125
  if chain is None:
126
+ return "⚠️ Build the chatbot first"
127
 
128
+ res = chain.invoke({
129
  "question": message,
130
  "chat_history": history
131
  })
132
 
133
+ answer = res["answer"]
134
 
135
  sources = {
136
+ os.path.basename(d.metadata.get("source", "unknown"))
137
+ for d in res.get("source_documents", [])
138
  }
139
 
140
  if sources:
 
143
  return answer
144
 
145
  # --------------------------------------------------
146
+ # UI
147
  # --------------------------------------------------
148
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
149
+ gr.Markdown("# 🛡️ Strict Hybrid Multi-RAG")
150
 
151
  chain_state = gr.State(None)
152
 
153
  with gr.Row():
154
  with gr.Column(scale=1):
155
+ files = gr.File(file_count="multiple")
156
+ tokens = gr.Slider(100, 4000, 1000, step=100)
157
+ build = gr.Button("Build Chatbot", variant="primary")
158
+ status = gr.Textbox(interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
  with gr.Column(scale=2):
161
  gr.ChatInterface(
 
163
  additional_inputs=[chain_state]
164
  )
165
 
166
+ build.click(
167
  process_files,
168
+ inputs=[files, tokens],
169
  outputs=[chain_state, status]
170
  )
171