faiz0983 committed on
Commit
e90b7cb
·
verified ·
1 Parent(s): cc90a45

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -87
app.py CHANGED
@@ -1,7 +1,8 @@
1
  import os
2
  import gradio as gr
 
3
 
4
- # LangChain (CLASSIC / STABLE)
5
  from langchain.chains import ConversationalRetrievalChain
6
  from langchain.memory import ConversationBufferMemory
7
  from langchain.prompts import PromptTemplate
@@ -20,20 +21,18 @@ from langchain_community.document_loaders import (
20
  )
21
  from langchain_community.retrievers import BM25Retriever
22
 
23
- # Text splitters
24
  from langchain_text_splitters import RecursiveCharacterTextSplitter
25
 
26
- # --------------------------------------------------
27
- # API KEY
28
- # --------------------------------------------------
29
  GROQ_API_KEY = os.getenv("GROQ_API")
30
 
31
- STRICT_PROMPT_TEMPLATE = """You are a strict document-based assistant.
32
- Use ONLY the provided context.
 
33
 
34
  Rules:
35
- 1. Do not use outside knowledge.
36
- 2. If answer not found, say:
37
  "I'm sorry, but the provided documents do not contain information to answer this question."
38
 
39
  Context:
@@ -42,16 +41,11 @@ Context:
42
  Question: {question}
43
 
44
  Answer:
45
- """
46
-
47
- STRICT_PROMPT = PromptTemplate(
48
- template=STRICT_PROMPT_TEMPLATE,
49
  input_variables=["context", "question"]
50
  )
51
 
52
- # --------------------------------------------------
53
- # LOAD FILES
54
- # --------------------------------------------------
55
  def load_any(path: str):
56
  path = path.lower()
57
  if path.endswith(".pdf"):
@@ -62,79 +56,84 @@ def load_any(path: str):
62
  return Docx2txtLoader(path).load()
63
  return []
64
 
65
- # --------------------------------------------------
66
- # BUILD RAG
67
- # --------------------------------------------------
68
  def process_files(files, response_length):
69
- if not files or not GROQ_API_KEY:
70
- return None, "⚠️ Missing files or GROQ_API key"
71
-
72
- docs = []
73
- for f in files:
74
- docs.extend(load_any(f.name))
75
-
76
- splitter = RecursiveCharacterTextSplitter(
77
- chunk_size=800,
78
- chunk_overlap=100
79
- )
80
- chunks = splitter.split_documents(docs)
81
-
82
- embeddings = HuggingFaceEmbeddings(
83
- model_name="sentence-transformers/all-MiniLM-L6-v2"
84
- )
85
-
86
- faiss_db = FAISS.from_documents(chunks, embeddings)
87
- faiss_retriever = faiss_db.as_retriever(search_kwargs={"k": 3})
88
-
89
- bm25 = BM25Retriever.from_documents(chunks)
90
- bm25.k = 3
91
-
92
- retriever = EnsembleRetriever(
93
- retrievers=[faiss_retriever, bm25],
94
- weights=[0.5, 0.5]
95
- )
96
-
97
- llm = ChatGroq(
98
- groq_api_key=GROQ_API_KEY,
99
- model="llama-3.3-70b-versatile",
100
- temperature=0,
101
- max_tokens=int(response_length)
102
- )
103
-
104
- memory = ConversationBufferMemory(
105
- memory_key="chat_history",
106
- return_messages=True,
107
- output_key="answer"
108
- )
109
-
110
- chain = ConversationalRetrievalChain.from_llm(
111
- llm=llm,
112
- retriever=retriever,
113
- memory=memory,
114
- combine_docs_chain_kwargs={"prompt": STRICT_PROMPT},
115
- return_source_documents=True,
116
- output_key="answer"
117
- )
118
-
119
- return chain, "✅ Chatbot built successfully"
120
-
121
- # --------------------------------------------------
122
- # CHAT
123
- # --------------------------------------------------
 
 
 
 
 
124
  def chat_function(message, history, chain):
125
  if chain is None:
126
  return "⚠️ Build the chatbot first"
127
 
128
- res = chain.invoke({
129
  "question": message,
130
  "chat_history": history
131
  })
132
 
133
- answer = res["answer"]
134
 
135
  sources = {
136
- os.path.basename(d.metadata.get("source", "unknown"))
137
- for d in res.get("source_documents", [])
 
 
138
  }
139
 
140
  if sources:
@@ -142,20 +141,18 @@ def chat_function(message, history, chain):
142
 
143
  return answer
144
 
145
- # --------------------------------------------------
146
- # UI
147
- # --------------------------------------------------
148
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
149
- gr.Markdown("# 🛡️ Strict Hybrid Multi-RAG")
150
 
151
  chain_state = gr.State(None)
152
 
153
  with gr.Row():
154
  with gr.Column(scale=1):
155
- files = gr.File(file_count="multiple")
156
- tokens = gr.Slider(100, 4000, 1000, step=100)
157
  build = gr.Button("Build Chatbot", variant="primary")
158
- status = gr.Textbox(interactive=False)
159
 
160
  with gr.Column(scale=2):
161
  gr.ChatInterface(
@@ -170,4 +167,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
170
  )
171
 
172
  if __name__ == "__main__":
173
- demo.launch()
 
1
  import os
2
  import gradio as gr
3
+ import traceback
4
 
5
+ # ---------------- LangChain (STABLE 0.1.x) ----------------
6
  from langchain.chains import ConversationalRetrievalChain
7
  from langchain.memory import ConversationBufferMemory
8
  from langchain.prompts import PromptTemplate
 
21
  )
22
  from langchain_community.retrievers import BM25Retriever
23
 
 
24
  from langchain_text_splitters import RecursiveCharacterTextSplitter
25
 
26
+ # ---------------- CONFIG ----------------
 
 
27
  GROQ_API_KEY = os.getenv("GROQ_API")
28
 
29
+ STRICT_PROMPT = PromptTemplate(
30
+ template="""
31
+ You are a strict document-based assistant.
32
 
33
  Rules:
34
+ 1. ONLY use the provided context.
35
+ 2. If the answer is not in the context, say:
36
  "I'm sorry, but the provided documents do not contain information to answer this question."
37
 
38
  Context:
 
41
  Question: {question}
42
 
43
  Answer:
44
+ """,
 
 
 
45
  input_variables=["context", "question"]
46
  )
47
 
48
+ # ---------------- FILE LOADER ----------------
 
 
49
  def load_any(path: str):
50
  path = path.lower()
51
  if path.endswith(".pdf"):
 
56
  return Docx2txtLoader(path).load()
57
  return []
58
 
59
+ # ---------------- BUILD CHAIN ----------------
 
 
60
def process_files(files, response_length):
    """Build a strict hybrid-RAG conversational chain from uploaded documents.

    Parameters:
        files: Gradio file uploads (objects exposing ``.path``; presumably the
            HF-Spaces-safe attribute — TODO confirm against the local Gradio API).
        response_length: slider value used as the LLM's max completion tokens.

    Returns:
        (chain, status_message) — ``chain`` is None when the build fails.
    """
    if not files:
        return None, "❌ No files uploaded"
    if not GROQ_API_KEY:
        return None, "❌ GROQ_API secret not set"

    try:
        docs = []
        for f in files:
            docs.extend(load_any(f.path))  # HF SAFE

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=100
        )
        chunks = splitter.split_documents(docs)

        # Guard: FAISS.from_documents raises an opaque error on an empty
        # list; report a clear status message instead.
        if not chunks:
            return None, "❌ No text could be extracted from the uploaded files"

        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        # Dense (semantic) retriever.
        faiss_db = FAISS.from_documents(chunks, embeddings)
        faiss_retriever = faiss_db.as_retriever(search_kwargs={"k": 3})

        # Sparse (keyword) retriever.
        bm25 = BM25Retriever.from_documents(chunks)
        bm25.k = 3

        # Equal-weight hybrid of dense + sparse results.
        retriever = EnsembleRetriever(
            retrievers=[faiss_retriever, bm25],
            weights=[0.5, 0.5]
        )

        llm = ChatGroq(
            groq_api_key=GROQ_API_KEY,
            model="llama-3.3-70b-versatile",
            temperature=0,  # deterministic, document-grounded answers
            max_tokens=int(response_length)
        )

        memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
            output_key="answer"  # chain returns multiple keys; pick the answer
        )

        chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=retriever,
            memory=memory,
            combine_docs_chain_kwargs={"prompt": STRICT_PROMPT},
            return_source_documents=True,
            output_key="answer"
        )

        return chain, "✅ Chatbot built successfully"

    except Exception as e:
        # Full traceback goes to the Space logs; short error goes to the UI.
        traceback.print_exc()
        return None, f"❌ {repr(e)}"
119
+
120
+ # ---------------- CHAT ----------------
121
  def chat_function(message, history, chain):
122
  if chain is None:
123
  return "⚠️ Build the chatbot first"
124
 
125
+ result = chain.invoke({
126
  "question": message,
127
  "chat_history": history
128
  })
129
 
130
+ answer = result["answer"]
131
 
132
  sources = {
133
+ os.path.basename(
134
+ d.metadata.get("source", d.metadata.get("file_path", "unknown"))
135
+ )
136
+ for d in result.get("source_documents", [])
137
  }
138
 
139
  if sources:
 
141
 
142
  return answer
143
 
144
+ # ---------------- UI ----------------
145
+ with gr.Blocks() as demo:
146
+ gr.Markdown("# 🛡️ Strict Hybrid Multi-RAG (HF-Safe)")
 
 
147
 
148
  chain_state = gr.State(None)
149
 
150
  with gr.Row():
151
  with gr.Column(scale=1):
152
+ files = gr.File(file_count="multiple", label="Upload Documents")
153
+ tokens = gr.Slider(100, 4000, value=1000, step=100, label="Max Tokens")
154
  build = gr.Button("Build Chatbot", variant="primary")
155
+ status = gr.Textbox(label="Status", interactive=False)
156
 
157
  with gr.Column(scale=2):
158
  gr.ChatInterface(
 
167
  )
168
 
169
if __name__ == "__main__":
    # NOTE(review): `theme` is an argument of the gr.Blocks(...) constructor,
    # not of Blocks.launch(); passing it to launch() raises TypeError at
    # startup. Set the theme where `demo` is created instead.
    demo.launch()