Vipplav committed on
Commit
73d9537
·
verified ·
1 Parent(s): 3032a83
Files changed (2) hide show
  1. app.py +247 -63
  2. requirements.txt +9 -1
app.py CHANGED
@@ -1,64 +1,248 @@
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
9
-
10
- def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
- ):
18
- messages = [{"role": "system", "content": system_message}]
19
-
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
25
-
26
- messages.append({"role": "user", "content": message})
27
-
28
- response = ""
29
-
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
-
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- demo = gr.ChatInterface(
47
- respond,
48
- additional_inputs=[
49
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
50
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
52
- gr.Slider(
53
- minimum=0.1,
54
- maximum=1.0,
55
- value=0.95,
56
- step=0.05,
57
- label="Top-p (nucleus sampling)",
58
- ),
59
- ],
60
- )
61
-
62
-
63
- if __name__ == "__main__":
64
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# === Imports ===
import os
import re
from datetime import datetime
from difflib import get_close_matches

import gradio as gr
import torch
from langchain_community.llms import HuggingFacePipeline
from langchain_core.prompts import PromptTemplate
from pymongo import MongoClient
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
11
+
12
+
13
+ # === MongoDB Setup ===
14
# The connection string (which carries the database username and password) is
# read from the MONGO_URI environment variable so that credentials are never
# committed to source control. Fail fast with a clear message when it is
# missing instead of letting the driver fall back to some default host.
mongo_uri = os.environ.get("MONGO_URI")
if not mongo_uri:
    raise RuntimeError(
        "MONGO_URI environment variable is not set; "
        "it must contain the MongoDB connection string."
    )
client = MongoClient(mongo_uri)
db = client["msme_schemes_db"]
udyam_coll = db["udyam_profiles"]              # looked up by "Udyam_ID"
schemes_chunk_coll = db["schemes_chunks_only"]  # documents carrying "rag_chunks"
schemes_info_coll = db["schemes_embedded"]      # looked up by "scheme_id"
query_logs_coll = db["query_logs"]
21
+
22
+ # === UID Utility ===
23
def normalize_udyam(uid):
    """Return *uid* trimmed, upper-cased and with every space character removed."""
    shouted = uid.strip().upper()
    # Splitting on a literal space and re-joining drops spaces only; other
    # interior whitespace (tabs, newlines) is left untouched.
    return "".join(shouted.split(" "))
25
+
26
def is_valid_udyam(uid):
    """Return True when *uid* has the shape ``UDYAM-XX-00-000000[0]``.

    The pattern is: the literal ``UDYAM``, two upper-case letters, two digits
    and six or seven digits, separated by hyphens. The whole string must
    match; ``re.fullmatch`` is used because ``re.match`` with ``$`` would also
    accept a value that ends in a newline. Non-string input is reported as
    invalid rather than raising.
    """
    if not isinstance(uid, str):
        return False
    return re.fullmatch(r"UDYAM-[A-Z]{2}-\d{2}-\d{6,7}", uid) is not None
28
+
29
def get_profile_by_uid(uid):
    """Fetch the Udyam profile document for *uid*.

    The ID is normalised with ``normalize_udyam`` and checked with
    ``is_valid_udyam`` before the database is queried.

    Returns:
        The matching document from ``udyam_coll`` with its ``_id`` field
        excluded, or ``None`` when the ID is malformed or nothing matches.
    """
    uid = normalize_udyam(uid)
    if not is_valid_udyam(uid):
        # This previously went through an undefined `console` object, so every
        # malformed ID raised NameError instead of returning None. Plain
        # print keeps the server-side message without a new dependency.
        print("\n❌ That doesn't look like a valid Udyam Registration Number. Please double-check.")
        return None
    return udyam_coll.find_one({"Udyam_ID": uid}, {"_id": 0})
35
+
36
+ # === Summary ===
37
def summarize_profile(profile):
    """Render an enterprise profile dict as a short English paragraph.

    Keys read: 'Enterprise Name', 'State', 'Major Activity', 'Gender',
    'Enterprise Type', 'Organisation Type', 'Employment',
    'Investment Cost (In Rs.)' and 'Net Turnover (In Rs.)'.

    Missing, ``None`` or empty values are replaced by an "unspecified"
    placeholder instead of raising ``KeyError``, so a partially filled profile
    (for example one typed in by hand) can still be summarised. For a fully
    populated profile with numeric amounts the text is unchanged.
    """

    def _field(key, fallback="unspecified"):
        # Treat absent, None and "" alike: the value is simply not known.
        value = profile.get(key)
        return fallback if value is None or value == "" else value

    def _rupees(key):
        value = profile.get(key)
        if value is None or value == "":
            return "an unspecified amount"
        # Thousands separators only apply to real numbers; bool is excluded
        # because it is an int subclass. Anything else is shown verbatim.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return f"₹{value:,}"
        return f"₹{value}"

    organisation = str(_field("Organisation Type")).lower()
    return (
        f"The user represents an enterprise named '{_field('Enterprise Name')}', "
        f"based in {_field('State', 'an unspecified state')}, "
        f"operating in the {_field('Major Activity')} sector. "
        f"They identify as {_field('Gender')}, "
        f"run a {_field('Enterprise Type')} sized {organisation} organization. "
        f"The enterprise has {_field('Employment', 'an unspecified number of')} employees, "
        f"with an investment of {_rupees('Investment Cost (In Rs.)')} "
        f"and a turnover of {_rupees('Net Turnover (In Rs.)')}."
    )
43
+
44
+ # === Prompt Template ===
45
# Prompt asking the LLM to turn an enterprise profile summary into a single
# one-line search query. `{profile_summary}` is the only placeholder; it is
# filled in by generate_search_query().
rephrase_template = PromptTemplate.from_template("""
You're a helpful assistant guiding Indian MSMEs to the best-matching government schemes.
Based on the enterprise profile, generate a clear, short one-line search query with keywords like state, sector, size, gender, and investment.
Only return the query. Avoid comments.
Enterprise Profile:
{profile_summary}
""")
52
+
53
+ # === Load LLM ===
54
# Causal LM used for query rewriting and for summarising scheme sections.
# Half precision is requested only when CUDA is available; otherwise float32.
model_id = "google/gemma-2b-it"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)
# Greedy decoding (do_sample=False), capped at 128 newly generated tokens.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=128,
    do_sample=False,
)
llm = HuggingFacePipeline(pipeline=generator)

# === Embedding Model ===
# Sentence encoder used to embed search queries for similarity scoring.
embed_model = SentenceTransformer(
    "BAAI/bge-small-en-v1.5",
    device="cuda" if torch.cuda.is_available() else "cpu",
)
62
+
63
+ # === Query Generation ===
64
def generate_search_query(profile):
    """Ask the LLM for a one-line search query describing *profile*.

    The profile is summarised, substituted into ``rephrase_template`` and sent
    to ``llm``; only the first line of the stripped reply is returned, itself
    stripped of surrounding whitespace.
    """
    filled_prompt = rephrase_template.format(profile_summary=summarize_profile(profile))
    reply = llm.invoke(filled_prompt)
    # Keep everything before the first newline of the trimmed reply.
    first_line, _, _ = reply.strip().partition("\n")
    return first_line.strip()
69
+
70
+ # === Chunk Retrieval ===
71
def get_top_matching_schemes(query_text, top_k=5):
    """Return up to *top_k* distinct schemes ranked by chunk similarity.

    Every chunk embedding stored under ``rag_chunks`` in ``schemes_chunk_coll``
    is compared with the embedding of *query_text* using cosine similarity.
    Results are sorted by score, highest first, and de-duplicated on
    ``scheme_id`` so each scheme appears once with its best-scoring chunk.

    Args:
        query_text: Text to embed and search with.
        top_k: Maximum number of schemes to return. Zero or a negative value
            yields an empty list; previously it never satisfied the
            ``== top_k`` stop condition and returned every scheme.

    Returns:
        A list of dicts with keys ``score``, ``scheme_id`` and ``scheme_name``.
    """
    if top_k <= 0:
        return []

    query_embedding = embed_model.encode(query_text, convert_to_tensor=True)
    matches = []
    for doc in schemes_chunk_coll.find({"rag_chunks": {"$exists": True}}):
        for chunk in doc["rag_chunks"]:
            # Skip chunks that have no embedding or an empty one.
            if "embedding" in chunk and chunk["embedding"]:
                chunk_tensor = torch.tensor(chunk["embedding"]).to(query_embedding.device)
                score = util.cos_sim(query_embedding, chunk_tensor)[0][0].item()
                matches.append({
                    "score": score,
                    "scheme_id": doc.get("scheme_id"),
                    "scheme_name": doc.get("scheme_name"),
                })

    seen, top_results = set(), []
    for m in sorted(matches, key=lambda x: x["score"], reverse=True):
        if m["scheme_id"] in seen:
            continue
        top_results.append(m)
        seen.add(m["scheme_id"])
        if len(top_results) >= top_k:
            break
    return top_results
88
+
89
+ # === Scheme Field with LLM Formatting ===
90
def fetch_scheme_field_llm(scheme_id, field_input):
    """Return an LLM-written summary of one section of a scheme.

    Args:
        scheme_id: Value matched against ``scheme_id`` in ``schemes_info_coll``.
        field_input: Free-text request; the first keyword from the table below
            that occurs in it (case-insensitively) selects the section.

    Returns:
        A Markdown string with a heading and the summary, or a short hint /
        warning string when the section cannot be identified or is not
        available for the scheme.
    """
    field_map = {
        "eligibility": "eligibility_list",
        "benefits": "key_benefits_list",
        "assistance": "assistance_list",
        "apply": "how_to_apply_list",
        "documents": "required_documents_list",
        # The scheme-selection prompt offers "docs", which is not a substring
        # match for "documents", so it needs its own entry.
        "docs": "required_documents_list",
    }
    no_match_hint = "❌ Try asking about eligibility, benefits, how to apply, or documents."

    # Figure out which section was asked for; None/empty input matches nothing.
    wanted = (field_input or "").lower()
    matched_field = next(
        (field for keyword, field in field_map.items() if keyword in wanted),
        None,
    )
    if not matched_field:
        return no_match_hint

    # Fetch the scheme document.
    not_found = "⚠️ Couldn’t find that information for the selected scheme."
    doc = schemes_info_coll.find_one({"scheme_id": scheme_id})
    if not doc or matched_field not in doc:
        return not_found

    items = doc[matched_field]
    if not items:
        # Field present but null/empty: there is nothing to summarise.
        return not_found
    if isinstance(items, str):
        # A bare string would otherwise be sliced and joined per character.
        items = [items]

    # Take up to the first 5 entries; str() guards against non-string items.
    raw_text = "\n".join(str(item) for item in items[:5])
    scheme_name = doc.get("scheme_name", scheme_id)
    # One human-readable title ("Key Benefits") for both prompt and heading;
    # the prompt previously showed the underscored form ("Key_Benefits").
    section_title = matched_field.replace("_list", "").replace("_", " ").title()

    prompt = (
        "Summarize the following information into a clear and professional "
        "explanation for business owners:\n\n"
        f"Scheme: {scheme_name}\n"
        f"Section: {section_title}\n\n"
        f"{raw_text}"
    ).strip()

    response = llm.invoke(prompt).strip()
    return f"📄 **{section_title} for {scheme_name}:**\n{response}"
125
+
126
+
127
+ # === Gradio UI ===
128
def start_session(udyam_id):
    """Start a session from an Udyam ID, or prompt for manual entry.

    Args:
        udyam_id: Text from the Udyam ID box. ``None``, empty and
            whitespace-only values all mean "no ID supplied".

    Returns:
        A ``(history, state)`` pair. ``history`` is a list of
        ``(user_message, bot_message)`` pairs for the Chatbot, where ``None``
        on the user side means a bot-only message. ``state`` is ``{}`` unless a
        profile was found, in which case it holds ``profile`` and ``schemes``.
    """
    # Trim first so that a whitespace-only value is treated as blank, the same
    # way the visibility toggle for the manual form treats it.
    udyam_id = (udyam_id or "").strip()
    if not udyam_id:
        # Blank: the manual details form is the next step.
        return [(None, "📝 Please fill in your enterprise details below.")], {}

    profile = get_profile_by_uid(udyam_id)
    if profile is None:
        # Malformed ID, or no profile stored for it.
        return [(None, "❌ That doesn't look valid. Please correct or go manual.")], {}

    # Valid profile fetched.
    summary = summarize_profile(profile)
    query = generate_search_query(profile)
    schemes = get_top_matching_schemes(query)
    scheme_lines = "\n".join(
        f"{i}. {s['scheme_name']} (score {s['score']:.3f})"
        for i, s in enumerate(schemes, start=1)
    )
    response = (
        f"✅ Profile OK:\n{summary}\n\n"
        f"🔍 Query: {query}\n\n"
        "📈 Top schemes:\n" + scheme_lines
    )
    # Stored for the later scheme-selection step.
    state = {"profile": profile, "schemes": schemes}
    # Each history entry is a (user, bot) pair, not a (role, text) pair, so
    # the ID goes on the user side and the reply on the bot side.
    return [(udyam_id, response)], state
158
+
159
def handle_manual(enterprise_name, gender, ent_type, org_type, activity, state):
    """Build a profile from the manual form and recommend schemes for it.

    Args:
        enterprise_name, gender, ent_type, org_type, activity: Form values.
        state: Current session dict; may be empty or ``None``.

    Returns:
        A ``(history, state)`` pair. ``history`` is a list of
        ``(user_message, bot_message)`` pairs for the Chatbot; ``state`` is a
        copy of the incoming state with ``profile`` and ``schemes`` set.
    """
    profile = {
        "Enterprise Name": enterprise_name,
        "Gender": gender,
        "Enterprise Type": ent_type,
        "Organisation Type": org_type,
        "Major Activity": activity,
        # NOTE(review): State, Employment, Investment Cost and Net Turnover are
        # not collected here, yet summarize_profile() reads them - confirm it
        # tolerates missing keys or add those inputs to the form.
    }
    summary = summarize_profile(profile)
    query = generate_search_query(profile)
    schemes = get_top_matching_schemes(query)
    scheme_lines = "\n".join(
        f"{i}. {s['scheme_name']} (score {s['score']:.3f})"
        for i, s in enumerate(schemes, start=1)
    )
    response = (
        f"✅ Profile recorded:\n{summary}\n\n"
        f"🔍 Query: {query}\n\n"
        "📈 Top schemes:\n" + scheme_lines
    )
    # Work on a copy so the caller's dict is not mutated, and so a missing
    # state does not raise.
    state = dict(state or {})
    state["profile"] = profile
    state["schemes"] = schemes
    # Bot-only message: history entries are (user, bot) pairs, so the user
    # side is None rather than a role label.
    return [(None, response)], state
184
+
185
def chat_with_scheme(message, state):
    """Handle a chat message: pick a scheme by number, or query the chosen one.

    A message equal to one of the list numbers ("1", "2", ...) of
    ``state["schemes"]`` selects that scheme. Otherwise, once a scheme has been
    selected, the message is passed to ``fetch_scheme_field_llm`` as a section
    request.

    Returns:
        Always a ``(history, state)`` pair, matching the two outputs this
        handler is wired to. ``history`` is a list of
        ``(user_message, bot_message)`` pairs.
    """
    if state is None:
        state = {}
    # No session yet means no schemes; .get avoids a KeyError on an empty state.
    schemes = state.get("schemes") or []
    scheme_map = {str(i): s for i, s in enumerate(schemes, start=1)}
    key = (message or "").strip()

    if key in scheme_map:
        chosen = scheme_map[key]
        sid = chosen["scheme_id"]
        state["current_scheme_id"] = sid
        doc = schemes_info_coll.find_one({"scheme_id": sid})
        # Fall back to the name from the search results when the detail
        # collection has no document (or no name) for this scheme.
        title = (doc or {}).get("scheme_name") or chosen.get("scheme_name") or sid
        reply = f"🎯 *{title}* selected. What would you like to know? (eligibility, benefits, apply, docs)"
        return [(key, reply)], state

    if "current_scheme_id" in state:
        # Interpret the message as a section request for the selected scheme.
        output = fetch_scheme_field_llm(state["current_scheme_id"], key)
        return [(key, output)], state

    # An empty user message is shown as a bot-only entry (None user side).
    return [(key or None, "❓ Please pick a scheme number first.")], state
206
+
207
with gr.Blocks() as demo:
    gr.Markdown("# 🚀 MSME Scheme Assistant")

    # Step 1: Udyam ID, or leave it blank to enter details by hand.
    udyam_in = gr.Textbox(label="Enter your Udyam ID (or leave blank for manual)")
    start_btn = gr.Button("Start")
    chatbot = gr.Chatbot()
    state = gr.State({})  # per-session dict: profile, schemes, current scheme

    # Step 2a: manual entry form, hidden until Start is pressed with no ID.
    # NOTE(review): the submit button is assumed to sit inside this row; the
    # original nesting is not recoverable from the diff view - confirm.
    with gr.Row(visible=False) as manual_row:
        ent_name = gr.Textbox(label="Enterprise Name")
        gender = gr.Radio(["Male", "Female", "Other"], label="Gender")
        ent_type = gr.Dropdown(["Micro", "Small", "Medium"], label="Enterprise Size")
        org_type = gr.Dropdown(["Proprietorship", "LLP", "Private Ltd."], label="Organisation Type")
        activity = gr.Textbox(label="Major Activity (Manufacturing/Services)")
        manual_btn = gr.Button("Submit Manual Profile")

    def should_show_manual(udyam_id):
        """Make the manual form visible only when the Udyam box is blank."""
        return gr.update(visible=(not udyam_id.strip()))

    # Wiring. Start triggers two handlers: the session start itself and the
    # visibility toggle for the manual form.
    start_btn.click(start_session, [udyam_in], [chatbot, state])
    start_btn.click(should_show_manual, [udyam_in], [manual_row])

    manual_btn.click(
        handle_manual,
        [ent_name, gender, ent_type, org_type, activity, state],
        [chatbot, state],
    )

    # Finally, free-text chat about the recommended schemes.
    msg = gr.Textbox(placeholder="Ask about schemes…")
    msg.submit(chat_with_scheme, [msg, state], [chatbot, state])

demo.launch()
requirements.txt CHANGED
@@ -1 +1,9 @@
1
- huggingface_hub==0.25.2
 
 
 
 
 
 
 
 
 
1
+ huggingface_hub==0.25.2
2
+ pymongo>=4.3.0
3
+ transformers>=4.35.0
4
+ sentence-transformers>=2.2.2
5
+ torch>=2.0.1
6
+ langchain>=0.0.200
7
+ langchain-community>=0.0.30
8
+ gradio>=3.44.0
9
+ accelerate