shreyankisiri commited on
Commit
ef5fb09
·
verified ·
1 Parent(s): 9332cd5

Upload 62 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. GS_Sales_Proposal/.env +3 -0
  3. GS_Sales_Proposal/.gitignore +5 -0
  4. GS_Sales_Proposal/Client/__init__.py +0 -0
  5. GS_Sales_Proposal/Client/__pycache__/__init__.cpython-313.pyc +0 -0
  6. GS_Sales_Proposal/Client/__pycache__/client.cpython-313.pyc +3 -0
  7. GS_Sales_Proposal/Client/__pycache__/client_css.cpython-313.pyc +0 -0
  8. GS_Sales_Proposal/Client/__pycache__/client_dataclass.cpython-313.pyc +0 -0
  9. GS_Sales_Proposal/Client/__pycache__/client_utils.cpython-313.pyc +0 -0
  10. GS_Sales_Proposal/Client/client.py +0 -0
  11. GS_Sales_Proposal/Client/client_css.py +448 -0
  12. GS_Sales_Proposal/Client/client_dataclass.py +210 -0
  13. GS_Sales_Proposal/Client/client_utils.py +140 -0
  14. GS_Sales_Proposal/Document_Upload_Vectordb/__init__.py +0 -0
  15. GS_Sales_Proposal/Document_Upload_Vectordb/doc_vectorizer.py +564 -0
  16. GS_Sales_Proposal/Document_Upload_Vectordb/doc_xtraction_utils.py +14 -0
  17. GS_Sales_Proposal/Document_Upload_Vectordb/pain_points_extractor.py +44 -0
  18. GS_Sales_Proposal/Document_Upload_Vectordb/prompts.py +39 -0
  19. GS_Sales_Proposal/Document_Upload_Vectordb/rfi2.pdf +3 -0
  20. GS_Sales_Proposal/LICENSE +21 -0
  21. GS_Sales_Proposal/Recommendation/__init__.py +0 -0
  22. GS_Sales_Proposal/Recommendation/__pycache__/__init__.cpython-313.pyc +0 -0
  23. GS_Sales_Proposal/Recommendation/__pycache__/prompts.cpython-313.pyc +0 -0
  24. GS_Sales_Proposal/Recommendation/__pycache__/recommendation_utils.cpython-313.pyc +0 -0
  25. GS_Sales_Proposal/Recommendation/prompts.py +107 -0
  26. GS_Sales_Proposal/Recommendation/recommendation_utils.py +26 -0
  27. GS_Sales_Proposal/Search/Linkedin/__pycache__/linkedin_serp.cpython-313.pyc +0 -0
  28. GS_Sales_Proposal/Search/Linkedin/linkedin_agent_runner_unused.py +66 -0
  29. GS_Sales_Proposal/Search/Linkedin/linkedin_agent_unused.py +63 -0
  30. GS_Sales_Proposal/Search/Linkedin/linkedin_serp.py +40 -0
  31. GS_Sales_Proposal/Search/WebsiteUrl_Agent/__pycache__/agent.cpython-312.pyc +0 -0
  32. GS_Sales_Proposal/Search/WebsiteUrl_Agent/__pycache__/agent.cpython-313.pyc +0 -0
  33. GS_Sales_Proposal/Search/WebsiteUrl_Agent/__pycache__/agent_runner.cpython-313.pyc +0 -0
  34. GS_Sales_Proposal/Search/WebsiteUrl_Agent/agent.py +67 -0
  35. GS_Sales_Proposal/Search/WebsiteUrl_Agent/agent_runner.py +68 -0
  36. GS_Sales_Proposal/Search/__pycache__/linkedin_serp.cpython-313.pyc +0 -0
  37. GS_Sales_Proposal/Seller/__pycache__/seller.cpython-313.pyc +0 -0
  38. GS_Sales_Proposal/Seller/__pycache__/seller_css.cpython-313.pyc +0 -0
  39. GS_Sales_Proposal/Seller/__pycache__/seller_utils.cpython-313.pyc +0 -0
  40. GS_Sales_Proposal/Seller/seller.py +463 -0
  41. GS_Sales_Proposal/Seller/seller_css.py +430 -0
  42. GS_Sales_Proposal/Seller/seller_utils.py +46 -0
  43. GS_Sales_Proposal/WebScraper/__pycache__/scrape.cpython-313.pyc +0 -0
  44. GS_Sales_Proposal/WebScraper/__pycache__/scrape_utils.cpython-313.pyc +0 -0
  45. GS_Sales_Proposal/WebScraper/__pycache__/state.cpython-313.pyc +0 -0
  46. GS_Sales_Proposal/WebScraper/main.py +8 -0
  47. GS_Sales_Proposal/WebScraper/scrape.py +122 -0
  48. GS_Sales_Proposal/WebScraper/scrape_utils.py +32 -0
  49. GS_Sales_Proposal/WebScraper/state.py +8 -0
  50. GS_Sales_Proposal/WebsiteUrl_Agent/__pycache__/agent.cpython-312.pyc +0 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ GS_Sales_Proposal/Client/__pycache__/client.cpython-313.pyc filter=lfs diff=lfs merge=lfs -text
37
+ GS_Sales_Proposal/Document_Upload_Vectordb/rfi2.pdf filter=lfs diff=lfs merge=lfs -text
GS_Sales_Proposal/.env ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ GOOGLE_API_KEY=<redacted: a live key was committed here — rotate it immediately and inject it via environment/secrets, never commit .env>
2
+ SERP_API_KEY = <redacted: a live key was committed here — rotate it immediately>
3
+ FILE_SAVE_PATH = "Document_Upload_Vectordb/Files"
GS_Sales_Proposal/.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ .env
2
+ __pycache__/
3
+ *.pyc
4
+ *.ipynb
5
+ # NOTE: the previous bare patterns (.pycache, .pyc, .ipynb, .__pycache__) matched nothing useful; gitignore needs glob patterns like the ones above
GS_Sales_Proposal/Client/__init__.py ADDED
File without changes
GS_Sales_Proposal/Client/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (151 Bytes). View file
 
GS_Sales_Proposal/Client/__pycache__/client.cpython-313.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bcc0854139a4b9db9c029168978b5677fb1df4293bd788a31cbef0c5b5dfd83
3
+ size 104585
GS_Sales_Proposal/Client/__pycache__/client_css.cpython-313.pyc ADDED
Binary file (12.3 kB). View file
 
GS_Sales_Proposal/Client/__pycache__/client_dataclass.cpython-313.pyc ADDED
Binary file (11.5 kB). View file
 
GS_Sales_Proposal/Client/__pycache__/client_utils.cpython-313.pyc ADDED
Binary file (5.42 kB). View file
 
GS_Sales_Proposal/Client/client.py ADDED
The diff for this file is too large to render. See raw diff
 
GS_Sales_Proposal/Client/client_css.py ADDED
@@ -0,0 +1,448 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# CSS injected into the Streamlit client page via st.markdown(..., unsafe_allow_html=True).
# Palette conventions: #f5f5f5 panel background, #2a2a2a text, #5a9f9f input borders,
# #667eea / #764ba2 accent gradient colors.
# Fix: `.tooltip-label` previously declared `display`/`align-items` twice with
# conflicting values and a `height` overridden by `min-height`; collapsed to the
# declarations that actually took effect (last-one-wins in CSS).
client_css = """
<style>
.client-section {
    background: #f5f5f5;
    padding: 1.5rem;
    border-radius: 10px;
    border-left: 4px solid #667eea;
    margin-bottom: 1rem;
    color: #2a2a2a;
}

.url-section {
    background: #f5f5f5;
    padding: 1.5rem;
    border-radius: 10px;
    border-left: 4px solid #764ba2;
    margin-bottom: 1rem;
    color: #2a2a2a;
}

.document-section {
    background: #f5f5f5;
    padding: 1.5rem;
    border-radius: 10px;
    border: 2px solid #5a9f9f;
    margin-bottom: 1rem;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.3);
    color: #2a2a2a;
}

.pain-points-section {
    background: #f5f5f5;
    padding: 1.5rem;
    border-radius: 10px;
    border-left: 4px solid #ffc107;
    color: #2a2a2a;
}

.roles-section {
    background: #f5f5f5;
    padding: 1.5rem;
    border-radius: 10px;
    border-left: 4px solid #2196f3;
    color: #2a2a2a;
}

.priorities-section {
    background: #f5f5f5;
    padding: 1.5rem;
    border-radius: 10px;
    border-left: 4px solid #9c27b0;
    color: #2a2a2a;
}

.ai-suggestion-section {
    background: #f5f5f5;
    padding: 1.5rem;
    border-radius: 10px;
    border-left: 4px solid #00bcd4;
    color: #2a2a2a;
}

.upload-section {
    border: 2px dashed #667eea;
    border-radius: 10px;
    padding: 2rem;
    text-align: center;
    background: #f5f5f5;
    color: #2a2a2a;
}

/* Style section headers */
.section-header {
    color: #2a2a2a;
    font-size: 1.2rem;
    font-weight: 600;
    margin-bottom: 1rem;
}

/* Mandatory field styling */
.mandatory-label {
    color: #e74c3c;
    font-weight: 600;
}

.field-warning {
    color: #e74c3c;
    font-size: 0.85rem;
    margin-top: 0.25rem;
    font-weight: 500;
    background: rgba(231, 76, 60, 0.1);
    padding: 0.5rem;
    border-radius: 4px;
    border-left: 3px solid #e74c3c;
}

.optional-label {
    color: #666666;
    font-size: 0.8rem;
    font-style: italic;
}

.ai-label {
    color: #00bcd4;
    font-size: 0.8rem;
    font-style: italic;
}

/* Custom styling for URL buttons */
.url-button-container {
    display: flex;
    gap: 5px;
    align-items: center;
}

.url-button {
    background: #667eea;
    color: white;
    border: none;
    padding: 8px 12px;
    border-radius: 6px;
    cursor: pointer;
    font-size: 14px;
    transition: background-color 0.3s;
}

.url-button:hover {
    background: #5a6fd8;
}

/* Summary item styling */
.summary-item {
    background: #f5f5f5;
    border: 1px solid #5a9f9f;
    border-radius: 8px;
    padding: 12px;
    margin-bottom: 8px;
    display: flex;
    justify-content: space-between;
    align-items: center;
    color: #2a2a2a;
}

.summary-key {
    font-weight: 600;
    color: #667eea;
}

.add-button {
    background: #28a745;
    color: white;
    border: none;
    padding: 6px 12px;
    border-radius: 4px;
    cursor: pointer;
    font-size: 12px;
    font-weight: bold;
}

.add-button:hover {
    background: #218838;
}

.summary-buttons {
    display: flex;
    gap: 8px;
    margin-bottom: 12px;
}

.summary-control-btn {
    background: #007bff;
    color: white;
    border: none;
    padding: 6px 12px;
    border-radius: 4px;
    cursor: pointer;
    font-size: 12px;
}

.summary-control-btn:hover {
    background: #0056b3;
}

/* Fixed tooltip label alignment.
   NOTE(review): the previous rule declared display/align-items twice
   (center, then flex-end) and height:24px under min-height:32px; only the
   last/stronger declarations applied, so this keeps the rendered result. */
.tooltip-label {
    font-size: 16px;
    font-weight: bold;
    margin-bottom: 8px;
    display: flex;
    align-items: flex-end;
    gap: 6px;
    line-height: 24px;
    min-height: 32px;
}

.tooltip-icon {
    position: relative;
    display: inline-block;
    cursor: pointer;
    margin-left: 0;
}

.tooltip-icon::after {
    content: attr(data-tooltip);
    visibility: hidden;
    width: 250px;
    background-color: #555;
    color: #fff;
    text-align: left;
    border-radius: 6px;
    padding: 8px;
    position: absolute;
    z-index: 1;
    bottom: 125%;
    left: 50%;
    margin-left: -125px;
    opacity: 0;
    transition: opacity 0.3s;
}

.tooltip-icon:hover::after {
    visibility: visible;
    opacity: 1;
}

/* Streamlit input elements styling - ALL INPUTS */

/* Text Input */
.stTextInput > div > div > input {
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
    border: 2px solid #5a9f9f !important;
    border-radius: 8px !important;
    padding: 12px !important;
    font-size: 14px !important;
}

/* Text Area */
.stTextArea > div > div > textarea {
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
    border: 2px solid #5a9f9f !important;
    border-radius: 8px !important;
    padding: 12px !important;
    font-size: 14px !important;
}

/* Number Input */
.stNumberInput > div > div > input {
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
    border: 2px solid #5a9f9f !important;
    border-radius: 8px !important;
    padding: 12px !important;
    font-size: 14px !important;
}

/* Select Box */
.stSelectbox > div > div > div {
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
    border: 2px solid #5a9f9f !important;
    border-radius: 8px !important;
}

/* Multiselect */
.stMultiSelect > div > div > div {
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
    border: 2px solid #5a9f9f !important;
    border-radius: 8px !important;
}

/* Date Input */
.stDateInput > div > div > input {
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
    border: 2px solid #5a9f9f !important;
    border-radius: 8px !important;
    padding: 12px !important;
    font-size: 14px !important;
}

/* Time Input */
.stTimeInput > div > div > input {
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
    border: 2px solid #5a9f9f !important;
    border-radius: 8px !important;
    padding: 12px !important;
    font-size: 14px !important;
}

/* File Uploader */
.stFileUploader > div > div {
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
    border: 2px solid #5a9f9f !important;
    border-radius: 8px !important;
}

/* REDUCED HEIGHT FOR UPLOADED FILE DISPLAY */
/* Target the uploaded file container */
.stFileUploader div[data-testid="stFileUploaderFileName"] {
    min-height: 30px !important;
    height: 30px !important;
    padding: 4px 8px !important;
    margin: 2px 0 !important;
    display: flex !important;
    align-items: center !important;
    color: #999999 !important;
    font-size: 12px !important;
    line-height: 1.2 !important;
}

/* Reduce height of the file uploader section after upload */
.stFileUploader section[data-testid="stFileUploaderDropzone"] {
    min-height: 40px !important;
    height: auto !important;
    padding: 8px !important;
    margin: 4px 0 !important;
}

/* Target any uploaded file display elements */
.stFileUploader [data-testid="fileUploaderFileName"],
.stFileUploader [data-testid="stFileUploaderFileName"] > div,
.stFileUploader div[role="button"] {
    min-height: 30px !important;
    height: 30px !important;
    padding: 4px 8px !important;
    margin: 2px 0 !important;
    line-height: 1.2 !important;
    font-size: 12px !important;
}

/* Compact the entire file uploader when files are uploaded */
.stFileUploader:has([data-testid="stFileUploaderFileName"]) {
    min-height: 40px !important;
}

.stFileUploader:has([data-testid="stFileUploaderFileName"]) > div {
    min-height: 40px !important;
    padding: 4px !important;
}

/* File Uploader - Uploaded file display text (light grey) */
.stFileUploader div[data-testid="stFileUploaderFileName"],
.stFileUploader div[data-testid="fileUploaderDropzone"] span,
.stFileUploader div[data-testid="fileUploaderDropzone"] p,
.stFileUploader section span,
.stFileUploader section p,
.stFileUploader [data-testid="fileUploaderFileName"],
.stFileUploader small {
    color: #999999 !important; /* Light grey for uploaded file names and text */
    font-size: 12px !important;
    line-height: 1.2 !important;
}

/* File uploader drag and drop area */
.stFileUploader section {
    background-color: #f5f5f5 !important;
    border: 2px dashed #5a9f9f !important;
    border-radius: 8px !important;
}

/* File uploader text content - making it light grey */
.stFileUploader section div,
.stFileUploader section span,
.stFileUploader section small {
    color: #999999 !important; /* Light grey for all file uploader text */
    font-size: 12px !important;
    line-height: 1.2 !important;
}

/* Color Picker */
.stColorPicker > div > div > input {
    background-color: #f5f5f5 !important;
    border: 2px solid #5a9f9f !important;
    border-radius: 8px !important;
}

/* Focus states for all inputs */
.stTextInput > div > div > input:focus,
.stTextArea > div > div > textarea:focus,
.stNumberInput > div > div > input:focus,
.stDateInput > div > div > input:focus,
.stTimeInput > div > div > input:focus {
    border-color: #667eea !important;
    box-shadow: 0 0 0 2px rgba(102, 126, 234, 0.2) !important;
    outline: none !important;
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
}

/* Active/typing states to ensure text stays visible */
.stTextInput > div > div > input:active,
.stTextArea > div > div > textarea:active,
.stNumberInput > div > div > input:active,
.stDateInput > div > div > input:active,
.stTimeInput > div > div > input:active {
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
}

/* Placeholder text for all inputs */
.stTextInput > div > div > input::placeholder,
.stTextArea > div > div > textarea::placeholder,
.stNumberInput > div > div > input::placeholder,
.stDateInput > div > div > input::placeholder,
.stTimeInput > div > div > input::placeholder {
    color: #666666 !important;
    opacity: 0.7 !important;
}

/* Labels for all input types */
.stTextInput > label,
.stTextArea > label,
.stNumberInput > label,
.stSelectbox > label,
.stMultiSelect > label,
.stDateInput > label,
.stTimeInput > label,
.stFileUploader > label,
.stColorPicker > label {
    color: #2a2a2a !important;
    font-weight: 600 !important;
    margin-bottom: 8px !important;
}

/* Dropdown options styling */
.stSelectbox div[data-baseweb="select"] > div > div,
.stMultiSelect div[data-baseweb="select"] > div > div {
    background-color: #f5f5f5 !important;
}

input,
textarea,
select,
.stSelectbox,
.stMultiSelect {
    color: #2a2a2a !important;
}

</style>
"""
GS_Sales_Proposal/Client/client_dataclass.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass, field
2
+ from typing import List, Dict, Set, Optional
3
+ import streamlit as st
4
+
5
+ @dataclass
6
+ class ClientData:
7
+ """Centralized data structure for client information"""
8
+
9
+ # Basic client information
10
+ enterprise_name: str = ""
11
+ website_url: str = ""
12
+ website_urls_list: List[str] = field(default_factory=list)
13
+
14
+ # Client details and requirements
15
+ enterprise_details_content: str = ""
16
+ client_requirements_content: str = ""
17
+ client_additional_requirements_content: str = ""
18
+
19
+ # SPOC information
20
+ spoc_name: str = ""
21
+ spoc_linkedin_profile: str = ""
22
+ linkedin_profiles: Dict[str, Dict] = field(default_factory=dict)
23
+ last_searched_spoc: str = ""
24
+ current_selected_profile_url: Optional[str] = None
25
+
26
+ # File handling
27
+ uploaded_file_path: Optional[str] = None
28
+ document_analyzed: bool = False
29
+
30
+ # Pain points and specifications
31
+ rfi_pain_points_items: Dict[str, str] = field(default_factory=dict)
32
+ selected_pain_points: Set[str] = field(default_factory=set)
33
+ pain_point_content_map: Dict[str, str] = field(default_factory=dict)
34
+
35
+ # Additional specifications
36
+ additional_specs_items: Dict[str, str] = field(default_factory=dict)
37
+ selected_additional_specs: Set[str] = field(default_factory=set)
38
+ additional_specs_content_map: Dict[str, str] = field(default_factory=dict)
39
+
40
+ # Role and priority management
41
+ selected_target_roles: List[str] = field(default_factory=list)
42
+ selected_business_priorities: List[str] = field(default_factory=list)
43
+
44
+ # UI state management
45
+ show_validation: bool = False
46
+ processing_rfi: bool = False
47
+ scraping_in_progress: bool = False
48
+ pending_scrape_url: Optional[str] = None
49
+ css_applied: bool = False
50
+ last_analyzed_url: Optional[str] = None
51
+ debug_mode: bool = False
52
+
53
+ def to_dict(self) -> dict:
54
+ """Convert dataclass to dictionary for session state storage"""
55
+ return {
56
+ 'enterprise_name': self.enterprise_name,
57
+ 'website_url': self.website_url,
58
+ 'website_urls_list': self.website_urls_list,
59
+ 'enterprise_details_content': self.enterprise_details_content,
60
+ 'client_requirements_content': self.client_requirements_content,
61
+ 'client_additional_requirements_content': self.client_additional_requirements_content,
62
+ 'spoc_name': self.spoc_name,
63
+ 'spoc_linkedin_profile': self.spoc_linkedin_profile,
64
+ 'linkedin_profiles': self.linkedin_profiles,
65
+ 'last_searched_spoc': self.last_searched_spoc,
66
+ 'current_selected_profile_url': self.current_selected_profile_url,
67
+ 'uploaded_file_path': self.uploaded_file_path,
68
+ 'document_analyzed': self.document_analyzed,
69
+ 'rfi_pain_points_items': self.rfi_pain_points_items,
70
+ 'selected_pain_points': self.selected_pain_points,
71
+ 'pain_point_content_map': self.pain_point_content_map,
72
+ 'additional_specs_items': self.additional_specs_items,
73
+ 'selected_additional_specs': self.selected_additional_specs,
74
+ 'additional_specs_content_map': self.additional_specs_content_map,
75
+ 'selected_target_roles': self.selected_target_roles,
76
+ 'selected_business_priorities': self.selected_business_priorities,
77
+ 'show_validation': self.show_validation,
78
+ 'processing_rfi': self.processing_rfi,
79
+ 'scraping_in_progress': self.scraping_in_progress,
80
+ 'pending_scrape_url': self.pending_scrape_url,
81
+ 'css_applied': self.css_applied,
82
+ 'last_analyzed_url': self.last_analyzed_url,
83
+ 'debug_mode': self.debug_mode
84
+ }
85
+
86
+ @classmethod
87
+ def from_dict(cls, data: dict) -> 'ClientData':
88
+ """Create ClientData instance from dictionary"""
89
+ return cls(
90
+ enterprise_name=data.get('enterprise_name', ''),
91
+ website_url=data.get('website_url', ''),
92
+ website_urls_list=data.get('website_urls_list', []),
93
+ enterprise_details_content=data.get('enterprise_details_content', ''),
94
+ client_requirements_content=data.get('client_requirements_content', ''),
95
+ client_additional_requirements_content=data.get('client_additional_requirements_content', ''),
96
+ spoc_name=data.get('spoc_name', ''),
97
+ spoc_linkedin_profile=data.get('spoc_linkedin_profile', ''),
98
+ linkedin_profiles=data.get('linkedin_profiles', {}),
99
+ last_searched_spoc=data.get('last_searched_spoc', ''),
100
+ current_selected_profile_url=data.get('current_selected_profile_url'),
101
+ uploaded_file_path=data.get('uploaded_file_path'),
102
+ document_analyzed=data.get('document_analyzed', False),
103
+ rfi_pain_points_items=data.get('rfi_pain_points_items', {}),
104
+ selected_pain_points=set(data.get('selected_pain_points', [])),
105
+ pain_point_content_map=data.get('pain_point_content_map', {}),
106
+ additional_specs_items=data.get('additional_specs_items', {}),
107
+ selected_additional_specs=set(data.get('selected_additional_specs', [])),
108
+ additional_specs_content_map=data.get('additional_specs_content_map', {}),
109
+ selected_target_roles=data.get('selected_target_roles', []),
110
+ selected_business_priorities=data.get('selected_business_priorities', []),
111
+ show_validation=data.get('show_validation', False),
112
+ processing_rfi=data.get('processing_rfi', False),
113
+ scraping_in_progress=data.get('scraping_in_progress', False),
114
+ pending_scrape_url=data.get('pending_scrape_url'),
115
+ css_applied=data.get('css_applied', False),
116
+ last_analyzed_url=data.get('last_analyzed_url'),
117
+ debug_mode=data.get('debug_mode', False)
118
+ )
119
+
120
+ def validate_mandatory_fields(self) -> bool:
121
+ """Validate mandatory fields"""
122
+ client_name = self.enterprise_name.strip()
123
+ client_requirement = self.client_requirements_content.strip()
124
+
125
+ if self.debug_mode:
126
+ print(f"DEBUG - Client Name: '{client_name}'")
127
+ print(f"DEBUG - Client Requirement: '{client_requirement}'")
128
+ print(f"DEBUG - Validation Result: {bool(client_name) and bool(client_requirement)}")
129
+
130
+ return bool(client_name) and bool(client_requirement)
131
+
132
+ def clear_data(self):
133
+ """Clear all client data"""
134
+ self.__init__()
135
+
136
+ def update_from_ui_inputs(self, **kwargs):
137
+ """Update dataclass fields from UI inputs"""
138
+ for key, value in kwargs.items():
139
+ if hasattr(self, key):
140
+ setattr(self, key, value)
141
+
142
+
143
class ClientDataManager:
    """Persist a single ClientData instance inside Streamlit session state."""

    SESSION_KEY = 'client_data'

    @classmethod
    def get_client_data(cls) -> ClientData:
        """Return the stored ClientData, creating a fresh one on first access."""
        stored = st.session_state.get(cls.SESSION_KEY)
        if stored is None:
            stored = ClientData()
            st.session_state[cls.SESSION_KEY] = stored
        return stored

    @classmethod
    def save_client_data(cls, client_data: ClientData):
        """Overwrite the stored ClientData with the given instance."""
        st.session_state[cls.SESSION_KEY] = client_data

    @classmethod
    def update_client_data(cls, **kwargs):
        """Apply field updates to the stored ClientData and persist it."""
        current = cls.get_client_data()
        current.update_from_ui_inputs(**kwargs)
        cls.save_client_data(current)

    @classmethod
    def clear_client_data(cls):
        """Drop the stored ClientData entirely (a new one is created on next get)."""
        if cls.SESSION_KEY in st.session_state:
            del st.session_state[cls.SESSION_KEY]

    @classmethod
    def export_to_dict(cls) -> dict:
        """Return the stored ClientData serialized as a dictionary."""
        return cls.get_client_data().to_dict()

    @classmethod
    def import_from_dict(cls, data: dict):
        """Replace the stored ClientData with one built from a dictionary."""
        cls.save_client_data(ClientData.from_dict(data))
184
+
185
+
186
# Utility functions for backwards compatibility
def validate_client_mandatory_fields() -> bool:
    """Validate the client's mandatory fields via the stored dataclass."""
    return ClientDataManager.get_client_data().validate_mandatory_fields()

def get_client_enterprise_name() -> str:
    """Return the stored client enterprise name."""
    return ClientDataManager.get_client_data().enterprise_name

def set_client_enterprise_name(name: str):
    """Store the client enterprise name."""
    ClientDataManager.update_client_data(enterprise_name=name)

def get_client_requirements() -> str:
    """Return the stored client requirements text."""
    return ClientDataManager.get_client_data().client_requirements_content

def set_client_requirements(requirements: str):
    """Store the client requirements text."""
    ClientDataManager.update_client_data(client_requirements_content=requirements)

# Add more utility functions as needed...
GS_Sales_Proposal/Client/client_utils.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from typing import List
4
+ import os
5
+
6
+ from WebsiteUrl_Agent.agent_runner import get_urls
7
+ import asyncio
8
+ from Document_Upload_Vectordb.pain_points_extractor import *
9
+ # Function to get URLs (placeholder function)
10
+
11
def get_urls_list(company_name) -> List[str]:
    """Return candidate website URLs for *company_name*.

    Runs the async URL agent synchronously via asyncio.run.
    (Marked as a placeholder in the original — swap in the real fetcher here.)
    """
    urls = asyncio.run(get_urls(company_name))
    return urls
17
+
18
+ # Function to get LinkedIn profiles (NEW)
19
+
20
+
21
+ # Function to get roles list
22
def get_roles_list() -> List[str]:
    """Return the fixed list of C-suite role labels, e.g. "CEO (Chief Executive Officer)"."""
    roles = {
        "CEO": "Chief Executive Officer",
        "CMO": "Chief Marketing Officer",
        "CTO": "Chief Technology Officer",
        "CFO": "Chief Financial Officer",
        "COO": "Chief Operating Officer",
        "CHRO": "Chief Human Resources Officer",
        "CDO": "Chief Data Officer",
        "CPO": "Chief Product Officer",
        "CRO": "Chief Revenue Officer",
        "CIO": "Chief Information Officer",
    }
    # dicts preserve insertion order, so the label order matches the original list
    return [f"{abbr} ({title})" for abbr, title in roles.items()]
38
+
39
+
40
+ from WebScraper.scrape import get_data
41
+
42
def get_url_details(url: str):
    """Scrape *url* by running the async `get_data` coroutine synchronously.

    Returns the scraped details, or None when scraping fails (the error is
    printed — deliberate best-effort behavior).
    """
    try:
        return asyncio.run(get_data(url))
    except Exception as e:
        print(f"Error: {e}")
        return None
51
+
52
def get_priority_suggestions() -> List[dict]:
    """Return canned business-priority suggestions (title/description/icon dicts).

    Placeholder data — replace with a real suggestion source when available.
    """
    entries = [
        ("Digital Transformation Initiative",
         "Modernize systems and processes for improved efficiency",
         "🚀"),
        ("Data Analytics & Business Intelligence",
         "Implement advanced analytics for better decision making",
         "📊"),
        ("Process Optimization & Automation",
         "Streamline workflows and reduce manual tasks",
         "🔧"),
    ]
    return [
        {"title": title, "description": description, "icon": icon}
        for title, description, icon in entries
    ]
74
+
75
def get_editable_content() -> str:
    """Return the default editable-notes text shown in the UI.

    Placeholder content — swap in the real content source when available.
    """
    return """This is editable content from the function:

- Project requirements and specifications
- Current implementation status
- Key stakeholder feedback
- Next steps and action items
- Additional notes and observations

You can modify this content as needed."""
89
+
90
+
91
+ # Function to get summary items (NEW)
92
+ # from Rag.rag import get_pain_points
93
+
94
+
95
+
96
+
97
def get_pain_items(file, company_name):
    """Extract pain-point items for *company_name* from the uploaded *file*.

    Thin wrapper over `get_pain_points` (star-imported from
    Document_Upload_Vectordb.pain_points_extractor); the separator print is a
    visual marker in the server log.
    """
    print("-----------------------------------------------------------")
    return get_pain_points(file, company_name)
100
+
101
+
102
+
103
+
104
def check_field_validation(field_name: str, field_value: str, is_mandatory: bool = False) -> bool:
    """Return True when a warning should be shown: the field is mandatory but blank.

    *field_name* is accepted for signature compatibility; only the value and
    the mandatory flag drive the result.
    """
    return bool(is_mandatory and not field_value.strip())
109
+
110
def show_field_warning(field_name: str):
    """Render an inline red warning that the given mandatory field is empty."""
    warning_html = f'<div class="field-warning">⚠️ {field_name} is mandatory and cannot be empty!</div>'
    st.markdown(warning_html, unsafe_allow_html=True)
113
+
114
+
115
def save_uploaded_file(uploaded_file, save_dir="uploaded_rf_is"):
    """Write a Streamlit upload into *save_dir* (created if missing).

    Returns the path of the saved file. Assumes *uploaded_file* exposes
    `.name` and `.getbuffer()` like Streamlit's UploadedFile.
    """
    os.makedirs(save_dir, exist_ok=True)
    destination = os.path.join(save_dir, uploaded_file.name)
    with open(destination, "wb") as out:
        out.write(uploaded_file.getbuffer())
    return destination
123
+
124
def save_uploaded_file_and_get_path(uploaded_file, upload_dir="uploads"):
    """Save an uploaded file and return its path, or None when no file was given.

    Improvements over the original:
    - *upload_dir* is now a parameter (default "uploads", so existing callers
      are unchanged) instead of a hard-coded constant.
    - `os.makedirs(..., exist_ok=True)` replaces the exists()+makedirs pair,
      which was racy if two sessions uploaded simultaneously.

    Assumes *uploaded_file* exposes `.name` and `.getbuffer()` like
    Streamlit's UploadedFile.
    """
    if uploaded_file is None:
        return None

    os.makedirs(upload_dir, exist_ok=True)
    file_path = os.path.join(upload_dir, uploaded_file.name)

    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    return file_path
GS_Sales_Proposal/Document_Upload_Vectordb/__init__.py ADDED
File without changes
GS_Sales_Proposal/Document_Upload_Vectordb/doc_vectorizer.py ADDED
@@ -0,0 +1,564 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ from io import BytesIO
4
+ import filetype
5
+ from pdf2image import convert_from_path
6
+ from datetime import datetime
7
+ import hashlib
8
+
9
+ # New imports for PPT and DOC support
10
+ from pptx import Presentation
11
+ from docx import Document
12
+ import docx2txt
13
+ from langchain_community.document_loaders import UnstructuredPowerPointLoader, Docx2txtLoader
14
+ from langchain_community.document_loaders import UnstructuredWordDocumentLoader
15
+
16
+ from langchain_core.messages import HumanMessage
17
+ from langchain_community.document_loaders import PyPDFLoader
18
+ from langchain_chroma import Chroma
19
+ from langchain_google_genai import ChatGoogleGenerativeAI
20
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
21
+ from langchain_huggingface import HuggingFaceEmbeddings
22
+ from .prompts import image_prompt # Make sure this exists
23
+
24
+ # --- Utility Functions ---
25
+
26
def get_filename(file_path):
    """Return the basename of *file_path* with its extension removed."""
    base = os.path.basename(file_path)
    stem, _ext = os.path.splitext(base)
    return stem
28
+
29
def get_file_hash(file_path):
    """Return the SHA-256 hex digest of the file, or None if unreadable.

    Used for duplicate detection of uploaded documents.
    """
    try:
        with open(file_path, 'rb') as f:
            return hashlib.sha256(f.read()).hexdigest()
    except OSError:
        # Narrowed from a bare except: only I/O failures mean "no hash";
        # anything else (e.g. KeyboardInterrupt) should propagate.
        return None
36
+
37
def get_file_size(file_path):
    """Return the file size in bytes, or None if the file cannot be stat'ed."""
    try:
        return os.path.getsize(file_path)
    except OSError:
        # Narrowed from a bare except: only filesystem errors yield None.
        return None
43
+
44
def create_base_metadata(file_path, company_name, file_type):
    """Build the metadata dict shared by every vectorized chunk.

    *file_path* may be a path string or an in-memory object (e.g. an image);
    hash/size/extension fields are only added for paths that exist on disk.
    """
    now = datetime.now()
    is_path = isinstance(file_path, str)

    metadata = {
        'company_name': company_name,
        'file_type': file_type,
        'filename': os.path.basename(file_path) if is_path else 'uploaded_image',
        'file_path': file_path if is_path else None,
        'processed_timestamp': now.isoformat(),
        'processing_date': now.strftime('%Y-%m-%d'),
        'processing_time': now.strftime('%H:%M:%S'),
        'chunk_strategy': 'recursive_character_text_splitter',
        'embedding_model': 'huggingface_default',
    }

    # Only real files on disk get hash/size/extension details.
    if is_path and os.path.exists(file_path):
        metadata['file_hash'] = get_file_hash(file_path)
        metadata['file_size_bytes'] = get_file_size(file_path)
        metadata['file_extension'] = os.path.splitext(file_path)[1].lower()

    return metadata
69
+
70
def file_router(file):
    """Classify *file* into a processing route.

    Returns one of: 'powerpoint', 'word_document', 'imagesingle',
    'imagepdf', 'pdf', or "Unknown" (magic-byte sniffing failed).
    Any exception falls back to 'pdf'.
    """
    try:
        file_extension = os.path.splitext(file)[1].lower()

        # Office formats are routed purely by extension.
        if file_extension in ('.ppt', '.pptx'):
            return 'powerpoint'
        if file_extension in ('.doc', '.docx'):
            return 'word_document'

        # Sniff remaining formats by magic bytes.
        kind = filetype.guess(file)
        if kind is None:
            return "Unknown"

        file_type = kind.mime

        if file_type.startswith("image/"):
            return 'imagesingle'

        # A PDF whose first page has no extractable text is treated as a
        # scanned/image PDF; otherwise as a normal text PDF.
        if file_type == 'application/pdf' or file_extension == '.pdf':
            loader = PyPDFLoader(file)
            docs = loader.load()

            # Guard against zero-page documents: the original indexed
            # docs[0] unconditionally, raising IndexError on empty PDFs.
            if not docs or not docs[0].page_content.strip():
                return 'imagepdf'
            return 'pdf'

        return 'pdf'  # Default fallback for unrecognized mime types.

    except Exception as e:
        print(f"Error in file_router: {e}")
        return 'pdf'  # Default fallback
107
+
108
def encode_image(image) -> str:
    """Serialize a PIL-style image to a base64-encoded PNG string."""
    png_buffer = BytesIO()
    image.save(png_buffer, format="PNG")
    raw_bytes = png_buffer.getvalue()
    return base64.b64encode(raw_bytes).decode("utf-8")
112
+
113
# --- LLM Setup ---

# Module-level Gemini client shared by image_summarize() and the image
# handlers below; requires GOOGLE_API_KEY in the environment.
model = ChatGoogleGenerativeAI(model='gemini-2.0-flash')
116
+
117
def image_summarize(model, base64_image: str, prompt: str) -> str:
    """Ask the vision model to describe a base64-encoded PNG image.

    Sends a single multimodal message (prompt text + inline data URL) and
    returns the model's text response.
    """
    message = HumanMessage(
        content=[
            {"type": "text", "text": prompt},
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{base64_image}"},
            },
        ]
    )
    response = model.invoke([message])
    return response.content
130
+
131
+ # --- Image Handlers ---
132
+
133
def image_handler(image):
    """Summarize *image* with the LLM, overwrite example.txt, return the summary."""
    encoded = encode_image(image)
    summary = image_summarize(model, encoded, prompt=image_prompt)
    # 'w' mode: this handler starts a fresh summary file.
    with open('example.txt', 'w') as sink:
        sink.write(summary)
    return summary
139
+
140
def image_handler_append(image):
    """Summarize *image* and append the result (plus newline) to example.txt."""
    encoded = encode_image(image)
    summary = image_summarize(model, encoded, prompt=image_prompt)
    # 'a' mode: subsequent pages accumulate in the same file.
    with open('example.txt', 'a') as sink:
        sink.write(summary + '\n')
    return summary
146
+
147
+ # --- PowerPoint Handler ---
148
+
149
def extract_ppt_content(filepath: str):
    """Extract all text (including table cells) from a .ppt/.pptx file.

    Returns:
        (text, slide_count) where slide_count counts only slides that
        contributed text. Falls back to UnstructuredPowerPointLoader when
        python-pptx fails; on total failure returns an error string and 0.
    """
    try:
        prs = Presentation(filepath)
        full_text = []
        slide_count = 0

        for slide_num, slide in enumerate(prs.slides, 1):
            slide_text = f"=== Slide {slide_num} ===\n"

            for shape in slide.shapes:
                # Text frames (titles, bodies, text boxes).
                if hasattr(shape, "text") and shape.text.strip():
                    slide_text += shape.text + "\n"

                # Tables embedded in slides, one "a | b | c" line per row.
                if shape.has_table:
                    table = shape.table
                    for row in table.rows:
                        row_text = []
                        for cell in row.cells:
                            if cell.text.strip():
                                row_text.append(cell.text.strip())
                        if row_text:
                            slide_text += " | ".join(row_text) + "\n"

            # Keep the slide only if something beyond the header was added.
            if slide_text.strip() != f"=== Slide {slide_num} ===":
                full_text.append(slide_text)
                slide_count += 1

        return "\n\n".join(full_text), slide_count

    except Exception as e:
        print(f"Error extracting PowerPoint content with python-pptx: {e}")
        # Fallback to langchain loader. NOTE: here the second element of the
        # return value is the langchain document count, not a slide count.
        try:
            loader = UnstructuredPowerPointLoader(filepath)
            docs = loader.load()
            content = "\n\n".join([doc.page_content for doc in docs])
            return content, len(docs)
        except Exception as fallback_error:
            print(f"Fallback PowerPoint loader failed: {fallback_error}")
            return f"Error processing PowerPoint file: {str(e)}", 0
191
+
192
+ # --- Word Document Handler ---
193
+
194
def extract_word_content(filepath: str):
    """Extract text (paragraphs and table cells) from a .doc/.docx file.

    Returns:
        (text, count) — count is the paragraph count for .docx and the line
        count for .doc.

    NOTE(review): for any extension other than .doc/.docx the try body falls
    through and the function implicitly returns None; callers currently only
    route .doc/.docx here — confirm before widening.
    """
    try:
        file_extension = os.path.splitext(filepath)[1].lower()

        if file_extension == '.docx':
            # Use python-docx for .docx files
            doc = Document(filepath)
            full_text = []

            # Extract paragraphs
            for para in doc.paragraphs:
                if para.text.strip():
                    full_text.append(para.text)

            # Extract tables, one "a | b | c" line per row
            for table in doc.tables:
                for row in table.rows:
                    row_text = []
                    for cell in row.cells:
                        if cell.text.strip():
                            row_text.append(cell.text.strip())
                    if row_text:
                        full_text.append(" | ".join(row_text))

            content = "\n\n".join(full_text)
            return content, len(doc.paragraphs)

        elif file_extension == '.doc':
            # Use docx2txt for legacy .doc files
            content = docx2txt.process(filepath)
            return content, len(content.split('\n'))

    except Exception as e:
        print(f"Error extracting Word content: {e}")
        # Fallback to langchain loaders; here count is the document count.
        try:
            if filepath.endswith('.docx'):
                loader = Docx2txtLoader(filepath)
            else:
                loader = UnstructuredWordDocumentLoader(filepath)

            docs = loader.load()
            content = "\n\n".join([doc.page_content for doc in docs])
            return content, len(docs)

        except Exception as fallback_error:
            print(f"Fallback Word loader failed: {fallback_error}")
            return f"Error processing Word document: {str(e)}", 0
243
+
244
+ # --- Enhanced Vectorization Functions ---
245
+
246
def vectorize_text(text: str, company_name: str, filename: str = "text_input", base_metadata: dict = None):
    """Split *text* into chunks and store them in a persistent Chroma collection.

    Each chunk carries *base_metadata* plus per-chunk fields (index, size,
    total count). On a storage failure an in-memory fallback collection is
    returned so callers always get a vectorstore.
    """
    # Split and name things *before* the try block: the original referenced
    # `docs` and `collection_name` in its except branch, which raised
    # NameError (masking the real error) when the failure happened before
    # those locals were assigned.
    splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
    docs = splitter.split_text(text)
    collection_name = f"{company_name}_(unknown)".replace(" ", "_").replace("-", "_").lower()

    try:
        # Persist under chroma_store/<company>/<filename>.
        persist_directory = os.path.join("chroma_store", company_name, filename)
        os.makedirs(persist_directory, exist_ok=True)

        # One metadata dict per chunk, copied so chunks don't share state.
        metadatas = []
        for i, chunk in enumerate(docs):
            chunk_metadata = base_metadata.copy() if base_metadata else {}
            chunk_metadata.update({
                'chunk_index': i,
                'chunk_size': len(chunk),
                'total_chunks': len(docs),
                'content_type': 'text',
                'source_document': filename
            })
            metadatas.append(chunk_metadata)

        vectorstore = Chroma.from_texts(
            texts=docs,
            embedding=HuggingFaceEmbeddings(),
            metadatas=metadatas,
            persist_directory=persist_directory,
            collection_name=collection_name
        )
        return vectorstore

    except Exception as e:
        print(f"Error in vectorize_text: {e}")
        # Fallback to in-memory store
        metadatas = [{'error': str(e), 'fallback': True} for _ in docs]
        vectorstore = Chroma.from_texts(
            texts=docs,
            embedding=HuggingFaceEmbeddings(),
            metadatas=metadatas,
            collection_name=f"fallback_{collection_name}"
        )
        return vectorstore
292
+
293
def vectorize_powerpoint(filepath: str, company_name: str):
    """Extract a PowerPoint file's text and vectorize it with rich metadata."""
    try:
        content, slide_count = extract_ppt_content(filepath)
        filename = get_filename(filepath)

        meta = create_base_metadata(filepath, company_name, 'powerpoint')
        meta.update({
            'total_slides': slide_count,
            'content_source': 'powerpoint_extraction',
            'extraction_method': 'python_pptx_with_langchain_fallback',
            'supports_tables': True,
            'supports_shapes': True,
        })

        return vectorize_text(content, company_name, filename, meta)

    except Exception as e:
        print(f"Error in vectorize_powerpoint: {e}")
        failure_meta = {
            'error': str(e),
            'file_type': 'powerpoint',
            'extraction_failed': True,
        }
        return vectorize_text("Error processing PowerPoint file", company_name, "error_ppt", failure_meta)
319
+
320
def vectorize_word_document(filepath: str, company_name: str):
    """Extract a Word document's text and vectorize it with rich metadata."""
    try:
        content, paragraph_count = extract_word_content(filepath)
        filename = get_filename(filepath)

        meta = create_base_metadata(filepath, company_name, 'word_document')
        meta.update({
            'paragraph_count': paragraph_count,
            'content_source': 'word_extraction',
            'extraction_method': 'python_docx_with_langchain_fallback',
            'supports_tables': True,
            'supports_formatting': True,
        })

        return vectorize_text(content, company_name, filename, meta)

    except Exception as e:
        print(f"Error in vectorize_word_document: {e}")
        failure_meta = {
            'error': str(e),
            'file_type': 'word_document',
            'extraction_failed': True,
        }
        return vectorize_text("Error processing Word document", company_name, "error_doc", failure_meta)
346
+
347
def vectorize_single_image(image, company_name: str):
    """Summarize a single image via the LLM and vectorize the summary text."""
    try:
        meta = create_base_metadata(image, company_name, 'single_image')
        meta.update({
            'content_source': 'ai_image_summary',
            'ai_model_used': 'gemini-2.0-flash',
            'processing_method': 'image_to_text_summary',
        })

        summary = image_handler(image)
        return vectorize_text(summary, company_name, "image_single", meta)

    except Exception as e:
        print(f"Error in vectorize_single_image: {e}")
        failure_meta = {'error': str(e), 'file_type': 'single_image'}
        return vectorize_text("Error processing image", company_name, "error_image", failure_meta)
365
+
366
def vectorize_multiple_images(image_path: str, company_name: str):
    """Convert a scanned PDF to page images, summarize each page, vectorize."""
    try:
        pages = convert_from_path(image_path)
        filename = get_filename(image_path)

        meta = create_base_metadata(image_path, company_name, 'pdf_images')
        meta.update({
            'total_pages': len(pages),
            'content_source': 'ai_image_summary',
            'ai_model_used': 'gemini-2.0-flash',
            'processing_method': 'pdf_to_images_to_text',
            'conversion_tool': 'pdf2image',
        })

        # First page overwrites example.txt; later pages append to it.
        summary = ''
        for index, page in enumerate(pages):
            if index == 0:
                summary = image_handler(page)
            else:
                summary += image_handler_append(page)

        return vectorize_text(summary, company_name, filename, meta)

    except Exception as e:
        print(f"Error in vectorize_multiple_images: {e}")
        failure_meta = {'error': str(e), 'file_type': 'pdf_images'}
        return vectorize_text("Error processing PDF images", company_name, "error_pdf_images", failure_meta)
394
+
395
def vectorize_docs(filepath: str, company_name: str):
    """Vectorize a text-based PDF into a persistent Chroma collection.

    Loads pages with PyPDFLoader, splits into 600-char chunks (80 overlap),
    attaches file-level and per-chunk metadata, and persists under
    chroma_store/<company>/<filename>. On failure, retries into an
    in-memory collection; on total failure returns a minimal one-document
    error collection so callers always receive a vectorstore.
    """
    try:
        loader = PyPDFLoader(filepath)
        docs = loader.load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=80)
        chunks = splitter.split_documents(docs)
        filename = get_filename(filepath)

        # Create base metadata shared by every chunk.
        base_metadata = create_base_metadata(filepath, company_name, 'pdf_document')
        base_metadata.update({
            'total_pages': len(docs),
            'total_chunks_created': len(chunks),
            'chunk_size': 600,
            'chunk_overlap': 80,
            'content_source': 'direct_pdf_text',
            'loader_used': 'PyPDFLoader'
        })

        # Create persist directory
        persist_directory = os.path.join("chroma_store", company_name, filename)
        os.makedirs(persist_directory, exist_ok=True)

        # Create collection name (sanitize spaces/dashes for Chroma).
        collection_name = f"{company_name}_(unknown)".replace(" ", "_").replace("-", "_").lower()

        # Add file-level plus per-chunk metadata to each chunk in place.
        for i, chunk in enumerate(chunks):
            chunk.metadata.update(base_metadata)
            chunk.metadata.update({
                'chunk_index': i,
                'page_number': chunk.metadata.get('page', 'unknown'),
                'chunk_char_count': len(chunk.page_content)
            })

        vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=HuggingFaceEmbeddings(),
            persist_directory=persist_directory,
            collection_name=collection_name
        )
        return vectorstore

    except Exception as e:
        print(f"Error in vectorize_docs: {e}")
        # Fallback: reload and re-split, but store in memory only (no
        # persist_directory) with error markers on every chunk.
        try:
            loader = PyPDFLoader(filepath)
            docs = loader.load()
            splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=80)
            chunks = splitter.split_documents(docs)

            # Add error metadata to fallback
            error_metadata = {'error': str(e), 'fallback': True, 'file_type': 'pdf_document'}
            for chunk in chunks:
                chunk.metadata.update(error_metadata)

            vectorstore = Chroma.from_documents(
                documents=chunks,
                embedding=HuggingFaceEmbeddings(),
                collection_name=f"fallback_{company_name}_(unknown)".replace(" ", "_").lower()
            )
            return vectorstore
        except Exception as fallback_error:
            print(f"Fallback also failed: {fallback_error}")
            # Return minimal single-document vectorstore so callers never get None.
            return Chroma.from_texts(
                texts=["Error loading document"],
                embedding=HuggingFaceEmbeddings(),
                metadatas=[{'error': str(fallback_error), 'critical_failure': True}],
                collection_name="error_fallback"
            )
468
+
469
+ # --- Entry Point for Routing ---
470
+
471
def vectorize(filepath: str, company_name: str):
    """Route *filepath* to the vectorizer matching its detected file type.

    Unknown or default types fall through to the PDF vectorizer; any
    unexpected error yields a minimal in-memory error collection.
    """
    try:
        detected = file_router(filepath)
        print(f"Detected file type: {detected}")

        handlers = {
            'imagesingle': vectorize_single_image,
            'imagepdf': vectorize_multiple_images,
            'powerpoint': vectorize_powerpoint,
            'word_document': vectorize_word_document,
        }
        handler = handlers.get(detected, vectorize_docs)
        return handler(filepath, company_name)

    except Exception as e:
        print(f"Error in vectorize main function: {e}")
        # Ultimate fallback with comprehensive error metadata
        error_metadata = {
            'error': str(e),
            'critical_failure': True,
            'processed_timestamp': datetime.now().isoformat(),
            'company_name': company_name,
            'attempted_file': filepath
        }
        return Chroma.from_texts(
            texts=[f"Error processing file: {str(e)}"],
            embedding=HuggingFaceEmbeddings(),
            metadatas=[error_metadata],
            collection_name="ultimate_fallback"
        )
504
+
505
+ # --- Utility Functions for Metadata Queries ---
506
+
507
def search_by_metadata(vectorstore, metadata_filter: dict, query: str = None, k: int = 5):
    """Query *vectorstore* restricted by a metadata filter.

    With *query*, performs a filtered similarity search; without one,
    returns the raw documents matching the filter. Returns [] on any error.
    """
    try:
        if query:
            # Similarity search with metadata filter
            return vectorstore.similarity_search(
                query=query,
                k=k,
                filter=metadata_filter
            )
        # No query: plain metadata lookup against the collection.
        return vectorstore.get(where=metadata_filter, limit=k)
    except Exception as e:
        print(f"Error in metadata search: {e}")
        return []
524
+
525
def get_document_metadata_summary(vectorstore):
    """Summarize metadata across every document in *vectorstore*.

    Returns a dict with total_documents plus the unique company names,
    file types, and processing dates seen (as sets — NOTE(review): sets
    are not JSON-serializable; convert before serializing).

    NOTE(review): implicitly returns None both on error and when the
    collection has no 'metadatas' key.
    """
    try:
        # Reaches into Chroma's private collection handle; no public API
        # equivalent was used here — may break across langchain versions.
        collection = vectorstore._collection
        all_data = collection.get()

        if all_data and 'metadatas' in all_data:
            return {
                'total_documents': len(all_data['metadatas']),
                'unique_companies': set(meta.get('company_name') for meta in all_data['metadatas'] if meta.get('company_name')),
                'file_types': set(meta.get('file_type') for meta in all_data['metadatas'] if meta.get('file_type')),
                'processing_dates': set(meta.get('processing_date') for meta in all_data['metadatas'] if meta.get('processing_date'))
            }
    except Exception as e:
        print(f"Error getting metadata summary: {e}")
        return None
543
+
544
+ # --- Additional utility functions for specific file types ---
545
+
546
def get_supported_file_types():
    """Map each processing route to the list of file extensions it accepts."""
    supported = {
        'pdf': ['.pdf'],
        'powerpoint': ['.ppt', '.pptx'],
        'word_document': ['.doc', '.docx'],
        'images': ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'],
    }
    return supported
554
+
555
def validate_file_type(filepath: str):
    """Check whether *filepath*'s extension is supported.

    Returns (True, route_name) for a supported extension, otherwise
    (False, "unsupported").
    """
    extension = os.path.splitext(filepath)[1].lower()
    for route, extensions in get_supported_file_types().items():
        if extension in extensions:
            return True, route
    return False, "unsupported"
GS_Sales_Proposal/Document_Upload_Vectordb/doc_xtraction_utils.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def format_docs(docs):
    """Join each document's page_content with blank-line separators."""
    contents = (doc.page_content for doc in docs)
    return '\n\n'.join(contents)
3
+
4
def clean_to_list(result: str):
    """Strip a Markdown code fence from an LLM reply and return the payload.

    Removes a leading ```python / ```json / ``` marker (checked in that
    order, longest first) and a trailing ```, trimming whitespace at each
    step.
    """
    result = result.strip()
    for fence in ('```python', '```json', '```'):
        if result.startswith(fence):
            result = result[len(fence):].strip()
            break
    if result.endswith('```'):
        result = result[:-3].strip()
    return result
GS_Sales_Proposal/Document_Upload_Vectordb/pain_points_extractor.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_google_genai import ChatGoogleGenerativeAI
2
+ from dotenv import load_dotenv
3
+ from langchain_core.output_parsers import StrOutputParser
4
+ from langchain.prompts import ChatPromptTemplate
5
+ from langchain_core.runnables import RunnableLambda
6
+ load_dotenv()
7
+ import json
8
+
9
+ llm = ChatGoogleGenerativeAI(model = 'gemini-1.5-flash')
10
+
11
+ from .prompts import *
12
+
13
+ from langchain_core.prompts import ChatPromptTemplate
14
+
15
+ from .doc_vectorizer import vectorize
16
+
17
+ from .doc_xtraction_utils import *
18
+
19
def get_pain_points(file: str, company_name: str):
    """Vectorize an RFI document and extract pain points via a RAG chain.

    Builds a retriever over the vectorized file, feeds retrieved context
    into the rfi_painpoint_prompt, and parses the LLM's reply as JSON
    (expected shape: a dict of category -> pain-point summary per the
    prompt). Returns [] on any failure.
    """
    # Use a different variable name to avoid conflict with imported prompt
    pain_point_template = ChatPromptTemplate.from_template(rfi_painpoint_prompt)
    retriever = vectorize(file, company_name).as_retriever()

    # Extract the query string from input and pass to retriever
    context_chain = (
        RunnableLambda(lambda x: x["query"])  # Extract just the query string
        | retriever
        | RunnableLambda(format_docs)
    )

    rag_chain = (
        {"context": context_chain}
        | pain_point_template  # Use the renamed variable
        | llm
        | StrOutputParser()
    )

    try:
        result = rag_chain.invoke({"query": "Extract key business concerns and pain points from this RFI."})
        # clean_to_list strips any Markdown code fence before JSON parsing.
        print(type(json.loads(clean_to_list(result))))
        return json.loads(clean_to_list(result))
    except Exception as e:
        print(f"Error in get_pain_points: {e}")
        return []
GS_Sales_Proposal/Document_Upload_Vectordb/prompts.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ image_prompt = """You are a highly meticulous AI assistant that extracts and summarizes every possible piece of visual information from an image without omitting any detail.
2
+ Your task is to generate an exhaustive, structured summary of the image that captures all the text, visual elements, layout, colors (if relevant), numbers, figures, and any context or formatting that might be useful.
3
+ Do not generalize or paraphrase — capture the content exactly as it appears. Use bullet points, lists, or structured sections (e.g., titles, tables, headers, footnotes) to organize your summary.
4
+
5
+ Be especially attentive to:
6
+ - All visible text, including headers, footnotes, and marginal notes
7
+ - Tables: Capture each row and column verbatim including headers and cell values
8
+ - Graphs/Charts: Explain all axes, labels, legends, data points, patterns, and conclusions
9
+ - Visual layout and structure: Describe how content is arranged (e.g., two-column layout, centered title, left-aligned figure)
10
+ - Icons, logos, or images embedded within the image: Describe them accurately
11
+ - Fonts, colors, and emphasis (e.g., bold, italic, underlined) if they seem meaningful
12
+ - Dates, numbers, symbols, or special formatting exactly as shown
13
+ - If the image is a document or scanned page, preserve hierarchy and document structure
14
+
15
+ Output the result in structured markdown with clear section headers (e.g., "Header", "Table 1", "Figure Description", "Text Body", "Footnotes").
16
+ Your goal is to allow someone to fully understand the image without seeing it, preserving maximum detail for use in downstream AI models or search systems."""
17
+
18
+
19
+
20
+
21
+ rfi_painpoint_prompt = """
22
+ You are a highly capable business analyst AI with deep expertise in sales, technology, and market research. Your task is to analyze an RFI (Request for Information) document from a client who is seeking digital or technology solutions.
23
+
24
+ From this document, extract and synthesize **three key insights or business pain points** that the client organization is implicitly or explicitly concerned about. Each pain point should be labeled under a relevant category, followed by a brief, insightful summary.
25
+
26
+ Here is the context of the sales proposal:
27
+ {context}
28
+
29
+ Respond with **only** a valid JSON dictionary using the following format:
30
+
31
+ {{
32
+ "Category 1": "Insightful and concise pain point summary.",
33
+ "Category 2": "Another brief and relevant pain point summary.",
34
+ "Category 3": "A third valuable insight from the RFI."
35
+ }}
36
+
37
+ ❌ Do **not** add any explanation, text before or after the dictionary, markdown, comments, or labels.
38
+ ✅ Return **only** the raw JSON dictionary — nothing else.
39
+ """
GS_Sales_Proposal/Document_Upload_Vectordb/rfi2.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de4da8161b3c88fdaaeab1c8ed7f338e03e1f198ca28a48f1e44d0556560a27d
3
+ size 174219
GS_Sales_Proposal/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Amrutha-git-hub
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
GS_Sales_Proposal/Recommendation/__init__.py ADDED
File without changes
GS_Sales_Proposal/Recommendation/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (159 Bytes). View file
 
GS_Sales_Proposal/Recommendation/__pycache__/prompts.cpython-313.pyc ADDED
Binary file (1.77 kB). View file
 
GS_Sales_Proposal/Recommendation/__pycache__/recommendation_utils.cpython-313.pyc ADDED
Binary file (1.58 kB). View file
 
GS_Sales_Proposal/Recommendation/prompts.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai_suggetion_for_additional_req_prompt = '''You are a B2B Sales manager and innovation strategist.
2
+
3
+ Your role is to review and enrich client requirements based on the following inputs:
4
+
5
+ **Enterprise Details**:
6
+ {enterprise_details}
7
+
8
+ **Current Client Requirements**:
9
+ {client_requirements}
10
+
11
+ Your tasks:
12
+
13
+ 1. Based on the selected client_requirements suggest any additional points to be included in terms of in terms of payment , time , budget etc
14
+
15
+ Respond in this format:
16
+
17
+ ---
18
+ ### ✅ Refined Client Requirements
19
+ [Improved version of the client requirements]
20
+
21
+ ---
22
+
23
+ ### 💡 Innovative Suggestions
24
+ - [Idea 1 with rationale]
25
+ - [Idea 2 with rationale]
26
+
27
+ ---
28
+
29
+ ### 📌 Best Practice Recommendations
30
+ - [What’s missing or could be enhanced]
31
+ - [Formatting, phrasing, or process suggestions]
32
+
33
+ ---
34
+
35
+ Ensure your language is professional, client-facing, and strategic.
36
+ '''
37
+ # ai_suggetion_for_additional_req_prompt = '''You are a senior solution consultant and innovation strategist.
38
+
39
+ # Your role is to review and enrich client requirements based on the following inputs:
40
+
41
+ # **Enterprise Details**:
42
+ # {enterprise_details}
43
+
44
+ # **Current Client Requirements**:
45
+ # {client_requirements}
46
+
47
+ # Your tasks:
48
+
49
+ # 1. **Assess Alignment**:
50
+ # - Evaluate if the client requirements are aligned with the enterprise’s offerings and capabilities.
51
+ # - Identify gaps, redundancies, or missing technical/business aspects.
52
+
53
+ # 2. **Recommend Improvements**:
54
+ # - Rewrite the client requirements for better clarity, completeness, and strategic fit.
55
+ # - Ensure inclusion of key components such as scope, deliverables, timelines, and measurable outcomes.
56
+
57
+ # 3. **Suggest Innovations**:
58
+ # - Propose at least **2 innovative or differentiating additions** that could delight the client or increase project value.
59
+ # - These could be technology enhancements, automation opportunities, personalization, integrations, or unique service models.
60
+
61
+ # 4. **Highlight Best Practices**:
62
+ # - Mention if anything is outdated, vague, or can be made more professional or efficient.
63
+ # - Share **best practices** relevant to the industry or solution area.
64
+
65
+ # Respond in this format:
66
+
67
+ # ---
68
+ # ### ✅ Refined Client Requirements
69
+ # [Improved version of the client requirements]
70
+
71
+ # ---
72
+
73
+ # ### 💡 Innovative Suggestions
74
+ # - [Idea 1 with rationale]
75
+ # - [Idea 2 with rationale]
76
+
77
+ # ---
78
+
79
+ # ### 📌 Best Practice Recommendations
80
+ # - [What’s missing or could be enhanced]
81
+ # - [Formatting, phrasing, or process suggestions]
82
+
83
+ # ---
84
+
85
+ # Ensure your language is professional, client-facing, and strategic.
86
+ # '''
87
+
88
+ business_priotiiry_recommendation_prompt = '''You are a B2B business strategy expert.
89
+
90
+ Your task is to identify the top 3 current business priorities for a client stakeholder based on their role.
91
+
92
+ **Client SPOC Role**: {client_spoc_role}
93
+
94
+ Guidelines:
95
+ - Focus on strategic goals and KPIs relevant to that role.
96
+ - Consider current trends and business environments (e.g., digital transformation, efficiency, AI adoption, cost control).
97
+ - Keep the priorities concise, professional, and relevant to decision-making.
98
+
99
+ Respond in the following format:
100
+
101
+ [
102
+ {{"title": "Strategic Growth and Vision", "icon": "📈"}},
103
+ {{"title": "Operational Efficiency", "icon": "⚙️"}},
104
+ {{"title": "Customer Experience", "icon": "💡"}}
105
+ ]
106
+
107
+ '''
GS_Sales_Proposal/Recommendation/recommendation_utils.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_core.prompts import ChatPromptTemplate
from .prompts import *
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import JsonOutputParser,StrOutputParser
from dotenv import load_dotenv
from Document_Upload_Vectordb.doc_xtraction_utils import clean_to_list
# Load .env (e.g. the Google API key) before the chat model below is built.
load_dotenv()
import json

# Shared Gemini chat model used by all recommendation helpers in this module.
llm = ChatGoogleGenerativeAI(model = 'gemini-1.5-flash')
11
+
12
+
13
+
14
+
15
def get_ai_client_requirements(enterprise_details, client_requirements):
    """Ask the LLM to refine/augment the client requirements.

    Fills the `ai_suggetion_for_additional_req_prompt` template with the
    given enterprise details and raw client requirements and returns the
    model's answer as plain text.
    """
    prompt = ChatPromptTemplate.from_template(ai_suggetion_for_additional_req_prompt)
    pipeline = prompt | llm | StrOutputParser()
    return pipeline.invoke({
        'enterprise_details': enterprise_details,
        'client_requirements': client_requirements,
    })
20
+
21
def get_ai_business_priorities(spoc_role="CEO"):
    """Infer the top business priorities for a client stakeholder role.

    Fills `business_priotiiry_recommendation_prompt` with *spoc_role* and
    parses the model output as JSON — per the prompt, a list of three
    {"title", "icon"} dicts.

    Fix: removed the stray debug `print(result)` that was left in the
    production path; the parsed result is returned unchanged.
    """
    template = ChatPromptTemplate.from_template(business_priotiiry_recommendation_prompt)
    chain = template | llm | JsonOutputParser()
    return chain.invoke({'client_spoc_role': spoc_role})
GS_Sales_Proposal/Search/Linkedin/__pycache__/linkedin_serp.cpython-313.pyc ADDED
Binary file (1.7 kB). View file
 
GS_Sales_Proposal/Search/Linkedin/linkedin_agent_runner_unused.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google.adk.agents import Agent
2
+ from google.adk.tools import google_search
3
+ from pydantic import BaseModel,Field
4
+ from dotenv import load_dotenv
5
+ from google.adk.sessions import InMemorySessionService
6
+ from google.adk.runners import Runner
7
+ from google.genai import types
8
+ import ast
9
+ import re
10
+ from Search.LinkedIN.linkedin_agent_unused import *
11
+
12
+
13
# Setup session and runner
# Module-level, shared in-memory ADK session + runner pair used by
# get_linkedin() below. Everything runs under one fixed app/user/session id.
session_service = InMemorySessionService()
SESSION_ID = 'sess'
USER_ID = 'user'

# NOTE(review): in recent google-adk releases create_session() is a coroutine
# and would need to be awaited — confirm against the pinned ADK version.
session = session_service.create_session(
    app_name="APP",
    user_id=USER_ID,
    session_id=SESSION_ID
)

# Runner driving search_agent (imported from linkedin_agent_unused via *).
runner = Runner(
    app_name="APP",
    session_service=session_service,
    agent=search_agent
)
29
def extract_list_from_string(s):
    """Extract and parse the first JSON array embedded in *s*.

    Returns the parsed list, or None (after printing a diagnostic) when no
    bracketed span is found or the span is not valid JSON.
    """
    bracketed = re.search(r"\[.*\]", s, re.DOTALL)
    if bracketed is None:
        print("No list found.")
        return None
    try:
        return json.loads(bracketed.group())
    except json.JSONDecodeError:
        print("Failed to parse list.")
    return None
40
+
41
+
42
+ import json
43
async def get_linkedin(user_name: str, runner=runner, user_id=USER_ID, session_id=SESSION_ID):
    """Run the LinkedIn search agent for *user_name* and parse its JSON reply.

    Streams agent events until the final response (or an escalation error
    message), strips an optional Markdown code fence from the text, and
    returns `json.loads(...)` of what remains.

    Improvements: the four near-identical fence-stripping branches are
    collapsed into one loop, and the debug print of the raw payload was
    removed. Raises json.JSONDecodeError if the agent reply is not JSON,
    exactly as before.
    """
    content = types.Content(role='user', parts=[types.Part(text=user_name)])
    final_msg = ""

    async for event in runner.run_async(user_id=user_id, session_id=session_id, new_message=content):
        if event.is_final_response():
            if event.content and event.content.parts:
                final_msg = event.content.parts[0].text
            elif event.actions and event.actions.escalate:
                final_msg = event.error_message

    result = final_msg.strip()
    # Strip one opening Markdown fence (```python / ```json / bare ```) and
    # one closing fence, if the model wrapped its answer in a code block.
    for fence in ('```python', '```json', '```'):
        if result.startswith(fence):
            result = result[len(fence):].strip()
            break
    if result.endswith('```'):
        result = result[:-3].strip()
    return json.loads(result)
66
+
GS_Sales_Proposal/Search/Linkedin/linkedin_agent_unused.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google.adk.agents import Agent
2
+ from google.adk.tools import google_search
3
+ from pydantic import BaseModel,Field
4
+ from dotenv import load_dotenv
5
+ from google.adk.sessions import InMemorySessionService
6
+ from google.adk.runners import Runner
7
+ from google.genai import types
8
+ import ast
9
+ import re
10
+
11
+
12
+ load_dotenv()
13
class WebSite(BaseModel):
    """Pydantic schema for one website search hit (name + URL).

    NOTE(review): defined here but not referenced by search_agent below —
    presumably intended as a structured-output schema; confirm before removal.
    """
    website_name : str = Field(description="Website name")
    website_url : str = Field(description="Website url")
16
+
17
+
18
# ADK agent that uses Google search to find real LinkedIn profile URLs for a
# person's name and returns a JSON list of up to 5 candidates with inferred
# roles/priorities. Consumed by the runner in linkedin_agent_runner_unused.py.
search_agent = Agent(
    model='gemini-2.0-flash-001',
    name='linkedin_profile_agent',
    description=(
        "You are an intelligent assistant that finds the most accurate and official LinkedIn profiles "
        "of people and analyzes their current job roles to generate sales insights."
    ),
    instruction='''
    Given the name of a person, your task is to find and return **exactly 5 people** for whom:

    - A valid and official **LinkedIn profile URL** (`linkedin.com/in/...`) can be found.
    - The **current job title/role** is either extracted from search preview or inferred based on their LinkedIn snippet.
    - You can intelligently infer the **top 3 job priorities** relevant to a sales proposal (i.e., what matters to this person in a B2B sale).

    ⚠️ STRICT RULES:
    - DO NOT return any result without a valid LinkedIn URL.
    - DO NOT invent or guess URLs — only use actual `linkedin.com/in/...` links found via search.
    - Use `site:linkedin.com/in "Full Name"` on Google to identify results.
    - Skip people for whom no real LinkedIn result is found.
    - Return fewer than 5 results if necessary, but never include fake or placeholder data.

    📌 Response format MUST be a Python-style list of JSON objects like this:

    [
      {
        "name": "Shreyank Isiri",
        "linkedin_url": "https://www.linkedin.com/in/shreyankisiri/",
        "role": "Solutions Architect at XYZ Corp",
        "top_3_priorities": [
          "Understanding client infrastructure needs",
          "Designing scalable and secure systems",
          "Supporting sales through technical expertise"
        ]
      },
      ...
    ]

    IMPORTANT:
    - Use temperature = 0
    - DO NOT include explanations or markdown. Just return the list.
    - Always ensure the LinkedIn URL is real and not hallucinated.
    ''',
    tools=[google_search],
)
62
+
63
+
GS_Sales_Proposal/Search/Linkedin/linkedin_serp.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import requests
3
+ from dotenv import load_dotenv
4
+ load_dotenv()
5
+ import os
6
+ import streamlit as st
7
+
8
def infer_priorities(title):
    """Return three canned B2B priorities for a person with job title *title*.

    Placeholder function: replace with actual priority-inference logic based
    on the role (*title* is currently unused but kept for that future logic).

    Fix: the original list contained "Scalability & Risk Mitigation" twice
    (once with a stray leading space); a top-3 list should hold three
    distinct values, so the duplicate is replaced with "Customer Experience"
    to match the business-priority examples used elsewhere in this project.
    """
    return ["Scalability & Risk Mitigation", "Operational Efficiency", "Customer Experience"]
11
+
12
def search_linkedin_serpapi(name):
    """Find LinkedIn profile pages for *name* via a SerpAPI Google search.

    Returns a dict keyed by profile URL, each value holding the searched
    name, the result title as a role proxy, and placeholder priorities from
    infer_priorities(). At most 5 entries; an empty dict on any error (the
    error is surfaced to the user via st.error).

    Fixes: added a request timeout so a slow/unreachable SerpAPI cannot hang
    the Streamlit app indefinitely, and removed the debug print of results.
    """
    params = {
        "q": f'site:linkedin.com/in "{name}"',
        "api_key": os.getenv("SERP_API_KEY"),
        "engine": "google",
        "num": 5
    }

    try:
        # timeout (connect+read) keeps the UI responsive on network failures.
        response = requests.get("https://serpapi.com/search", params=params, timeout=15).json()
        results = {}

        for res in response.get("organic_results", []):
            link = res.get("link", "")
            title = res.get("title", "")
            if "linkedin.com/in" in link:
                results[link] = {
                    "name": name,
                    "role": title,
                    "top_3_priorities": infer_priorities(title)
                }
            if len(results) == 5:
                break

        return results
    except Exception as e:
        st.error(f"Error searching LinkedIn profiles: {e}")
        return {}
GS_Sales_Proposal/Search/WebsiteUrl_Agent/__pycache__/agent.cpython-312.pyc ADDED
Binary file (5.12 kB). View file
 
GS_Sales_Proposal/Search/WebsiteUrl_Agent/__pycache__/agent.cpython-313.pyc ADDED
Binary file (2.5 kB). View file
 
GS_Sales_Proposal/Search/WebsiteUrl_Agent/__pycache__/agent_runner.cpython-313.pyc ADDED
Binary file (3.49 kB). View file
 
GS_Sales_Proposal/Search/WebsiteUrl_Agent/agent.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google.adk.agents import Agent
2
+ from google.adk.tools import google_search
3
+ from pydantic import BaseModel,Field
4
+ from dotenv import load_dotenv
5
+ from google.adk.sessions import InMemorySessionService
6
+ from google.adk.runners import Runner
7
+ from google.genai import types
8
+ import ast
9
+ import re
10
+
11
+
12
+ load_dotenv()
13
class WebSite(BaseModel):
    """Pydantic schema for one website search hit (name + URL).

    NOTE(review): duplicated from the LinkedIn agent module and not referenced
    by search_agent below — presumably a planned structured-output schema.
    """
    website_name : str = Field(description="Website name")
    website_url : str = Field(description="Website url")
16
+
17
+
18
# ADK agent that, given an organization name, searches Google and returns a
# bare JSON/Python list of official website URLs (no explanations). Consumed
# by get_urls() in agent_runner.py, which json-parses the reply.
# NOTE(review): the instruction mixes "top 7" and "any 10 urls" — the model
# may return either count; confirm the intended limit.
search_agent = Agent(
    model='gemini-2.0-flash-001',
    name='url_agent',
    description = (
        "You are an intelligent assistant specialized in finding official and relevant websites "
        "associated with a given organization or company name. Your goal is to retrieve high-quality, "
        "credible links that accurately represent the digital presence of the organization."
    ),
    instruction = '''
    Given the name of a company or organization, your task is to search and return the top 7 most relevant and credible website URLs associated with it.

    These can include:
    - The official company website try fetching this and if there are multiple then show all 7


    Your response must be a clean Python-style list of strings, where each string is a valid URL.

    Format your response exactly like this:

    [
      "https://google.com/",
      "https://cloud.google.com",
      "https://accounts.google.com"
    ]

    Like this any 10 urls that are related to the given organization name

    Do not include explanations, only return the list of URLs.

    IMPORTANT : Just return me list of urls no additional text

    return like


    ----
    [
      "https://google.com/",
      "https://cloud.google.com",
      "https://accounts.google.com"
    ]

    ----

    VERY IMPORTANT : TEMPERATURE OF THE MODEL BE ZEROOOO AND remember dont give me like the links of youtube or linkedin or any other platforms
    THE LINK SHOULD BE OFFICIAL LINK OF THE ORGANIZATION
    ''',

    tools = [google_search],
)
67
+
GS_Sales_Proposal/Search/WebsiteUrl_Agent/agent_runner.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google.adk.agents import Agent
2
+ from google.adk.tools import google_search
3
+ from pydantic import BaseModel,Field
4
+ from dotenv import load_dotenv
5
+ from google.adk.sessions import InMemorySessionService
6
+ from google.adk.runners import Runner
7
+ from google.genai import types
8
+ import ast
9
+ import re
10
+ from WebsiteUrl_Agent.agent import *
11
+
12
+
13
# Setup session and runner
# Module-level, shared in-memory ADK session + runner pair used by
# get_urls() below; fixed app/user/session identifiers.
session_service = InMemorySessionService()
SESSION_ID = 'sess'
USER_ID = 'user'

# NOTE(review): in recent google-adk releases create_session() is a coroutine
# and would need to be awaited — confirm against the pinned ADK version.
session = session_service.create_session(
    app_name="APP",
    user_id=USER_ID,
    session_id=SESSION_ID
)

# Runner driving search_agent (imported from WebsiteUrl_Agent.agent via *).
runner = Runner(
    app_name="APP",
    session_service=session_service,
    agent=search_agent
)
29
def extract_list_from_string(s):
    """Pull the first JSON array out of a raw model response string.

    Returns the parsed list, or None (with a printed diagnostic) when no
    bracketed span exists or the matched span is not valid JSON.
    """
    found = re.search(r"\[.*\]", s, re.DOTALL)
    if found is None:
        print("No list found.")
        return None
    try:
        return json.loads(found.group())
    except json.JSONDecodeError:
        print("Failed to parse list.")
    return None
40
+
41
+
42
+ import json
43
async def get_urls(company_name: str, runner=runner, user_id=USER_ID, session_id=SESSION_ID):
    """Run the URL agent for *company_name* and parse its JSON list of URLs.

    Streams agent events until the final response (or an escalation error
    message), strips an optional Markdown code fence, and returns
    `json.loads(...)` of the remaining text.

    Improvements: the four near-identical fence-stripping branches are
    collapsed into one loop, and the debug print of the raw payload was
    removed. Raises json.JSONDecodeError if the reply is not JSON, as before.
    """
    content = types.Content(role='user', parts=[types.Part(text=company_name)])
    final_msg = ""

    async for event in runner.run_async(user_id=user_id, session_id=session_id, new_message=content):
        if event.is_final_response():
            if event.content and event.content.parts:
                final_msg = event.content.parts[0].text
            elif event.actions and event.actions.escalate:
                final_msg = event.error_message

    result = final_msg.strip()
    # Strip one opening Markdown fence (```python / ```json / bare ```) and
    # one closing fence, if the model wrapped its answer in a code block.
    for fence in ('```python', '```json', '```'):
        if result.startswith(fence):
            result = result[len(fence):].strip()
            break
    if result.endswith('```'):
        result = result[:-3].strip()
    return json.loads(result)
66
+
67
if __name__ == "__main__":
    # Manual smoke test. Fix: the original called asyncio.run(...) at module
    # top level, which executed a live agent query on every `import` of this
    # module; guard it so it only runs when executed directly.
    import asyncio
    asyncio.run(get_urls("growth sutra"))
GS_Sales_Proposal/Search/__pycache__/linkedin_serp.cpython-313.pyc ADDED
Binary file (1.7 kB). View file
 
GS_Sales_Proposal/Seller/__pycache__/seller.cpython-313.pyc ADDED
Binary file (20.4 kB). View file
 
GS_Sales_Proposal/Seller/__pycache__/seller_css.cpython-313.pyc ADDED
Binary file (12.5 kB). View file
 
GS_Sales_Proposal/Seller/__pycache__/seller_utils.cpython-313.pyc ADDED
Binary file (2.3 kB). View file
 
GS_Sales_Proposal/Seller/seller.py ADDED
@@ -0,0 +1,463 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from .seller_css import seller_css
3
+ from .seller_utils import *
4
+ from Search.Linkedin.linkedin_serp import *
5
+ from Recommendation.recommendation_utils import *
6
+ from t import *
7
+
8
def seller_tab():
    """Render the "Seller" tab of the Streamlit sales-proposal app.

    Flow: (1) seed all seller-related st.session_state keys, (2) name + website
    URL row (find/refresh/scrape buttons), (3) multi-file document upload with
    batch service extraction, (4) the unified enterprise-details / services
    selector. Order of statements matters: Streamlit re-runs this top-to-bottom
    on every interaction, and handlers below the columns rely on widget values
    assigned above.
    """
    # Re-apply CSS after every rerun to ensure persistence
    st.markdown(seller_css, unsafe_allow_html=True)

    # Initialize validation trigger
    if 'show_validation' not in st.session_state:
        st.session_state.show_validation = False

    # Initialize enterprise details content in session state
    if 'seller_enterprise_details_content' not in st.session_state:
        st.session_state.seller_enterprise_details_content = ""

    # Initialize seller requirements content in session state
    if 'seller_requirements_content' not in st.session_state:
        st.session_state.seller_requirements_content = ""

    # Initialize URLs list in session state
    if 'seller_website_urls_list' not in st.session_state:
        st.session_state['seller_website_urls_list'] = []

    # Initialize last company name to track changes
    if 'last_seller_company_name' not in st.session_state:
        st.session_state['last_seller_company_name'] = ""

    # Initialize uploaded file path in session state
    if 'seller_uploaded_file_path' not in st.session_state:
        st.session_state['seller_uploaded_file_path'] = None

    # Initialize RFI pain points items in session state
    if 'seller_rfi_pain_points_items' not in st.session_state:
        st.session_state['seller_rfi_pain_points_items'] = {}

    # Initialize document analysis status
    if 'seller_document_analyzed' not in st.session_state:
        st.session_state['seller_document_analyzed'] = False

    if 'seller_linkedin_profiles' not in st.session_state:
        st.session_state['seller_linkedin_profiles'] = {}
    if 'last_searched_seller_spoc' not in st.session_state:
        st.session_state['last_searched_seller_spoc'] = ""

    # Initialize scraping states (two-phase: a click only records intent; the
    # actual scrape happens after st.rerun(), outside the column layout).
    if 'seller_scraping_in_progress' not in st.session_state:
        st.session_state['seller_scraping_in_progress'] = False
    if 'seller_pending_scrape_url' not in st.session_state:
        st.session_state['seller_pending_scrape_url'] = None

    # Top section with seller name and URLs
    col1, col2 = st.columns([1, 1])

    with col1:
        st.markdown("""
        <div class="tooltip-label">
            Seller Enterprise Name <span style="color:red;">*</span>
            <div class="tooltip-icon" data-tooltip="Enter the full legal name of the seller organization. This is the primary identifier for the seller in all documentation and communications. This field is mandatory for creating the seller profile.">ⓘ</div>
        </div>
        """, unsafe_allow_html=True)

        # Create a sub-column layout for name input and find URLs button
        name_col, button_col = st.columns([3, 1])

        with name_col:
            seller_enterprise_name = st.text_input(
                label="Seller Enterprise Name",
                placeholder="Enter seller enterprise name...",
                key="seller_enterprise_name_input",
                label_visibility="collapsed"
            )

        with button_col:
            # Find URLs button - only enabled when seller name has more than 2 characters
            find_urls_disabled = not (seller_enterprise_name and len(seller_enterprise_name.strip()) > 2)

            if st.button("🔍 Find Website",
                         disabled=find_urls_disabled,
                         help="Find website URLs for this company",
                         key="find_seller_urls_button"):
                # Add spinner while fetching URLs
                with st.spinner(f"Finding Websites for '{seller_enterprise_name.strip()}'..."):
                    try:
                        st.session_state['seller_website_urls_list'] = get_urls_list(seller_enterprise_name.strip())
                        st.session_state['last_seller_company_name'] = seller_enterprise_name
                    except Exception as e:
                        st.session_state['seller_website_urls_list'] = []
                        st.error(f"Error finding URLs: {str(e)}")

        # Clear URLs if company name is cleared
        if not seller_enterprise_name and st.session_state['last_seller_company_name']:
            st.session_state['seller_website_urls_list'] = []
            st.session_state['last_seller_company_name'] = ""

        # Show validation warning if triggered and field is empty
        if st.session_state.show_validation and check_field_validation("Seller Enterprise Name", seller_enterprise_name, True):
            show_field_warning("Seller Enterprise Name")

    with col2:
        # Label row with inline emoji and tooltip
        st.markdown('''
        <div class="tooltip-label" style="display: flex; align-items: center; gap: 8px;">
            <span>Seller Website URL</span>
            <div class="tooltip-icon" data-tooltip="Enter or select the seller's official website URL. The system will automatically analyze the website to extract company information, services, and business details to help understand the seller's capabilities and offerings.">ⓘ</div>
        </div>
        ''', unsafe_allow_html=True)

        # Create columns for dropdown and buttons - dropdown takes most space, buttons share remaining space
        url_col, btn1_col, btn2_col, btn3_col = st.columns([7, 1, 1, 1])

        with url_col:
            # URL selection logic - Always show normal dropdown, just disable when no seller name
            seller_name_provided = bool(seller_enterprise_name and seller_enterprise_name.strip())

            if not st.session_state.get('seller_website_urls_list'):
                # No URLs available - show default option
                url_options = ["Select seller website URL"]
            else:
                # URLs available - show them in dropdown
                url_options = ["Select seller website URL"] + st.session_state['seller_website_urls_list']

            seller_website_url = st.selectbox(
                label="Seller Website URL",
                options=url_options,
                key="seller_website_url_selector",
                label_visibility="collapsed",
                disabled=not seller_name_provided,
                accept_new_options=True
            )

            # Reset to empty string if default option is selected
            if seller_website_url == "Select seller website URL":
                seller_website_url = ""

        # Each button in its own column for horizontal alignment
        with btn1_col:
            if seller_website_url:
                st.link_button("🌐", seller_website_url, help="Visit website",use_container_width=True)
            else:
                st.button("🌐", help="Visit website", disabled=True,use_container_width=True)
        with btn2_col:
            # Button 2: Refresh URL List
            refresh_clicked = st.button("🔄", help="Refresh website URLs list", key="refresh_seller_urls_btn",use_container_width=True,disabled=not seller_website_url)

        with btn3_col:
            # Button 3: Scrape Website - Set up pending scrape instead of immediate execution
            scrape_clicked = st.button("📑", help="Get enterprise details", key="scrape_seller_website_btn",use_container_width=True, disabled=not seller_website_url)

        # Handle scrape button click by setting up pending operation
        if scrape_clicked and seller_website_url:
            st.session_state['seller_pending_scrape_url'] = seller_website_url
            st.session_state['seller_scraping_in_progress'] = True
            st.rerun()

    # Handle refresh action outside columns for better UX
    if refresh_clicked and seller_name_provided:
        try:
            with st.spinner("Refreshing website URLs..."):
                st.session_state['seller_website_urls_list'] = get_urls_list(seller_enterprise_name)
                st.success("Website URLs refreshed!")
                st.rerun()  # Refresh the page to show updated URLs
        except Exception as e:
            st.error(f"Error refreshing URLs: {str(e)}")

    # Handle pending scraping operation OUTSIDE of columns to prevent UI blocking
    if st.session_state.get('seller_scraping_in_progress') and st.session_state.get('seller_pending_scrape_url'):
        # Show full-width spinner
        with st.spinner(f"Scraping website details from {st.session_state['seller_pending_scrape_url']}..."):
            try:
                # Perform the scraping operation
                website_details = get_url_details(st.session_state['seller_pending_scrape_url'])
                st.session_state.seller_enterprise_details_content = website_details
                st.session_state['last_analyzed_seller_url'] = st.session_state['seller_pending_scrape_url']

                # Clear pending operation
                st.session_state['seller_scraping_in_progress'] = False
                st.session_state['seller_pending_scrape_url'] = None

                st.success("Website details extracted successfully!")
                st.rerun()  # Refresh to show updated details

            except Exception as e:
                # Clear pending operation on error
                st.session_state['seller_scraping_in_progress'] = False
                st.session_state['seller_pending_scrape_url'] = None
                st.error(f"Error scraping website: {str(e)}")

    # Show validation warning if triggered and field is empty (optional)
    if st.session_state.show_validation and check_field_validation("Seller Website URL", seller_website_url, False):
        show_field_warning("Seller Website URL")


    #-------------------------------------------------------------------------------

    st.markdown('''
    <div class="tooltip-label">
        Upload Seller Document
        <div class="tooltip-icon" data-tooltip="Upload seller-related documents such as company profiles, service catalogs, capabilities documents, or proposals in PDF, DOCX, TXT, or CSV format. The system will automatically analyze and extract key capabilities, services, and business strengths to help understand the seller's offerings.">ⓘ</div>
    </div>
    ''', unsafe_allow_html=True)

    # Add custom CSS for file uploader and animations
    st.markdown("""
    <style>
    .stFileUploader > div > div > div {
        padding: 0.5rem !important;
        min-height: 2rem !important;
    }
    .stFileUploader > div > div {
        min-height: 2rem !important;
    }
    [data-testid="stFileUploader"] {
        height: auto !important;
    }
    [data-testid="stFileUploader"] > div {
        padding: 0.25rem 0.5rem !important;
        min-height: 2rem !important;
    }

    /* Animation for processing file */
    .processing-file {
        animation: pulse 1.5s ease-in-out infinite;
        background: linear-gradient(90deg, #e3f2fd, #bbdefb, #e3f2fd);
        background-size: 200% 100%;
        animation: shimmer 2s linear infinite;
        border-radius: 4px;
        padding: 2px 4px;
    }

    @keyframes pulse {
        0% { opacity: 0.6; }
        50% { opacity: 1; }
        100% { opacity: 0.6; }
    }

    @keyframes shimmer {
        0% { background-position: -200% 0; }
        100% { background-position: 200% 0; }
    }

    .analyzing-text {
        color: #1976d2;
        font-weight: 500;
    }
    </style>
    """, unsafe_allow_html=True)

    # FILE UPLOAD - Always enabled, independent of seller name (multiple files)
    seller_documents_upload = st.file_uploader(
        label="Upload Seller Documents",
        type=['pdf', 'docx', 'txt', 'csv','png','jpg','jpeg'],
        key="seller_documents_uploader",
        label_visibility="collapsed",
        accept_multiple_files=True
    )

    # Initialize processing states and file tracking
    if 'processing_all_seller_documents' not in st.session_state:
        st.session_state['processing_all_seller_documents'] = False
    if 'seller_uploaded_files_paths' not in st.session_state:
        st.session_state['seller_uploaded_files_paths'] = {}
    if 'seller_services_by_file' not in st.session_state:
        st.session_state['seller_services_by_file'] = {}

    # Show file info and single analyze button for all files
    if seller_documents_upload is not None and len(seller_documents_upload) > 0:
        st.markdown("**Uploaded Documents:**")

        # Display all uploaded files
        for idx, uploaded_file in enumerate(seller_documents_upload):
            file_key = f"{uploaded_file.name}_{uploaded_file.size}"  # Unique key for each file

            # Very compact single line display
            file_size_kb = round(uploaded_file.size / 1024, 1)
            file_size_display = f"{file_size_kb}KB" if file_size_kb < 1024 else f"{round(file_size_kb/1024, 1)}MB"

            # Check if this file has been processed
            is_processed = file_key in st.session_state.get('seller_services_by_file', {})
            is_processing = st.session_state.get('processing_all_seller_documents', False)

            if is_processing:
                # Show animated processing state
                st.markdown(f"""
                <div class="processing-file">
                    <span style='font-size:0.8em' class="analyzing-text">
                        🔄 {uploaded_file.name[:25]}{'...' if len(uploaded_file.name) > 25 else ''} (Analyzing...)
                    </span>
                </div>
                """, unsafe_allow_html=True)
            else:
                # Show normal file info with status
                status_icon = "✅" if is_processed else "📄"
                st.markdown(f"<span style='font-size:0.8em'>{status_icon} {uploaded_file.name[:30]}{'...' if len(uploaded_file.name) > 30 else ''} ({file_size_display})</span>",
                            unsafe_allow_html=True)

        # Single button to process all files
        st.markdown("---")  # Separator line

        # Check if all files are already processed
        all_processed = all(
            f"{file.name}_{file.size}" in st.session_state.get('seller_services_by_file', {})
            for file in seller_documents_upload
        )

        is_processing = st.session_state.get('processing_all_seller_documents', False)

        # Button styling
        if all_processed:
            button_color = "#28a745"  # Green for all processed
            button_text = "All Documents Processed"
            button_disabled = True
        elif is_processing:
            button_color = "#FF6B6B"  # Red for processing
            button_text = "Analyzing All Documents..."
            button_disabled = True
        else:
            button_color = "#4CAF50"  # Blue for ready to process
            button_text = f"Get Services from All Documents ({len(seller_documents_upload)} files)"
            button_disabled = False

        st.markdown(f"""
        <style>
        div.stButton > button:first-child {{
            background-color: {button_color};
            color: white;
            border: none;
            font-weight: bold;
        }}
        </style>
        """, unsafe_allow_html=True)

        # Single analyze button for all files
        analyze_all_clicked = st.button(
            button_text,
            key="analyze_all_seller_documents_btn",
            help="Process all seller documents" if not button_disabled else "All documents processed" if all_processed else "Processing in progress...",
            type="secondary",
            disabled=button_disabled,
            use_container_width=True
        )

        # Handle analyze button click for all files
        if analyze_all_clicked and not button_disabled:
            if not seller_enterprise_name:
                st.error("❌ Please enter the Seller Enterprise Name first")
            else:
                # Set processing flag for all files
                st.session_state['processing_all_seller_documents'] = True
                st.rerun()  # Refresh to show processing state

        # Handle processing for all files when button is clicked
        if st.session_state.get('processing_all_seller_documents', False):
            # Show overall processing indicator
            with st.container():
                st.markdown("**🔍 Processing all documents and extracting services...**")

            # Process each file
            all_services = {}
            processed_count = 0
            total_files = len(seller_documents_upload)

            for idx, uploaded_file in enumerate(seller_documents_upload):
                file_key = f"{uploaded_file.name}_{uploaded_file.size}"

                # Show progress for current file
                progress_text = f"Processing {uploaded_file.name} ({idx + 1}/{total_files})..."
                with st.spinner(progress_text):
                    try:
                        # Save the file and get the path
                        file_path = save_uploaded_file_and_get_path(uploaded_file)
                        st.session_state['seller_uploaded_files_paths'][file_key] = file_path

                        if file_path and seller_enterprise_name:
                            # Extract services using the file path and company name
                            file_services = get_seller_services(file_path, seller_enterprise_name)

                            # Store services data for this specific file
                            st.session_state['seller_services_by_file'][file_key] = {
                                'filename': uploaded_file.name,
                                'services': file_services,
                                'file_path': file_path
                            }

                            # Combine services from this file
                            if isinstance(file_services, dict):
                                all_services.update(file_services)

                            processed_count += 1
                            st.success(f"✅ {uploaded_file.name} processed successfully!")

                        else:
                            st.error(f"❌ Error saving {uploaded_file.name}")

                    except Exception as e:
                        st.error(f"❌ Error processing {uploaded_file.name}: {str(e)}")

            # Update combined services and reset processing flag
            st.session_state['seller_services_items'] = all_services
            st.session_state['seller_document_analyzed'] = True if processed_count > 0 else False
            st.session_state['processing_all_seller_documents'] = False

            # Show final summary
            if processed_count == total_files:
                st.success(f"🎉 All {total_files} documents processed successfully!")
            elif processed_count > 0:
                st.warning(f"⚠️ {processed_count} out of {total_files} documents processed successfully.")
            else:
                st.error("❌ No documents could be processed.")

            st.rerun()  # Refresh to update UI

    # Function call for Seller Services Offered selection
    seller_enterprise_details, seller_enterprise_details_provided = render_three_column_selector_unified(
        # Column configuration - Made wider to fill screen
        column_ratio=(2, 2, 2),  # Equal wider columns
        column_gap="large",  # Increased gap for better spacing

        # Left column (text area) configuration
        left_title="Seller Enterprise Details",
        left_tooltip="Define your enterprise details, services offered, company capabilities, core competencies, and business portfolio. This information helps clients understand your organizational strengths and service offerings.",
        left_required=True,
        textarea_height=200,  # Increased height for better visibility
        textarea_placeholder="Enter seller enterprise name first to enable this field",
        textarea_session_key="seller_enterprise_content",
        textarea_widget_key="seller_enterprise_textarea",

        # Unified right section (middle + right columns) configuration
        unified_section_title="Available Services & Capabilities",
        unified_section_tooltip="Select from available services and capabilities that represent your enterprise offerings. These can include technical services, consulting, products, or specialized business solutions.",

        # Session state keys for both sides
        middle_selected_items_key="selected_services_offered",
        middle_content_map_key="services_content_map",
        right_selected_items_key="selected_additional_capabilities",
        right_content_map_key="capabilities_content_map",

        # Single data source that will be displayed in both columns
        default_data=None,  # You would pass your services data dictionary here
        split_ratio=(3, 3),  # How many items go to middle vs right column

        # Enable/disable conditions
        client_enabled_condition=True,
        client_name_provided=True,

        # Styling configuration
        button_column_width=2.5,  # Button width within each column
        content_column_width=6.5,  # Content area width within each column
        show_success_messages=False,
        selected_color="#2e7d32",  # Green color
        selected_border_color="#4caf50",  # Green border
        unselected_color="#404040",
        unselected_border_color="#404040",
        text_color="#ffffff",

        # Title styling - Made normal size like left title
        title_font_size="18px",  # Same as other titles
        title_color="#ffffff",
        title_margin_bottom="10px"  # Reduced margin
    )
GS_Sales_Proposal/Seller/seller_css.py ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Global stylesheet injected (via st.markdown) into the Streamlit Seller page:
# light "card" section containers, tooltip labels, and dark-on-light overrides
# for every Streamlit input widget and secondary button.
seller_css = """
<style>
.client-section {
    background: #f5f5f5;
    padding: 1.5rem;
    border-radius: 10px;
    border-left: 4px solid #667eea;
    margin-bottom: 1rem;
    color: #2a2a2a;
}

.url-section {
    background: #f5f5f5;
    padding: 1.5rem;
    border-radius: 10px;
    border-left: 4px solid #764ba2;
    margin-bottom: 1rem;
    color: #2a2a2a;
}

.document-section {
    background: #f5f5f5;
    padding: 1.5rem;
    border-radius: 10px;
    border: 2px solid #5a9f9f;
    margin-bottom: 1rem;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.3);
    color: #2a2a2a;
}

.pain-points-section {
    background: #f5f5f5;
    padding: 1.5rem;
    border-radius: 10px;
    border-left: 4px solid #ffc107;
    color: #2a2a2a;
}

.roles-section {
    background: #f5f5f5;
    padding: 1.5rem;
    border-radius: 10px;
    border-left: 4px solid #2196f3;
    color: #2a2a2a;
}

.priorities-section {
    background: #f5f5f5;
    padding: 1.5rem;
    border-radius: 10px;
    border-left: 4px solid #9c27b0;
    color: #2a2a2a;
}

.ai-suggestion-section {
    background: #f5f5f5;
    padding: 1.5rem;
    border-radius: 10px;
    border-left: 4px solid #00bcd4;
    color: #2a2a2a;
}

.upload-section {
    border: 2px dashed #667eea;
    border-radius: 10px;
    padding: 2rem;
    text-align: center;
    background: #f5f5f5;
    color: #2a2a2a;
}

/* Style section headers */
.section-header {
    color: #2a2a2a;
    font-size: 1.2rem;
    font-weight: 600;
    margin-bottom: 1rem;
}

/* Mandatory field styling */
.mandatory-label {
    color: #e74c3c;
    font-weight: 600;
}

.field-warning {
    color: #e74c3c;
    font-size: 0.85rem;
    margin-top: 0.25rem;
    font-weight: 500;
    background: rgba(231, 76, 60, 0.1);
    padding: 0.5rem;
    border-radius: 4px;
    border-left: 3px solid #e74c3c;
}

.optional-label {
    color: #666666;
    font-size: 0.8rem;
    font-style: italic;
}

.ai-label {
    color: #00bcd4;
    font-size: 0.8rem;
    font-style: italic;
}

/* Custom styling for URL buttons */
.url-button-container {
    display: flex;
    gap: 5px;
    align-items: center;
}

.url-button {
    background: #667eea;
    color: white;
    border: none;
    padding: 8px 12px;
    border-radius: 6px;
    cursor: pointer;
    font-size: 14px;
    transition: background-color 0.3s;
}

.url-button:hover {
    background: #5a6fd8;
}

/* Summary item styling */
.summary-item {
    background: #f5f5f5;
    border: 1px solid #5a9f9f;
    border-radius: 8px;
    padding: 12px;
    margin-bottom: 8px;
    display: flex;
    justify-content: space-between;
    align-items: center;
    color: #2a2a2a;
}

.summary-key {
    font-weight: 600;
    color: #667eea;
}

.add-button {
    background: #28a745;
    color: white;
    border: none;
    padding: 6px 12px;
    border-radius: 4px;
    cursor: pointer;
    font-size: 12px;
    font-weight: bold;
}

.add-button:hover {
    background: #218838;
}

.summary-buttons {
    display: flex;
    gap: 8px;
    margin-bottom: 12px;
}

.summary-control-btn {
    background: #007bff;
    color: white;
    border: none;
    padding: 6px 12px;
    border-radius: 4px;
    cursor: pointer;
    font-size: 12px;
}

.summary-control-btn:hover {
    background: #0056b3;
}

/* Fixed tooltip label alignment.
   NOTE: the original rule declared `display: flex` and `align-items`
   twice (center, then flex-end). Only the last declaration of each
   property is applied by the cascade, so the dead duplicates are
   dropped here; the effective values are unchanged. */
.tooltip-label {
    font-size: 16px;
    font-weight: bold;
    margin-bottom: 8px;
    gap: 6px;
    height: 24px;
    line-height: 24px;
    min-height: 32px;
    display: flex;
    align-items: flex-end;
}

.tooltip-icon {
    position: relative;
    display: inline-block;
    cursor: pointer;
    margin-left: 0;
}

.tooltip-icon::after {
    content: attr(data-tooltip);
    visibility: hidden;
    width: 250px;
    background-color: #555;
    color: #fff;
    text-align: left;
    border-radius: 6px;
    padding: 8px;
    position: absolute;
    z-index: 1;
    bottom: 125%;
    left: 50%;
    margin-left: -125px;
    opacity: 0;
    transition: opacity 0.3s;
}

.tooltip-icon:hover::after {
    visibility: visible;
    opacity: 1;
}

/* Streamlit input elements styling - ALL INPUTS */

/* Text Input */
.stTextInput > div > div > input {
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
    border: 2px solid #5a9f9f !important;
    border-radius: 8px !important;
    padding: 12px !important;
    font-size: 14px !important;
}

/* Text Area */
.stTextArea > div > div > textarea {
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
    border: 2px solid #5a9f9f !important;
    border-radius: 8px !important;
    padding: 12px !important;
    font-size: 14px !important;
}

/* Number Input */
.stNumberInput > div > div > input {
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
    border: 2px solid #5a9f9f !important;
    border-radius: 8px !important;
    padding: 12px !important;
    font-size: 14px !important;
}

/* Select Box */
.stSelectbox > div > div > div {
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
    border: 2px solid #5a9f9f !important;
    border-radius: 8px !important;
}

/* Multiselect */
.stMultiSelect > div > div > div {
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
    border: 2px solid #5a9f9f !important;
    border-radius: 8px !important;
}

/* Date Input */
.stDateInput > div > div > input {
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
    border: 2px solid #5a9f9f !important;
    border-radius: 8px !important;
    padding: 12px !important;
    font-size: 14px !important;
}

/* Time Input */
.stTimeInput > div > div > input {
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
    border: 2px solid #5a9f9f !important;
    border-radius: 8px !important;
    padding: 12px !important;
    font-size: 14px !important;
}

/* File Uploader */
.stFileUploader > div > div {
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
    border: 2px solid #5a9f9f !important;
    border-radius: 8px !important;
}

/* Color Picker */
.stColorPicker > div > div > input {
    background-color: #f5f5f5 !important;
    border: 2px solid #5a9f9f !important;
    border-radius: 8px !important;
}

/* Focus states for all inputs */
.stTextInput > div > div > input:focus,
.stTextArea > div > div > textarea:focus,
.stNumberInput > div > div > input:focus,
.stDateInput > div > div > input:focus,
.stTimeInput > div > div > input:focus {
    border-color: #667eea !important;
    box-shadow: 0 0 0 2px rgba(102, 126, 234, 0.2) !important;
    outline: none !important;
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
}

/* Active/typing states to ensure text stays visible */
.stTextInput > div > div > input:active,
.stTextArea > div > div > textarea:active,
.stNumberInput > div > div > input:active,
.stDateInput > div > div > input:active,
.stTimeInput > div > div > input:active {
    background-color: #f5f5f5 !important;
    color: #2a2a2a !important;
}

/* Placeholder text for all inputs */
.stTextInput > div > div > input::placeholder,
.stTextArea > div > div > textarea::placeholder,
.stNumberInput > div > div > input::placeholder,
.stDateInput > div > div > input::placeholder,
.stTimeInput > div > div > input::placeholder {
    color: #666666 !important;
    opacity: 0.7 !important;
}

/* Labels for all input types */
.stTextInput > label,
.stTextArea > label,
.stNumberInput > label,
.stSelectbox > label,
.stMultiSelect > label,
.stDateInput > label,
.stTimeInput > label,
.stFileUploader > label,
.stColorPicker > label {
    color: #2a2a2a !important;
    font-weight: 600 !important;
    margin-bottom: 8px !important;
}

/* Dropdown options styling */
.stSelectbox div[data-baseweb="select"] > div > div,
.stMultiSelect div[data-baseweb="select"] > div > div {
    background-color: #f5f5f5 !important;
}

/* File uploader drag and drop area */
.stFileUploader section {
    background-color: #f5f5f5 !important;
    border: 2px dashed #5a9f9f !important;
    border-radius: 8px !important;
}
input,
textarea,
select,
.stSelectbox,
.stMultiSelect {
    color: #2a2a2a !important;
}

/* Force override all button styling */
button[kind="secondary"] {
    height: 48px !important;
    border: 2.2px solid #618f8f !important;
    border-radius: 4px !important;
    margin-top: -5px !important; /* Move button up */
    transform: translateY(-3px) !important; /* Additional upward adjustment */
    background-color: #4a4a4a !important; /* Dark greyish background */
    color: white !important; /* White text */
}

button[kind="secondary"]:hover {
    border: 2.2px solid #618f8f !important;
    transform: translateY(-3px) !important; /* Keep position on hover */
    background-color: #5a5a5a !important; /* Slightly lighter on hover */
    color: white !important; /* Keep white text on hover */
}

button[kind="secondary"]:focus {
    border: 2.2px solid #618f8f !important;
    outline: 2px solid #618f8f !important;
    transform: translateY(-3px) !important; /* Keep position on focus */
    background-color: #4a4a4a !important; /* Keep dark background on focus */
    color: white !important; /* Keep white text on focus */
}

/* Try targeting by data attributes */
[data-testid] button {
    border: 2.2px solid #618f8f !important;
    height: 48px !important;
    margin-top: -5px !important; /* Move button up */
    transform: translateY(-3px) !important; /* Additional upward adjustment */
    background-color: #4a4a4a !important; /* Dark greyish background */
    color: white !important; /* White text */
}

/* Additional targeting for button text specifically */
button[kind="secondary"] p,
button[kind="secondary"] span,
button[kind="secondary"] div {
    color: white !important;
}

[data-testid] button p,
[data-testid] button span,
[data-testid] button div {
    color: white !important;
}

</style>
"""
GS_Sales_Proposal/Seller/seller_utils.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from typing import List
4
+ import os
5
+
6
+ from WebsiteUrl_Agent.agent_runner import get_urls
7
+ import asyncio
8
+ from Document_Upload_Vectordb.pain_points_extractor import *
9
+ from WebScraper.scrape import get_data
10
+
11
+
12
+ # Function to get URLs (placeholder function)
13
+
14
def get_urls_list(company_name) -> List[str]:
    """Fetch candidate website URLs for *company_name*.

    Drives the async ``get_urls`` agent to completion on a fresh event
    loop and hands back its list of URLs. (Placeholder wiring: swap in
    the real URL-fetching function when available.)
    """
    url_search = get_urls(company_name)
    return asyncio.run(url_search)
20
+
21
def check_field_validation(field_name: str, field_value: str, is_mandatory: bool = False) -> bool:
    """Return True when a validation warning should be shown for a field.

    A warning is warranted only for mandatory fields whose value is empty
    or whitespace-only. ``field_value`` may also be None (treated as
    empty) — the original crashed with AttributeError on None input.
    ``field_name`` is accepted for signature compatibility but unused.
    """
    return bool(is_mandatory) and not (field_value or "").strip()
26
+
27
def show_field_warning(field_name: str):
    """Render a red inline warning that *field_name* must be filled in."""
    warning_html = (
        f'<div class="field-warning">⚠️ {field_name} '
        'is mandatory and cannot be empty!</div>'
    )
    st.markdown(warning_html, unsafe_allow_html=True)
30
+
31
def get_url_details(url: str):
    """Synchronously scrape *url* by driving the async ``get_data`` coroutine.

    Best-effort wrapper: returns whatever ``get_data`` produces, or None
    when any exception is raised (the error is printed, not re-raised).
    """
    try:
        # Block until the async scrape completes.
        return asyncio.run(get_data(url))
    except Exception as e:
        print(f"Error: {e}")
        return None
40
+
41
def save_uploaded_file_and_get_path(file):
    """Persist an uploaded file and return its storage path.

    Placeholder implementation: nothing is written to disk yet; the stub
    ignores *file* and always returns the literal string "saved".
    """
    stub_path = "saved"
    return stub_path
43
+
44
+
45
def get_seller_services(filename, filepath):
    """Extract seller service descriptions from an uploaded document.

    Placeholder implementation: both arguments are ignored and the stub
    always returns the literal string "pain points".
    """
    stub_result = "pain points"
    return stub_result
GS_Sales_Proposal/WebScraper/__pycache__/scrape.cpython-313.pyc ADDED
Binary file (6.2 kB). View file
 
GS_Sales_Proposal/WebScraper/__pycache__/scrape_utils.cpython-313.pyc ADDED
Binary file (1.91 kB). View file
 
GS_Sales_Proposal/WebScraper/__pycache__/state.cpython-313.pyc ADDED
Binary file (613 Bytes). View file
 
GS_Sales_Proposal/WebScraper/main.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
import asyncio

from scrape import get_data


async def main():
    """Smoke-test the scraper against a known URL and print the summary.

    Bug fix: ``get_data`` returns a formatted string (or None on crawl
    failure), not a crawl-result object, so accessing
    ``result.extracted_content`` raised AttributeError. Print the value
    directly instead.
    """
    result = await get_data('https://www.whatsapp.com/')
    print(result)


if __name__ == '__main__':
    asyncio.run(main())
GS_Sales_Proposal/WebScraper/scrape.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os
import re
from collections import Counter
from typing import List

from dotenv import load_dotenv
from crawl4ai import LLMConfig, AsyncWebCrawler, CacheMode, CrawlerRunConfig, BrowserConfig
from crawl4ai.extraction_strategy import LLMExtractionStrategy

from WebScraper.state import User

# Note: the original interleaved duplicate `import re` / `from typing import
# List` statements mid-file; all imports are consolidated here once.
load_dotenv()

# LLM-driven extraction strategy: ask Gemini to pull organization name,
# logo URL and "about" description out of each crawled page chunk,
# shaped by the ``User`` pydantic schema.
llm_strategy = LLMExtractionStrategy(
    llm_config=LLMConfig(
        provider="gemini/gemini-1.5-flash",
        api_token=os.getenv("GOOGLE_API_KEY"),
    ),
    schema=User.model_json_schema(),
    extraction_type="schema",
    instruction="""
    You are analyzing a webpage to extract structured information about the organization behind it.

    Your goal is to extract the following:

    1. **Name**: The name of the organization or company.
    2. **Logo**: The URL of the primary logo image (typically found in the header or near the company name).
    3. **Detailed Description**: A clear and informative summary of what the organization does.
       - This should come from the section of the page typically labeled or titled "About", "Who We Are", "Our Story", or similar.
       - If the page does not have a heading, look for paragraphs or text blocks that describe the company's purpose, mission, background, or offerings.
       - Do not include text that is clearly part of blog posts, testimonials, products, or contact details.

    Tips:
    - Focus on sections that describe the identity, mission, background, or goals of the organization.
    - If multiple descriptive sections exist, prioritize the one closest to the top of the page or under an "About"-like heading.
    - Avoid generic filler content like navigation menus, service listings, or unrelated calls to action.

    Return the data in the format defined by the schema.
    """,
    chunk_token_threshold=1000,
    overlap_rate=0.0,
    apply_chunking=True,
    input_format="markdown",  # or "html", "fit_markdown"
    extra_args={"temperature": 0.0, "max_tokens": 800},
)

# Bypass the cache so each run re-crawls and re-extracts.
crawl_config = CrawlerRunConfig(
    extraction_strategy=llm_strategy,
    cache_mode=CacheMode.BYPASS,
)

browser_cfg = BrowserConfig(headless=True)
64
def aggregate_users(users: List[dict]) -> User:
    """Merge per-chunk LLM extraction results into a single ``User``.

    Selection strategy per field:
      * name        — most frequent non-empty name across chunks;
      * logo        — first string logo URL that mentions "logo";
      * description — longest non-empty description;
      * services    — longest services list.
    Chunks flagged with ``error=True`` are discarded up front.

    Bug fix: when every chunk errored out (or *users* is empty) the
    original indexed ``valid_users[0]`` and raised IndexError; an empty
    default ``User`` is returned instead.
    """
    print("🔍 Starting aggregation of users...")

    # Filter out users with error=True
    valid_users = [u for u in users if not u.get('error', False)]

    # Guard: nothing usable survived the error filter.
    if not valid_users:
        return User(name="Unknown", logo="", description="", services=[])

    # Most frequent non-empty name; fall back to the first chunk's name.
    names = [u['name'] for u in valid_users if u.get('name')]
    name_counter = Counter(names)
    name = (
        name_counter.most_common(1)[0][0]
        if name_counter
        else (valid_users[0].get('name') or "Unknown")
    )

    # First string logo URL that actually looks like a logo asset.
    logo = next(
        (
            logo for u in valid_users
            if (logo := u.get('logo')) and isinstance(logo, str) and re.search(r'logo', logo, re.IGNORECASE)
        ),
        ""
    )

    # Longest non-empty description wins.
    descriptions = [u.get('description', '') or '' for u in valid_users]
    description = max(descriptions, key=len, default="")

    # Longest services list wins (the original also collected each
    # chunk's name here but never used it).
    services = max((u.get('services') or [] for u in valid_users), key=len, default=[])

    return User(
        name=name,
        logo=logo,
        description=description,
        services=services
    )
99
+
100
def format_enterprise_details(details_obj: "User") -> str:
    """Render a ``User`` record as a human-readable multi-line summary.

    Bug fix: the original embedded ``'\\n- '.join(...)`` directly inside
    an f-string replacement field; backslashes there are a SyntaxError
    on every Python before 3.12 (PEP 701), so the join is hoisted out.
    Only ``name``, ``description`` and ``services`` attributes are read,
    so any duck-typed object works.
    """
    services_block = '\n- '.join(details_obj.services)
    return f"""Name: {details_obj.name}
Description: {details_obj.description}
Services:
- {services_block}
"""
106
+
107
async def get_data(url: str):
    """Crawl *url*, run the LLM extraction, and aggregate chunk results.

    Returns the formatted enterprise-details string on success, or None
    when the crawl fails (the original fell off the end implicitly).

    Fixes: ``aggregate_users`` was computed twice (once for the debug
    print, once for the return) — now computed once; the failure-path
    message typo ("eroor") is corrected.
    """
    async with AsyncWebCrawler(config=browser_cfg) as crawler:
        result = await crawler.arun(
            url=url,
            config=crawl_config)

        if result.success:
            print(f"Successfully scraped : '\n\n\n {result.extracted_content}")
            # One candidate record per crawled chunk; merge into a single User.
            chunk_records = json.loads(result.extracted_content)
            aggregated = aggregate_users(chunk_records)
            print(aggregated)
            return format_enterprise_details(aggregated)

        print(f"The code exited with error {result.error_message}")
        return None
GS_Sales_Proposal/WebScraper/scrape_utils.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ from urllib.parse import urljoin
5
+
6
def extract_hex_colors(url: str, limit: int = 5) -> list:
    """Scrape up to *limit* distinct CSS hex color codes from a web page.

    Collects inline ``style=`` attributes plus the body of every linked
    stylesheet, then returns colors in first-seen order. Best-effort:
    returns [] on any failure.

    Fixes: the bare ``except:`` on per-stylesheet fetches (which also
    swallowed SystemExit/KeyboardInterrupt) is narrowed to Exception,
    and the ``{3,6}`` quantifier — which accepted invalid 5-digit runs —
    is replaced by a pattern matching only valid 3/4/6/8-digit hex colors.
    """
    try:
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find inline styles
        inline_styles = [tag.get('style', '') for tag in soup.find_all(style=True)]
        css_text = ' '.join(inline_styles)

        # Find linked stylesheets (resolved relative to the page URL)
        css_links = [link['href'] for link in soup.find_all('link', rel='stylesheet') if 'href' in link.attrs]

        for href in css_links:
            full_url = urljoin(url, href)
            try:
                css_response = requests.get(full_url, timeout=5)
                css_text += ' ' + css_response.text
            except Exception:  # best-effort: skip unreachable stylesheets
                continue

        # Extract valid hex codes: #rgb, #rgba, #rrggbb, #rrggbbaa.
        hex_colors = re.findall(r'#(?:[0-9a-fA-F]{3,4}){1,2}\b', css_text)
        hex_colors = list(dict.fromkeys(hex_colors))  # remove duplicates, preserve order
        return hex_colors[:limit]  # return top `limit` hex codes
    except Exception as e:
        print(f"Error extracting hex colors: {e}")
        return []
GS_Sales_Proposal/WebScraper/state.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel,Field
2
+ from typing import List
3
+
4
class User(BaseModel):
    """Structured profile of an organization extracted from its website.

    Used as the extraction schema for the crawler's LLM strategy and as
    the aggregate result type after merging per-chunk extractions.
    """

    # Organization / company name.
    name : str
    # URL of the primary logo image.
    logo : str
    # Free-text "about"-style summary of what the organization does.
    description : str
    # Service offerings mentioned on the site.
    services: List[str]
GS_Sales_Proposal/WebsiteUrl_Agent/__pycache__/agent.cpython-312.pyc ADDED
Binary file (5.12 kB). View file