Abid Ali Awan committed on
Commit
4ce5fe1
Β·
1 Parent(s): 009f12b

Implement initial project structure and setup

Browse files
Files changed (2) hide show
  1. app.py +532 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,532 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os
from datetime import datetime, timezone
from typing import Any, Dict, List

import gradio as gr
from langgraph.graph import StateGraph, END
from mem0 import MemoryClient
from openai import OpenAI
from tavily import TavilyClient
10
+
11
# Initialize services
# Tavily powers both the regulatory-site crawling and supplemental web search.
tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))

# Initialize Mem0 with API key (persistent memory for past findings)
mem0_client = MemoryClient(api_key=os.getenv("MEM0_API_KEY"))

# Initialize OpenAI client with Keywords AI endpoint
# (Keywords AI exposes an OpenAI-compatible chat-completions API.)
client = OpenAI(
    base_url="https://api.keywordsai.co/api/",
    api_key=os.getenv("KEYWORDS_AI_API_KEY"),
)

# Regulatory websites mapping: region -> {source name: press/news landing URL}.
# These landing pages are the crawl entry points in crawl_regulatory_sites.
REGULATORY_SOURCES = {
    "US": {
        "SEC": "https://www.sec.gov/news/pressreleases",
        "FDA": "https://www.fda.gov/news-events/fda-newsroom/press-announcements",
        "FTC": "https://www.ftc.gov/news-events/news/press-releases",
        "CFTC": "https://www.cftc.gov/PressRoom/PressReleases",
        "Federal Register": "https://www.federalregister.gov/documents/current",
    },
    "EU": {
        "European Commission": "https://ec.europa.eu/commission/presscorner/home/en",
        "ESMA": "https://www.esma.europa.eu/press-news/esma-news",
        "EBA": "https://www.eba.europa.eu/news-press/news",
        "ECB": "https://www.ecb.europa.eu/press/pr/html/index.en.html",
    },
    "Global": {
        "BIS": "https://www.bis.org/press/index.htm",
        "IOSCO": "https://www.iosco.org/news/",
        "FSB": "https://www.fsb.org/press/",
    },
}
44
+
45
+
46
# Define the state for our workflow
class RegRadarState(dict):
    """State management for regulatory monitoring workflow.

    A plain ``dict`` subclass: runtime values live in the mapping itself.
    The class-level annotations below document the expected keys
    (presumably they also serve as the schema LangGraph's StateGraph
    reads -- TODO confirm against the langgraph version pinned here).
    """

    industry: str  # sector being monitored, e.g. "Finance"
    region: str  # "US", "EU", or "Global" (selects REGULATORY_SOURCES entry)
    keywords: str  # free-text focus terms supplied by the user
    crawl_results: List[Dict]  # pages collected by crawl_regulatory_sites
    search_results: List[Dict]  # hits from search_additional_sources
    summaries: List[Dict]  # per-item LLM summaries from analyze_and_summarize
    action_items: List[Dict]  # compliance tasks from generate_action_items
    user_id: str  # Mem0 namespace used by store_in_memory
58
+
59
+
60
# Helper function to make LLM calls
def call_llm(prompt: str, temperature: float = 0) -> str:
    """Send a single-turn prompt to the chat model and return its reply text.

    Degrades to an empty string on any API failure so callers can treat a
    blank reply as "no result" instead of handling exceptions themselves.
    """
    chat_messages = [{"role": "user", "content": prompt}]
    try:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=chat_messages,
            temperature=temperature,
        )
        return completion.choices[0].message.content
    except Exception as e:
        # Best-effort: log and return "" rather than propagating.
        print(f"LLM call error: {e}")
        return ""
73
+
74
+
75
# Define agent functions
def crawl_regulatory_sites(state: RegRadarState) -> RegRadarState:
    """Crawl regulatory websites for updates using Tavily's crawl feature.

    Reads ``region``, ``industry`` and ``keywords`` from the state, crawls
    each configured source for that region, and stores the collected pages
    in ``state["crawl_results"]``. Per-source failures are logged and
    skipped so one bad site cannot abort the scan.
    """
    region = state.get("region", "US")
    industry = state.get("industry", "")
    keywords = state.get("keywords", "")

    # Get relevant regulatory URLs based on region (fall back to US sources)
    urls_to_crawl = REGULATORY_SOURCES.get(region, REGULATORY_SOURCES["US"])
    all_crawl_results = []

    # Construct crawl instructions (same prompt for every source)
    crawl_instructions = f"""
    Find pages about:
    - Recent regulatory updates, changes, or announcements
    - New compliance requirements or guidelines
    - Industry: {industry}
    - Keywords: {keywords}
    - Focus on content from the last 30 days
    - Exclude navigation pages and general information
    """

    for source_name, url in urls_to_crawl.items():
        try:
            print(f"Crawling {source_name}...")

            # Execute crawl with focused instructions
            crawl_response = tavily_client.crawl(
                url=url,
                max_depth=2,  # Don't go too deep
                limit=10,  # Limit results per source
                instructions=crawl_instructions,
            )

            # Process crawl results
            for result in crawl_response.get("results", []):
                all_crawl_results.append(
                    {
                        "source": source_name,
                        "url": result.get("url", ""),
                        "title": result.get("title", ""),
                        # FIX: "raw_content" may be present with value None;
                        # .get(..., "") would return None and slicing it
                        # raised TypeError. Normalize with `or ""` first.
                        "content": (result.get("raw_content") or "")[:2000],
                        "crawled_at": datetime.now().isoformat(),
                    }
                )

        except Exception as e:
            # One failing source should not abort the whole scan.
            print(f"Crawl error for {source_name}: {e}")

    state["crawl_results"] = all_crawl_results
    return state
128
+
129
+
130
def search_additional_sources(state: RegRadarState) -> RegRadarState:
    """Supplement crawl results with a targeted Tavily web search.

    Stores up to 5 recent hits in ``state["search_results"]``; on failure
    the list is set to empty so downstream nodes still run.
    """
    industry = state.get("industry", "")
    region = state.get("region", "")
    keywords = state.get("keywords", "")

    # FIX: the year was hard-coded as "2024" and would silently go stale;
    # derive it from the clock so the query always targets recent updates.
    current_year = datetime.now().year
    search_query = (
        f"{industry} {region} regulatory changes compliance updates "
        f"{current_year} {keywords}"
    )

    try:
        # Perform additional search for recent news
        search_results = tavily_client.search(
            query=search_query, max_results=5, include_raw_content=True
        )

        state["search_results"] = search_results.get("results", [])
    except Exception as e:
        # Best-effort: an empty list keeps the workflow moving.
        state["search_results"] = []
        print(f"Search error: {e}")

    return state
153
+
154
+
155
def analyze_and_summarize(state: RegRadarState) -> RegRadarState:
    """Analyze crawl and search results to create summaries.

    Merges both result sets into a uniform shape, asks the LLM to summarize
    each of the first 10 items, and stores the non-empty summaries in
    ``state["summaries"]``.
    """
    crawl_results = state.get("crawl_results", [])
    search_results = state.get("search_results", [])

    # Combine all results into a uniform record shape
    all_results = []

    # Add crawl results
    for result in crawl_results:
        all_results.append(
            {
                "type": "crawl",
                "source": result.get("source", ""),
                "title": result.get("title", ""),
                "url": result.get("url", ""),
                "content": result.get("content") or "",
            }
        )

    # Add search results
    # FIX: Tavily search hits can carry content=None; `or ""` normalizes so
    # the slice below never operates on None.
    for result in search_results:
        all_results.append(
            {
                "type": "search",
                "source": "Web Search",
                "title": result.get("title", ""),
                "url": result.get("url", ""),
                "content": result.get("content") or "",
            }
        )

    summaries = []

    for result in all_results[:10]:  # Limit to top 10 results
        prompt = f"""
        Analyze this regulatory update and provide:
        1. A concise summary (2-3 sentences)
        2. Key compliance implications
        3. Affected entities/sectors
        4. Effective date or timeline

        Source: {result.get("source")}
        Title: {result.get("title")}
        Content: {result.get("content", "")[:1500]}
        URL: {result.get("url", "")}
        """

        response_content = call_llm(prompt)

        # Skip items where the LLM call failed (call_llm returns "")
        if response_content:
            summaries.append(
                {
                    "source": result.get("source", ""),
                    "title": result.get("title", ""),
                    "url": result.get("url", ""),
                    "summary": response_content,
                    "date": datetime.now().isoformat(),
                    "type": result.get("type", ""),
                }
            )

    state["summaries"] = summaries
    return state
219
+
220
+
221
def generate_action_items(state: RegRadarState) -> RegRadarState:
    """Generate actionable compliance tasks based on findings.

    Feeds all summaries to the LLM in one prompt and stores the structured
    reply as a single entry in ``state["action_items"]`` (empty list when
    there are no summaries or the LLM call fails).
    """
    # FIX: direct indexing raised KeyError if a prior node failed to set
    # "summaries"; .get keeps this node robust to partial state.
    summaries = state.get("summaries", [])
    industry = state.get("industry", "")

    if not summaries:
        state["action_items"] = []
        return state

    prompt = f"""
    Based on these regulatory updates for the {industry} industry, generate specific action items for compliance teams.

    Updates found:
    {json.dumps(summaries, indent=2)}

    For each significant update, provide:
    1. Priority level (πŸ”΄ High / 🟑 Medium / 🟒 Low)
    2. Specific action required
    3. Timeline/deadline
    4. Responsible party/department
    5. Resources needed

    Format as a structured, actionable list. Group by priority.
    """

    response_content = call_llm(prompt)

    if response_content:
        state["action_items"] = [
            {"content": response_content, "generated_at": datetime.now().isoformat()}
        ]
    else:
        state["action_items"] = []

    return state
256
+
257
+
258
def store_in_memory(state: RegRadarState) -> RegRadarState:
    """Store important updates in Mem0 for future reference.

    Each summary becomes one memory entry tagged with source/date/url
    metadata. Storage failures are logged and skipped so the workflow
    always completes.
    """
    user_id = state.get("user_id", "default_user")

    # Store summaries in memory.
    # FIX: .get guards against a missing "summaries" key (direct indexing
    # raised KeyError if the analyze node did not run).
    for summary in state.get("summaries", []):
        try:
            mem0_client.add(
                messages=[
                    {
                        "role": "system",
                        "content": f"Regulatory update from {summary['source']}: {summary['title']} - {summary['summary']}",
                    }
                ],
                user_id=user_id,
                metadata={
                    "type": "regulatory_update",
                    "source": summary["source"],
                    "date": summary["date"],
                    "url": summary["url"],
                },
            )
        except Exception as e:
            # One failed write should not block the remaining summaries.
            print(f"Memory storage error: {e}")

    return state
284
+
285
+
286
# Build the workflow graph
def create_workflow():
    """Assemble and compile the linear LangGraph pipeline.

    Order: crawl -> search -> analyze -> generate_actions -> store_memory -> END.
    """
    workflow = StateGraph(RegRadarState)

    # Nodes in execution order; edges are derived from adjacency below.
    pipeline = [
        ("crawl", crawl_regulatory_sites),
        ("search", search_additional_sources),
        ("analyze", analyze_and_summarize),
        ("generate_actions", generate_action_items),
        ("store_memory", store_in_memory),
    ]
    for node_name, node_fn in pipeline:
        workflow.add_node(node_name, node_fn)

    # Linear flow: entry at the first node, each node feeds the next.
    workflow.set_entry_point(pipeline[0][0])
    for (src, _), (dst, _) in zip(pipeline, pipeline[1:]):
        workflow.add_edge(src, dst)
    workflow.add_edge(pipeline[-1][0], END)

    return workflow.compile()


# Initialize workflow
app_workflow = create_workflow()
310
+
311
+
312
# Gradio interface functions
def scan_regulations(industry, region, keywords, deep_scan):
    """Main function to scan for regulatory updates.

    Runs the compiled LangGraph workflow with a fresh state and formats the
    findings as a markdown report for the UI.

    NOTE(review): `deep_scan` is accepted for UI compatibility but does not
    currently alter the workflow (crawling always runs) -- TODO wire it up
    to skip/enable the crawl node.
    """

    # Execute workflow
    initial_state = RegRadarState(
        industry=industry,
        region=region,
        keywords=keywords,
        crawl_results=[],
        search_results=[],
        summaries=[],
        action_items=[],
        user_id="compliance_team",
    )

    result = app_workflow.invoke(initial_state)

    # Format output (plain string: the original used an f-string with no
    # placeholders here)
    output = "### πŸ“‹ Regulatory Update Report\n"
    output += f"**Industry:** {industry} | **Region:** {region}\n"
    # FIX: the report labels the timestamp "UTC" but datetime.now() returned
    # naive local time; use an aware UTC timestamp so the label is accurate.
    output += f"**Scan Time:** {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}\n\n"

    # Show crawl statistics
    crawl_count = len(result.get("crawl_results", []))
    search_count = len(result.get("search_results", []))
    output += f"πŸ“Š **Sources Analyzed:** {crawl_count} regulatory pages crawled, {search_count} additional sources searched\n\n"

    if result.get("summaries"):
        output += "#### πŸ” Recent Regulatory Updates:\n\n"

        # Group summaries by their originating source
        by_source = {}
        for summary in result["summaries"]:
            by_source.setdefault(summary["source"], []).append(summary)

        for source, items in by_source.items():
            output += f"**πŸ“Œ {source}**\n\n"
            for idx, summary in enumerate(items, 1):
                output += f"**{idx}. {summary['title']}**\n"
                output += f"{summary['summary']}\n"
                output += f"[πŸ”— Source Link]({summary['url']})\n\n"
    else:
        output += "No recent regulatory updates found for your criteria.\n\n"

    if result.get("action_items"):
        output += "#### βœ… Recommended Action Items:\n\n"
        output += result["action_items"][0]["content"]

    return output
365
+
366
+
367
def get_memory_insights(user_id="compliance_team", query=""):
    """Retrieve historical regulatory updates from memory.

    Searches Mem0 for up to 20 matching entries and renders them as a
    markdown list; returns an error string on failure.
    """
    try:
        memories = mem0_client.search(
            query=query or "regulatory updates", user_id=user_id, limit=20
        )

        parts = ["### πŸ“š Historical Regulatory Updates\n\n"]

        if memories:
            for idx, memory in enumerate(memories, 1):
                parts.append(f"**{idx}.** {memory.get('content', '')}\n")
                metadata = memory.get("metadata")
                if metadata:
                    parts.append(f"  - Source: {metadata.get('source', 'N/A')}\n")
                    parts.append(f"  - Date: {metadata.get('date', 'N/A')}\n\n")
        else:
            parts.append("No historical updates found matching your query.\n")

        return "".join(parts)
    except Exception as e:
        return f"Error retrieving memories: {e}"
389
+
390
+
391
def analyze_custom_document(document_text):
    """Analyze a custom regulatory document pasted by the user.

    Returns a markdown report, or a short error message when the input is
    empty or the LLM call fails.
    """
    if not document_text:
        return "Please provide document text to analyze."

    # Truncate up front to limit prompt size / prevent token overflow.
    # FIX: in the original this comment lived INSIDE the f-string
    # ("{document_text[:3000]} # Limit to prevent token overflow") and was
    # sent to the model as part of the prompt.
    excerpt = document_text[:3000]

    prompt = f"""
    Analyze this regulatory document and provide:
    1. Executive summary (3-4 sentences)
    2. Key compliance requirements
    3. Affected parties
    4. Implementation timeline
    5. Potential challenges
    6. Recommended actions

    Document:
    {excerpt}
    """

    response_content = call_llm(prompt)

    if response_content:
        return f"### πŸ“„ Document Analysis\n\n{response_content}"
    else:
        return "Error analyzing document. Please try again."
415
+
416
+
417
# Create Gradio interface
# Three functional tabs (scan / analyze / history) plus a static About page.
with gr.Blocks(
    title="RegRadar - Regulatory Compliance Copilot", theme=gr.themes.Soft()
) as demo:
    # App header
    gr.Markdown("""
    # 🚨 RegRadar - Autonomous Regulatory-Change Copilot

    **AI-powered regulatory monitoring with intelligent web crawling**
    """)

    with gr.Tab("πŸ” Scan Regulations"):
        with gr.Row():
            with gr.Column(scale=1):
                # Industry/region feed directly into scan_regulations
                industry_input = gr.Dropdown(
                    label="Industry/Sector",
                    choices=[
                        "Finance",
                        "Healthcare",
                        "Technology",
                        "Energy",
                        "Manufacturing",
                        "Retail",
                        "Other",
                    ],
                    value="Finance",
                )
                region_input = gr.Dropdown(
                    label="Region", choices=["US", "EU", "Global"], value="US"
                )

            with gr.Column(scale=2):
                keywords_input = gr.Textbox(
                    label="Keywords (optional)",
                    placeholder="e.g., AI, crypto, data privacy, ESG, cybersecurity",
                    lines=2,
                )
                # NOTE(review): this checkbox is passed to scan_regulations
                # but is not currently used to change the scan behavior.
                deep_scan = gr.Checkbox(
                    label="Deep Scan (crawl regulatory websites)", value=True
                )

        scan_button = gr.Button(
            "πŸš€ Start Regulatory Scan", variant="primary", size="lg"
        )

        # Markdown panel that receives the formatted scan report
        output_display = gr.Markdown()

        scan_button.click(
            fn=scan_regulations,
            inputs=[industry_input, region_input, keywords_input, deep_scan],
            outputs=output_display,
        )

    with gr.Tab("πŸ“„ Analyze Document"):
        document_input = gr.Textbox(
            label="Paste regulatory document text",
            placeholder="Paste the full text of a regulatory document, announcement, or compliance guideline...",
            lines=10,
        )
        analyze_button = gr.Button("πŸ” Analyze Document", variant="primary")
        document_output = gr.Markdown()

        analyze_button.click(
            fn=analyze_custom_document, inputs=document_input, outputs=document_output
        )

    with gr.Tab("πŸ“š Memory & History"):
        search_memory = gr.Textbox(
            label="Search historical updates",
            placeholder="e.g., GDPR, SEC rules, FDA guidelines",
        )
        history_button = gr.Button("πŸ“– Search Historical Updates")
        history_display = gr.Markdown()

        # gr.State supplies the fixed user id as the first positional input
        history_button.click(
            fn=get_memory_insights,
            inputs=[gr.State("compliance_team"), search_memory],
            outputs=history_display,
        )

    with gr.Tab("ℹ️ About"):
        gr.Markdown("""
        ### About RegRadar

        RegRadar uses **advanced web crawling** to monitor regulatory changes:

        #### πŸ•ΈοΈ Intelligent Crawling
        - **Crawls official regulatory websites** (SEC, FDA, EU Commission, etc.)
        - **Follows links up to 2 levels deep** to find relevant updates
        - **Filters content** based on your industry and keywords

        #### πŸ€– AI-Powered Analysis
        - **Powered by GPT-4o-mini** via Keywords AI
        - **Summarizes complex regulations** into clear insights
        - **Identifies compliance implications** specific to your industry
        - **Generates prioritized action items** with deadlines

        #### 🧠 Persistent Memory
        - **Remembers all findings** for future reference
        - **Searchable history** of regulatory changes
        - **Tracks compliance trends** over time

        #### πŸ“„ Document Analysis
        - **Analyze any regulatory document** you upload
        - **Extract key requirements** and timelines
        - **Get actionable recommendations**

        **Technologies:**
        - πŸ•·οΈ Tavily Crawl API for intelligent web traversal
        - πŸ€– OpenAI GPT-4o-mini via Keywords AI
        - 🧠 Mem0 for persistent memory
        - πŸ”„ LangGraph for orchestration
        """)
529
+
530
# Launch the app
if __name__ == "__main__":
    # Start the Gradio server only when executed directly (not on import).
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
langgraph==0.4.8
openai==1.88.0
tavily-python==0.7.6
mem0ai==0.1.108
gradio