AlBaraa63 committed on
Commit
c3de917
·
1 Parent(s): 443f8d3

Initial commit: MissionControlMCP - 8 Enterprise Automation Tools

Browse files
.gitignore ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ pip-wheel-metadata/
20
+ share/python-wheels/
21
+ *.egg-info/
22
+ .installed.cfg
23
+ *.egg
24
+ MANIFEST
25
+
26
+ # Virtual Environment
27
+ venv/
28
+ env/
29
+ ENV/
30
+ env.bak/
31
+ venv.bak/
32
+
33
+ # PyCharm
34
+ .idea/
35
+
36
+ # VSCode
37
+ .vscode/
38
+ *.code-workspace
39
+
40
+ # Jupyter Notebook
41
+ .ipynb_checkpoints
42
+
43
+ # pytest
44
+ .pytest_cache/
45
+ .coverage
46
+ htmlcov/
47
+
48
+ # mypy
49
+ .mypy_cache/
50
+ .dmypy.json
51
+ dmypy.json
52
+
53
+ # Pyre type checker
54
+ .pyre/
55
+
56
+ # macOS
57
+ .DS_Store
58
+ .AppleDouble
59
+ .LSOverride
60
+
61
+ # Windows
62
+ Thumbs.db
63
+ ehthumbs.db
64
+ Desktop.ini
65
+ $RECYCLE.BIN/
66
+
67
+ # Logs
68
+ *.log
69
+
70
+ # Environment variables
71
+ .env
72
+ .env.local
73
+
74
+ # Model cache (sentence transformers)
75
+ .cache/
76
+ models/
77
+
78
+ # Hugging Face cache
79
+ ~/.cache/huggingface/
80
+
81
+ # Test output files (NOTE: the *.pdf/*.txt/*.csv wildcards below also match
+ # tracked data such as examples/*.txt and examples/*.csv referenced in the
+ # docs — add negations like !examples/*.csv if those files should stay in git)
82
+ test_output/
83
+ *.pdf
84
+ *.txt
85
+ *.csv
86
+ output_*.png
87
+
88
+ # Temporary test files (NOTE: test_*.py also matches real pytest test
+ # modules — scope this pattern or negate tracked tests if a test suite is added)
89
+ test_*.py
90
+ temp/
91
+ tmp/
API.md ADDED
@@ -0,0 +1,583 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 📖 API Reference
2
+
3
+ Complete API documentation for all 8 MissionControlMCP tools.
4
+
5
+ ---
6
+
7
+ ## 1. PDF Reader
8
+
9
+ ### `read_pdf(file_path: str) -> Dict[str, Any]`
10
+
11
+ Extract text and metadata from PDF files.
12
+
13
+ **Parameters:**
14
+ - `file_path` (str): Absolute path to PDF file
15
+
16
+ **Returns:**
17
+ ```python
18
+ {
19
+ "text": str, # Full text content from all pages
20
+ "pages": int, # Number of pages
21
+ "metadata": { # Document metadata
22
+ "author": str,
23
+ "creator": str,
24
+ "producer": str,
25
+ "subject": str,
26
+ "title": str,
27
+ "creation_date": str,
28
+ "modification_date": str
29
+ }
30
+ }
31
+ ```
32
+
33
+ **Example:**
34
+ ```python
35
+ from tools.pdf_reader import read_pdf
36
+
37
+ result = read_pdf("C:/docs/report.pdf")
38
+ print(f"Pages: {result['pages']}")
39
+ print(f"Author: {result['metadata']['author']}")
40
+ print(result['text'][:500]) # First 500 chars
41
+ ```
42
+
43
+ **Errors:**
44
+ - `FileNotFoundError`: PDF file not found
45
+ - `ImportError`: PyPDF2 not installed
46
+ - `Exception`: Invalid or corrupted PDF
47
+
48
+ ---
49
+
50
+ ### `get_pdf_info(file_path: str) -> Dict[str, Any]`
51
+
52
+ Get basic PDF information without extracting text.
53
+
54
+ **Parameters:**
55
+ - `file_path` (str): Path to PDF file
56
+
57
+ **Returns:**
58
+ ```python
59
+ {
60
+ "page_count": int,
61
+ "is_encrypted": bool,
62
+ "file_size_bytes": int,
63
+ "file_name": str
64
+ }
65
+ ```
66
+
67
+ ---
68
+
69
+ ## 2. Text Extractor
70
+
71
+ ### `extract_text(text: str, operation: str, **kwargs) -> Dict[str, Any]`
72
+
73
+ Process and extract information from text.
74
+
75
+ **Parameters:**
76
+ - `text` (str): Input text to process
77
+ - `operation` (str): Operation type
78
+ - `"clean"` - Remove extra whitespace
79
+ - `"summarize"` - Create summary
80
+ - `"chunk"` - Split into chunks
81
+ - `"keywords"` - Extract keywords
82
+ - `**kwargs`: Operation-specific parameters
83
+
84
+ **Operation: clean**
85
+ ```python
86
+ extract_text(text, operation="clean")
87
+ # Returns: {"result": str, "word_count": int}
88
+ ```
89
+
90
+ **Operation: summarize**
91
+ ```python
92
+ extract_text(text, operation="summarize", max_length=500)
93
+ # max_length: Maximum summary length (default: 500)
94
+ # Returns: {"result": str, "word_count": int, "original_length": int}
95
+ ```
96
+
97
+ **Operation: chunk**
98
+ ```python
99
+ extract_text(text, operation="chunk", chunk_size=100, overlap=20)
100
+ # chunk_size: Characters per chunk (default: 100)
101
+ # overlap: Overlapping characters (default: 20)
102
+ # Returns: {"chunks": List[str], "chunk_count": int}
103
+ ```
104
+
105
+ **Operation: keywords**
106
+ ```python
107
+ extract_text(text, operation="keywords", top_n=10)
108
+ # top_n: Number of keywords (default: 10)
109
+ # Returns: {"result": str, "keywords": List[str]}
110
+ ```
111
+
112
+ **Example:**
113
+ ```python
114
+ from tools.text_extractor import extract_text
115
+
116
+ # Get keywords
117
+ result = extract_text("Your text here...", operation="keywords")
118
+ print(result['result']) # "keyword1, keyword2, keyword3"
119
+
120
+ # Summarize
121
+ summary = extract_text("Long text...", operation="summarize", max_length=200)
122
+ print(summary['result'])
123
+ ```
124
+
125
+ ---
126
+
127
+ ## 3. Web Fetcher
128
+
129
+ ### `fetch_web_content(url: str, timeout: int = 30) -> Dict[str, Any]`
130
+
131
+ Fetch and parse web page content.
132
+
133
+ **Parameters:**
134
+ - `url` (str): Website URL
135
+ - `timeout` (int): Request timeout in seconds (default: 30)
136
+
137
+ **Returns:**
138
+ ```python
139
+ {
140
+ "url": str,
141
+ "title": str,
142
+ "content": str, # Clean text content
143
+ "html": str, # Raw HTML
144
+ "links": List[str], # All URLs found
145
+ "status_code": int, # HTTP status
146
+ "timestamp": str
147
+ }
148
+ ```
149
+
150
+ **Example:**
151
+ ```python
152
+ from tools.web_fetcher import fetch_web_content
153
+
154
+ result = fetch_web_content("https://example.com")
155
+ print(f"Title: {result['title']}")
156
+ print(f"Content: {result['content'][:200]}")
157
+ print(f"Links found: {len(result['links'])}")
158
+ ```
159
+
160
+ **Errors:**
161
+ - `requests.exceptions.Timeout`: Request timed out
162
+ - `requests.exceptions.RequestException`: Network error
163
+ - `Exception`: Invalid URL or parsing error
164
+
165
+ ---
166
+
167
+ ## 4. RAG Search
168
+
169
+ ### `search_documents(query: str, documents: List[str], top_k: int = 3) -> Dict[str, Any]`
170
+
171
+ Semantic search using vector embeddings and FAISS.
172
+
173
+ **Parameters:**
174
+ - `query` (str): Search query
175
+ - `documents` (List[str]): List of documents to search
176
+ - `top_k` (int): Number of results to return (default: 3)
177
+
178
+ **Returns:**
179
+ ```python
180
+ {
181
+ "query": str,
182
+ "total_documents": int,
183
+ "returned_results": int,
184
+ "results": [
185
+ {
186
+ "rank": int,
187
+ "document": str,
188
+ "score": float, # 0.0 to 1.0 (higher = more relevant)
189
+ "distance": float # L2 distance
190
+ }
191
+ ]
192
+ }
193
+ ```
194
+
195
+ **Example:**
196
+ ```python
197
+ from tools.rag_search import search_documents
198
+
199
+ docs = [
200
+ "Machine learning is a subset of AI",
201
+ "Python is a programming language",
202
+ "Data science uses statistics"
203
+ ]
204
+
205
+ result = search_documents("artificial intelligence", docs, top_k=2)
206
+
207
+ for item in result['results']:
208
+ print(f"Score: {item['score']:.4f} - {item['document']}")
209
+ ```
210
+
211
+ **Features:**
212
+ - Semantic matching (understands meaning, not just keywords)
213
+ - Uses sentence-transformers (all-MiniLM-L6-v2)
214
+ - FAISS for fast vector search
215
+
216
+ ---
217
+
218
+ ### `multi_query_search(queries: List[str], documents: List[str], top_k: int = 3) -> Dict[str, Any]`
219
+
220
+ Search multiple queries at once.
221
+
222
+ **Returns:**
223
+ ```python
224
+ {
225
+ "queries": List[str],
226
+ "results": {
227
+ "query1": [results],
228
+ "query2": [results]
229
+ }
230
+ }
231
+ ```
232
+
233
+ ---
234
+
235
+ ## 5. Data Visualizer
236
+
237
+ ### `visualize_data(data: str, chart_type: str, x_column: str = None, y_column: str = None, title: str = "Data Visualization") -> Dict[str, Any]`
238
+
239
+ Create charts from CSV or JSON data.
240
+
241
+ **Parameters:**
242
+ - `data` (str): CSV or JSON string
243
+ - `chart_type` (str): Chart type
244
+ - `"bar"` - Bar chart
245
+ - `"line"` - Line chart
246
+ - `"pie"` - Pie chart
247
+ - `"scatter"` - Scatter plot
248
+ - `x_column` (str): X-axis column name
249
+ - `y_column` (str): Y-axis column name
250
+ - `title` (str): Chart title
251
+
252
+ **Returns:**
253
+ ```python
254
+ {
255
+ "image_base64": str, # Base64-encoded PNG image
256
+ "dimensions": {
257
+ "width": int,
258
+ "height": int
259
+ },
260
+ "chart_type": str,
261
+ "title": str,
262
+ "columns_used": {
263
+ "x": str,
264
+ "y": str
265
+ }
266
+ }
267
+ ```
268
+
269
+ **Example:**
270
+ ```python
271
+ from tools.data_visualizer import visualize_data
272
+ import base64
273
+
274
+ csv_data = """month,revenue
275
+ Jan,5000000
276
+ Feb,5200000
277
+ Mar,5400000"""
278
+
279
+ result = visualize_data(
280
+ data=csv_data,
281
+ chart_type="line",
282
+ x_column="month",
283
+ y_column="revenue",
284
+ title="Revenue Trends"
285
+ )
286
+
287
+ # Save chart
288
+ with open("chart.png", "wb") as f:
289
+ f.write(base64.b64decode(result['image_base64']))
290
+ ```
291
+
292
+ ---
293
+
294
+ ## 6. File Converter
295
+
296
+ ### `convert_file(input_path: str, output_path: str, conversion_type: str) -> Dict[str, Any]`
297
+
298
+ Convert between PDF, TXT, and CSV formats.
299
+
300
+ **Parameters:**
301
+ - `input_path` (str): Input file path
302
+ - `output_path` (str): Output file path
303
+ - `conversion_type` (str): Conversion type
304
+ - `"pdf_to_txt"` - PDF → Text
305
+ - `"txt_to_pdf"` - Text → PDF
306
+ - `"csv_to_txt"` - CSV → Text
307
+ - `"txt_to_csv"` - Text → CSV
308
+
309
+ **Returns:**
310
+ ```python
311
+ {
312
+ "success": bool,
313
+ "input_file": str,
314
+ "output_file": str,
315
+ "conversion_type": str,
316
+ "file_size_bytes": int
317
+ }
318
+ ```
319
+
320
+ **Example:**
321
+ ```python
322
+ from tools.file_converter import convert_file
323
+
324
+ result = convert_file(
325
+ input_path="document.pdf",
326
+ output_path="document.txt",
327
+ conversion_type="pdf_to_txt"
328
+ )
329
+
330
+ print(f"Converted: {result['success']}")
331
+ print(f"Output: {result['output_file']}")
332
+ ```
333
+
334
+ ---
335
+
336
+ ## 7. Email Intent Classifier
337
+
338
+ ### `classify_email_intent(email_text: str) -> Dict[str, Any]`
339
+
340
+ Classify email intent using NLP pattern matching.
341
+
342
+ **Parameters:**
343
+ - `email_text` (str): Email content (subject + body)
344
+
345
+ **Returns:**
346
+ ```python
347
+ {
348
+ "intent": str, # Primary intent
349
+ "confidence": float, # 0.0 to 1.0
350
+ "secondary_intents": [
351
+ {
352
+ "intent": str,
353
+ "confidence": float
354
+ }
355
+ ],
356
+ "explanation": str
357
+ }
358
+ ```
359
+
360
+ **Intent Types:**
361
+ - `complaint` - Customer complaints
362
+ - `inquiry` - Information requests
363
+ - `request` - Action requests
364
+ - `feedback` - Suggestions/reviews
365
+ - `order` - Purchase-related
366
+ - `meeting` - Meeting scheduling
367
+ - `urgent` - High priority issues
368
+ - `application` - Job applications
369
+ - `sales` - Sales pitches
370
+ - `other` - Unclassified
371
+
372
+ **Example:**
373
+ ```python
374
+ from tools.email_intent_classifier import classify_email_intent
375
+
376
+ email = """
377
+ Subject: Order Issue
378
+ My order #12345 hasn't arrived yet. Can you help?
379
+ """
380
+
381
+ result = classify_email_intent(email)
382
+ print(f"Intent: {result['intent']}") # "complaint"
383
+ print(f"Confidence: {result['confidence']}") # 0.85
384
+ ```
385
+
386
+ ---
387
+
388
+ ### `classify_batch(emails: List[str]) -> Dict[str, Any]`
389
+
390
+ Classify multiple emails at once.
391
+
392
+ **Returns:**
393
+ ```python
394
+ {
395
+ "results": [
396
+ {"email_index": int, "intent": str, "confidence": float},
397
+ ...
398
+ ],
399
+ "total_processed": int
400
+ }
401
+ ```
402
+
403
+ ---
404
+
405
+ ## 8. KPI Generator
406
+
407
+ ### `generate_kpis(data: str, metrics: List[str] = None) -> Dict[str, Any]`
408
+
409
+ Calculate business KPIs from financial data.
410
+
411
+ **Parameters:**
412
+ - `data` (str): JSON string with business data
413
+ - `metrics` (List[str]): Metric categories (optional)
414
+ - `"revenue"` - Revenue-related KPIs
415
+ - `"growth"` - Growth rates
416
+ - `"efficiency"` - Efficiency metrics
417
+ - `"customer"` - Customer metrics
418
+ - `"operational"` - Operational metrics
419
+
420
+ **Input Data Format:**
421
+ ```json
422
+ {
423
+ "revenue": 5000000,
424
+ "costs": 3000000,
425
+ "customers": 2500,
426
+ "current_revenue": 5000000,
427
+ "previous_revenue": 4500000,
428
+ "current_customers": 2500,
429
+ "previous_customers": 2300,
430
+ "employees": 50,
431
+ "marketing_spend": 500000,
432
+ "sales": 5000000,
433
+ "cogs": 2000000
434
+ }
435
+ ```
436
+
437
+ **Returns:**
438
+ ```python
439
+ {
440
+ "kpis": {
441
+ "total_revenue": float,
442
+ "profit": float,
443
+ "profit_margin_percent": float,
444
+ "revenue_growth": float,
445
+ "revenue_per_customer": float,
446
+ "revenue_per_employee": float,
447
+ "customer_growth_rate": float,
448
+ ...
449
+ },
450
+ "summary": str, # Executive summary
451
+ "trends": List[str], # Identified trends
452
+ "metrics_analyzed": List[str],
453
+ "data_points": int
454
+ }
455
+ ```
456
+
457
+ **Example:**
458
+ ```python
459
+ from tools.kpi_generator import generate_kpis
460
+ import json
461
+
462
+ data = {
463
+ "revenue": 5000000,
464
+ "costs": 3000000,
465
+ "customers": 2500,
466
+ "employees": 50
467
+ }
468
+
469
+ result = generate_kpis(json.dumps(data), metrics=["revenue", "efficiency"])
470
+
471
+ print(f"Profit: ${result['kpis']['profit']:,.0f}")
472
+ print(f"Margin: {result['kpis']['profit_margin_percent']:.1f}%")
473
+ print(f"\nSummary: {result['summary']}")
474
+ ```
475
+
476
+ ---
477
+
478
+ ## Error Handling
479
+
480
+ All tools follow consistent error handling:
481
+
482
+ ```python
483
+ try:
484
+ result = tool_function(params)
485
+ except FileNotFoundError as e:
486
+ print(f"File not found: {e}")
487
+ except ValueError as e:
488
+ print(f"Invalid input: {e}")
489
+ except ImportError as e:
490
+ print(f"Missing dependency: {e}")
491
+ except Exception as e:
492
+ print(f"Unexpected error: {e}")
493
+ ```
494
+
495
+ ---
496
+
497
+ ## Type Hints
498
+
499
+ All functions use Python type hints:
500
+
501
+ ```python
502
+ from typing import Dict, Any, List
503
+
504
+ def function_name(param: str) -> Dict[str, Any]:
505
+ ...
506
+ ```
507
+
508
+ ---
509
+
510
+ ## Logging
511
+
512
+ All tools use Python logging:
513
+
514
+ ```python
515
+ import logging
516
+ logger = logging.getLogger(__name__)
517
+
518
+ logger.info("Operation completed")
519
+ logger.warning("Warning message")
520
+ logger.error("Error occurred")
521
+ ```
522
+
523
+ ---
524
+
525
+ ## Dependencies
526
+
527
+ See `requirements.txt` for all dependencies:
528
+
529
+ ```txt
530
+ mcp>=1.0.0
531
+ pypdf2>=3.0.0
532
+ requests>=2.31.0
533
+ beautifulsoup4>=4.12.0
534
+ pandas>=2.0.0
535
+ numpy>=1.24.0
536
+ matplotlib>=3.7.0
537
+ seaborn>=0.12.0
538
+ scikit-learn>=1.3.0
539
+ nltk>=3.8.0
540
+ pydantic>=2.0.0
541
+ faiss-cpu>=1.7.4
542
+ sentence-transformers>=2.2.0
543
+ ```
544
+
545
+ ---
546
+
547
+ ## MCP Integration
548
+
549
+ All tools are registered in `mcp_server.py`:
550
+
551
+ ```python
552
+ server.register_tool(
553
+ name="pdf_reader",
554
+ description="Extract text and metadata from PDF files",
555
+ input_schema={
556
+ "type": "object",
557
+ "properties": {
558
+ "file_path": {"type": "string"}
559
+ },
560
+ "required": ["file_path"]
561
+ }
562
+ )
563
+ ```
564
+
565
+ ---
566
+
567
+ ## Version Information
568
+
569
+ - **API Version:** 1.0.0
570
+ - **Python:** 3.8+
571
+ - **MCP Protocol:** 1.0.0
572
+
573
+ ---
574
+
575
+ ## Support
576
+
577
+ For issues or questions:
578
+ - GitHub: AlBaraa-1/CleanEye-Hackathon
579
+ - Documentation: README.md
580
+ - Examples: EXAMPLES.md
581
+ - Testing: TESTING.md
582
+
583
+ **Complete API reference for MissionControlMCP!** 🚀
ARCHITECTURE.md ADDED
@@ -0,0 +1,557 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🏗️ System Architecture
2
+
3
+ MissionControlMCP system design and architecture documentation.
4
+
5
+ ---
6
+
7
+ ## 📊 High-Level Architecture
8
+
9
+ ```
10
+ ┌─────────────────────────────────────────────────────────────┐
11
+ │ Client Layer │
12
+ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
13
+ │ │ Claude │ │ Custom │ │ Other MCP │ │
14
+ │ │ Desktop │ │ Client │ │ Clients │ │
15
+ │ └──────────────┘ └──────────────┘ └──────────────┘ │
16
+ └──────────────────────┬──────────────────────────────────────┘
17
+ │ MCP Protocol (stdio)
18
+ ┌──────────────────────┴──────────────────────────────────────┐
19
+ │ MCP Server Layer │
20
+ │ ┌────────────────────────────────────────────────────────┐ │
21
+ │ │ mcp_server.py │ │
22
+ │ │ • Tool Registration │ │
23
+ │ │ • Request Routing │ │
24
+ │ │ • Response Formatting │ │
25
+ │ └────────────────────────────────────────────────────────┘ │
26
+ └──────────────────────┬──────────────────────────────────────┘
27
+
28
+ ┌──────────────────────┴──────────────────────────────────────┐
29
+ │ Business Logic Layer │
30
+ │ ┌──────────┬──────────┬──────────┬──────────┐ │
31
+ │ │ PDF │ Text │ Web │ RAG │ │
32
+ │ │ Reader │ Extract │ Fetcher │ Search │ │
33
+ │ ├──────────┼──────────┼──────────┼──────────┤ │
34
+ │ │ Data │ File │ Email │ KPI │ │
35
+ │ │ Visual │ Convert │ Classify │ Generate │ │
36
+ │ └──────────┴──────────┴──────────┴──────────┘ │
37
+ └──────────────────────┬──────────────────────────────────────┘
38
+
39
+ ┌──────────────────────┴──────────────────────────────────────┐
40
+ │ Utility Layer │
41
+ │ ┌────────────────────────────────────────────────────────┐ │
42
+ │ │ • helpers.py - Text processing utilities │ │
43
+ │ │ • rag_utils.py - Vector search & FAISS │ │
44
+ │ │ • schemas.py - Pydantic models │ │
45
+ │ └────────────────────────────────────────────────────────┘ │
46
+ └─────────────────────────────────────────────────────────────┘
47
+ ```
48
+
49
+ ---
50
+
51
+ ## 🧩 Component Architecture
52
+
53
+ ### 1. MCP Server (`mcp_server.py`)
54
+
55
+ **Responsibilities:**
56
+ - Register all 8 tools with MCP SDK
57
+ - Handle incoming tool requests
58
+ - Route requests to appropriate tool functions
59
+ - Format and return responses
60
+ - Error handling and logging
61
+
62
+ **Flow:**
63
+ ```
64
+ Client Request → MCP Protocol → Server → Tool → Response → Client
65
+ ```
66
+
67
+ **Code Structure:**
68
+ ```python
69
+ # Tool Registration
70
+ server.register_tool(name, description, input_schema)
71
+
72
+ # Request Handler
73
+ async def call_tool(name, arguments):
74
+ if name == "pdf_reader":
75
+ return await pdf_reader.read_pdf(**arguments)
76
+ elif name == "text_extractor":
77
+ return await text_extractor.extract_text(**arguments)
78
+ # ... other tools
79
+
80
+ # Server Startup
81
+ async with stdio_server() as (read_stream, write_stream):
82
+ await server.run(read_stream, write_stream)
83
+ ```
84
+
85
+ ---
86
+
87
+ ### 2. Tool Layer (`tools/`)
88
+
89
+ Each tool is independent and follows this pattern:
90
+
91
+ **Tool Structure:**
92
+ ```python
93
+ """
94
+ Tool Name - Description
95
+ """
96
+ import logging
97
+ from typing import Dict, Any
98
+
99
+ logger = logging.getLogger(__name__)
100
+
101
+ def tool_function(param: str) -> Dict[str, Any]:
102
+ """
103
+ Tool description.
104
+
105
+ Args:
106
+ param: Parameter description
107
+
108
+ Returns:
109
+ Standardized result dictionary
110
+ """
111
+ try:
112
+ # Validation
113
+ if not param:
114
+ raise ValueError("Invalid input")
115
+
116
+ # Processing
117
+ result = process_data(param)
118
+
119
+ # Return standardized format
120
+ return {
121
+ "success": True,
122
+ "data": result,
123
+ "metadata": {}
124
+ }
125
+
126
+ except Exception as e:
127
+ logger.error(f"Error: {e}")
128
+ raise
129
+ ```
130
+
131
+ **Tool Independence:**
132
+ - Each tool is self-contained
133
+ - No dependencies between tools
134
+ - Can be tested individually
135
+ - Easy to add/remove tools
136
+
137
+ ---
138
+
139
+ ### 3. Utility Layer (`utils/`)
140
+
141
+ **helpers.py - Text Processing:**
142
+ ```python
143
+ • clean_text() - Remove extra whitespace
144
+ • extract_keywords() - NLP keyword extraction
145
+ • chunk_text() - Text splitting with overlap
146
+ • validate_url() - URL validation
147
+ ```
148
+
149
+ **rag_utils.py - Vector Search:**
150
+ ```python
151
+ • SimpleRAGStore - FAISS-based vector database
152
+ • semantic_search() - Sentence transformer embeddings
153
+ • create_rag_store() - Initialize vector store
154
+ ```
155
+
156
+ **Models (models/schemas.py):**
157
+ ```python
158
+ • Pydantic models for type validation
159
+ • Input/output schemas
160
+ • Data validation
161
+ ```
162
+
163
+ ---
164
+
165
+ ## 🔄 Data Flow
166
+
167
+ ### Request Flow
168
+
169
+ ```
170
+ 1. Client sends MCP request
171
+
172
+ 2. mcp_server.py receives request
173
+
174
+ 3. Server validates input schema
175
+
176
+ 4. Server routes to tool function
177
+
178
+ 5. Tool processes data
179
+
180
+ 6. Tool returns result dict
181
+
182
+ 7. Server formats MCP response
183
+
184
+ 8. Client receives response
185
+ ```
186
+
187
+ ### Example: PDF Reading Flow
188
+
189
+ ```
190
+ Client: "Read this PDF"
191
+
192
+ MCP Server: Receives pdf_reader request
193
+
194
+ pdf_reader.py: read_pdf(file_path)
195
+
196
+ PyPDF2: Extract text from pages
197
+
198
+ Return: {text, pages, metadata}
199
+
200
+ MCP Server: Format response
201
+
202
+ Client: Receives extracted text
203
+ ```
204
+
205
+ ---
206
+
207
+ ## 🗂️ Project Structure
208
+
209
+ ```
210
+ mission_control_mcp/
211
+
212
+ ├── mcp_server.py # MCP server entry point
213
+
214
+ ├── tools/ # 8 independent tools
215
+ │ ├── pdf_reader.py # PDF text extraction
216
+ │ ├── text_extractor.py # Text processing (4 ops)
217
+ │ ├── web_fetcher.py # Web scraping
218
+ │ ├── rag_search.py # Semantic search
219
+ │ ├── data_visualizer.py # Chart generation
220
+ │ ├── file_converter.py # File format conversion
221
+ │ ├── email_intent_classifier.py # Email classification
222
+ │ └── kpi_generator.py # Business metrics
223
+
224
+ ├── utils/ # Shared utilities
225
+ │ ├── helpers.py # Text processing helpers
226
+ │ └── rag_utils.py # Vector search utilities
227
+
228
+ ├── models/ # Data models
229
+ │ └── schemas.py # Pydantic schemas
230
+
231
+ ├── examples/ # Sample test data
232
+ │ ├── sample_report.txt # Business report
233
+ │ ├── business_data.csv # Financial data
234
+ │ ├── sample_email_*.txt # Email samples
235
+ │ └── sample_documents.txt # RAG search docs
236
+
237
+ ├── app.py # Gradio web interface
238
+ ├── demo.py # Demo & test suite
239
+
240
+ ├── docs/ # Documentation
241
+ │ ├── README.md # Main documentation
242
+ │ ├── API.md # API reference
243
+ │ ├── EXAMPLES.md # Use cases
244
+ │ ├── TESTING.md # Testing guide
245
+ │ ├── ARCHITECTURE.md # This file
246
+ │ └── CONTRIBUTING.md # Contribution guide
247
+
248
+ ├── requirements.txt # Python dependencies
249
+ ├── .gitignore # Git ignore rules
250
+ └── LICENSE # MIT License
251
+ ```
252
+
253
+ ---
254
+
255
+ ## 🔌 Integration Points
256
+
257
+ ### MCP Protocol Integration
258
+
259
+ ```python
260
+ from mcp.server import Server
261
+ from mcp.types import Tool, TextContent
262
+
263
+ # Create server
264
+ server = Server("mission-control")
265
+
266
+ # Register tool
267
+ @server.tool()
268
+ async def pdf_reader(file_path: str) -> str:
269
+ result = read_pdf(file_path)
270
+ return json.dumps(result)
271
+
272
+ # Run server
273
+ await server.run(stdin, stdout)
274
+ ```
275
+
276
+ ### Claude Desktop Integration
277
+
278
+ **Configuration:**
279
+ ```json
280
+ {
281
+ "mcpServers": {
282
+ "mission-control": {
283
+ "command": "python",
284
+ "args": ["path/to/mcp_server.py"]
285
+ }
286
+ }
287
+ }
288
+ ```
289
+
290
+ **Communication:**
291
+ ```
292
+ Claude Desktop ←→ MCP Protocol ←→ mcp_server.py ←→ Tools
293
+ ```
294
+
295
+ ---
296
+
297
+ ## 🚀 Scalability Design
298
+
299
+ ### Horizontal Scaling
300
+
301
+ **Current:** Single-process server
302
+ **Future:** Multi-process with load balancing
303
+
304
+ ```
305
+ Load Balancer
306
+
307
+ ┌──────────┼──────────┐
308
+ │ │ │
309
+ Server 1 Server 2 Server 3
310
+ │ │ │
311
+ └──────────┴──────────┘
312
+ Tools
313
+ ```
314
+
315
+ ### Caching Strategy
316
+
317
+ **Implemented:**
318
+ - RAG model caching (sentence transformers)
319
+ - NLTK data caching
320
+
321
+ **Future Improvements:**
322
+ - Redis for result caching
323
+ - Database for document storage
324
+ - CDN for static assets
325
+
326
+ ---
327
+
328
+ ## 🔒 Security Architecture
329
+
330
+ ### Input Validation
331
+
332
+ ```python
333
+ # Pydantic schemas
334
+ from pydantic import BaseModel, field_validator
335
+
336
+ class PDFReaderInput(BaseModel):
337
+ file_path: str
338
+
339
+ @field_validator('file_path')
340
+ def validate_path(cls, v):
341
+ if not Path(v).exists():
342
+ raise ValueError("File not found")
343
+ return v
344
+ ```
345
+
346
+ ### Error Handling
347
+
348
+ ```python
349
+ try:
350
+ result = tool_function(input)
351
+ except FileNotFoundError:
352
+ return {"error": "File not found", "code": 404}
353
+ except ValueError:
354
+ return {"error": "Invalid input", "code": 400}
355
+ except Exception:
356
+ return {"error": "Internal error", "code": 500}
357
+ ```
358
+
359
+ ### Authentication
360
+
361
+ **Current:** None (local tool execution)
362
+ **Production Considerations:**
363
+ - API key authentication
364
+ - Rate limiting
365
+ - Request logging
366
+ - User permissions
367
+
368
+ ---
369
+
370
+ ## 📊 Performance Characteristics
371
+
372
+ ### Tool Performance
373
+
374
+ | Tool | Avg Time | Memory | Notes |
375
+ |------|----------|--------|-------|
376
+ | PDF Reader | 1s | 50MB | Depends on PDF size |
377
+ | Text Extractor | 0.5s | 10MB | Fast text processing |
378
+ | Web Fetcher | 2-3s | 20MB | Network dependent |
379
+ | RAG Search | 2.5s* | 200MB | *First run (model load) |
380
+ | RAG Search | 0.5s | 200MB | Subsequent runs |
381
+ | Data Visualizer | 1.2s | 30MB | Chart generation |
382
+ | File Converter | 1-2s | 50MB | File size dependent |
383
+ | Email Classifier | 0.1s | 5MB | Very fast |
384
+ | KPI Generator | 0.3s | 10MB | Quick calculations |
385
+
386
+ ### Bottlenecks
387
+
388
+ 1. **RAG Search** - Initial model loading (~2s)
389
+ - Solution: Keep model in memory
390
+
391
+ 2. **Web Fetcher** - Network latency
392
+ - Solution: Async requests, caching
393
+
394
+ 3. **PDF Reader** - Large files
395
+ - Solution: Stream processing
396
+
397
+ ---
398
+
399
+ ## 🔄 State Management
400
+
401
+ ### Stateless Design
402
+
403
+ Each tool request is independent:
404
+ - No session state
405
+ - No user context
406
+ - Pure function design
407
+
408
+ **Benefits:**
409
+ - Easy scaling
410
+ - No state synchronization
411
+ - Simple debugging
412
+ - High availability
413
+
414
+ ### RAG Store State
415
+
416
+ Exception: RAG search maintains in-memory vector store:
417
+ ```python
418
+ class SimpleRAGStore:
419
+ def __init__(self):
420
+ self.documents = []
421
+ self.index = None # FAISS index
422
+ ```
423
+
424
+ **Lifecycle:**
425
+ - Created on first search
426
+ - Persists during server lifetime
427
+ - Cleared on server restart
428
+
429
+ ---
430
+
431
+ ## 🧪 Testing Architecture
432
+
433
+ ### Test Pyramid
434
+
435
+ ```
436
+ ┌─────────────┐
437
+ │ E2E Tests │ (MCP integration)
438
+ ├─────────────┤
439
+ │ Integration │ (Tool combinations)
440
+ ├─────────────┤
441
+ │ Unit Tests │ (Individual functions)
442
+ └─────────────┘
443
+ ```
444
+
445
+ ### Test Coverage
446
+
447
+ - **Unit Tests:** Test each function independently
448
+ - **Integration Tests:** Test tool interactions
449
+ - **MCP Tests:** Test server communication
450
+ - **Sample Tests:** Test with real data
451
+
452
+ ---
453
+
454
+ ## 📦 Dependency Management
455
+
456
+ ### Core Dependencies
457
+
458
+ ```
459
+ MCP SDK (>=1.0.0)
460
+ ├── stdio communication
461
+ └── Tool registration
462
+
463
+ Processing Libraries
464
+ ├── PyPDF2 (PDF reading)
465
+ ├── BeautifulSoup4 (HTML parsing)
466
+ ├── Pandas (Data processing)
467
+ └── Matplotlib (Visualization)
468
+
469
+ ML/NLP Libraries
470
+ ├── scikit-learn (Text processing)
471
+ ├── NLTK (Keyword extraction)
472
+ ├── sentence-transformers (Embeddings)
473
+ └── FAISS (Vector search)
474
+ ```
475
+
476
+ ### Optional Dependencies
477
+
478
+ - faiss-cpu: Can use faiss-gpu on GPU systems
479
+ - reportlab: Optional for PDF generation
480
+
481
+ ---
482
+
483
+ ## 🔮 Future Architecture Improvements
484
+
485
+ ### Planned Enhancements
486
+
487
+ 1. **Database Integration**
488
+ ```
489
+ PostgreSQL for persistent storage
490
+ Redis for caching
491
+ ```
492
+
493
+ 2. **Async Processing**
494
+ ```python
495
+ async def process_pdf(file_path: str):
496
+ # Async PDF processing
497
+ return await asyncio.to_thread(read_pdf, file_path)
498
+ ```
499
+
500
+ 3. **Microservices**
501
+ ```
502
+ Each tool as separate service
503
+ API gateway for routing
504
+ Service mesh for communication
505
+ ```
506
+
507
+ 4. **Monitoring**
508
+ ```
509
+ Prometheus metrics
510
+ Grafana dashboards
511
+ Error tracking (Sentry)
512
+ ```
513
+
514
+ ---
515
+
516
+ ## 📝 Design Principles
517
+
518
+ ### SOLID Principles
519
+
520
+ - **Single Responsibility:** Each tool does one thing
521
+ - **Open/Closed:** Easy to add new tools
522
+ - **Liskov Substitution:** Tools are interchangeable
523
+ - **Interface Segregation:** Minimal tool interfaces
524
+ - **Dependency Inversion:** Tools depend on abstractions
525
+
526
+ ### Clean Architecture
527
+
528
+ - **Independent of Frameworks:** Core logic separate from MCP
529
+ - **Testable:** Can test without MCP server
530
+ - **Independent of UI:** Works with any MCP client
531
+ - **Independent of Database:** No database coupling
532
+
533
+ ---
534
+
535
+ ## 🎯 Architectural Goals
536
+
537
+ ✅ **Achieved:**
538
+ - Modular design
539
+ - Easy to extend
540
+ - Well-documented
541
+ - Testable
542
+ - Production-ready
543
+
544
+ 🔄 **In Progress:**
545
+ - Performance optimization
546
+ - Enhanced caching
547
+ - Better error handling
548
+
549
+ 🎯 **Future:**
550
+ - Multi-tenancy
551
+ - Distributed processing
552
+ - Advanced monitoring
553
+ - Auto-scaling
554
+
555
+ ---
556
+
557
+ **MissionControlMCP Architecture Documentation v1.0** 🏗️
CONTRIBUTING.md ADDED
@@ -0,0 +1,529 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🤝 Contributing to MissionControlMCP
2
+
3
+ Thank you for considering contributing to MissionControlMCP! This document provides guidelines for contributing to the project.
4
+
5
+ ---
6
+
7
+ ## 📋 Table of Contents
8
+
9
+ - [Code of Conduct](#code-of-conduct)
10
+ - [Getting Started](#getting-started)
11
+ - [Development Setup](#development-setup)
12
+ - [How to Contribute](#how-to-contribute)
13
+ - [Coding Standards](#coding-standards)
14
+ - [Testing Guidelines](#testing-guidelines)
15
+ - [Pull Request Process](#pull-request-process)
16
+ - [Reporting Bugs](#reporting-bugs)
17
+ - [Suggesting Features](#suggesting-features)
18
+
19
+ ---
20
+
21
+ ## 📜 Code of Conduct
22
+
23
+ This project adheres to a code of conduct. By participating, you are expected to uphold this code:
24
+
25
+ - **Be Respectful:** Treat everyone with respect and consideration
26
+ - **Be Constructive:** Provide helpful feedback and suggestions
27
+ - **Be Collaborative:** Work together towards common goals
28
+ - **Be Professional:** Maintain professionalism in all interactions
29
+
30
+ ---
31
+
32
+ ## 🚀 Getting Started
33
+
34
+ ### Prerequisites
35
+
36
+ - Python 3.11 or higher
37
+ - Git
38
+ - Basic knowledge of Python and MCP protocol
39
+
40
+ ### Fork and Clone
41
+
42
+ 1. Fork the repository on GitHub
43
+ 2. Clone your fork locally:
44
+ ```bash
45
+ git clone https://github.com/YOUR_USERNAME/CleanEye-Hackathon.git
46
+ cd CleanEye-Hackathon/mission_control_mcp
47
+ ```
48
+
49
+ 3. Add upstream remote:
50
+ ```bash
51
+ git remote add upstream https://github.com/AlBaraa-1/CleanEye-Hackathon.git
52
+ ```
53
+
54
+ ---
55
+
56
+ ## 💻 Development Setup
57
+
58
+ ### 1. Create Virtual Environment
59
+
60
+ ```bash
61
+ python -m venv venv
62
+
63
+ # Windows
64
+ venv\Scripts\activate
65
+
66
+ # Linux/Mac
67
+ source venv/bin/activate
68
+ ```
69
+
70
+ ### 2. Install Dependencies
71
+
72
+ ```bash
73
+ pip install -r requirements.txt
74
+ ```
75
+
76
+ ### 3. Install Development Dependencies
77
+
78
+ ```bash
79
+ pip install pytest black flake8 mypy
80
+ ```
81
+
82
+ ### 4. Run Tests
83
+
84
+ ```bash
85
+ python demo.py
86
+ ```
87
+
88
+ ---
89
+
90
+ ## 🛠️ How to Contribute
91
+
92
+ ### Types of Contributions
93
+
94
+ We welcome:
95
+
96
+ 1. **Bug Fixes** - Fix issues in existing tools
97
+ 2. **New Tools** - Add new MCP tools
98
+ 3. **Documentation** - Improve docs and examples
99
+ 4. **Tests** - Add or improve test coverage
100
+ 5. **Performance** - Optimize existing code
101
+ 6. **Examples** - Add real-world use cases
102
+
103
+ ---
104
+
105
+ ## 📝 Coding Standards
106
+
107
+ ### Python Style Guide
108
+
109
+ We follow [PEP 8](https://pep8.org/) with these specifics:
110
+
111
+ **Formatting:**
112
+ ```python
113
+ # Good
114
+ def function_name(param1: str, param2: int) -> Dict[str, Any]:
115
+ """
116
+ Function description.
117
+
118
+ Args:
119
+ param1: Parameter description
120
+ param2: Parameter description
121
+
122
+ Returns:
123
+ Dictionary with results
124
+ """
125
+ result = {"key": "value"}
126
+ return result
127
+
128
+ # Bad
129
+ def functionName(param1,param2):
130
+ result={"key":"value"}
131
+ return result
132
+ ```
133
+
134
+ **Use Black for Formatting:**
135
+ ```bash
136
+ black tools/your_tool.py
137
+ ```
138
+
139
+ **Type Hints:**
140
+ ```python
141
+ from typing import Dict, Any, List, Optional
142
+
143
+ def process_data(data: List[str], limit: Optional[int] = None) -> Dict[str, Any]:
144
+ ...
145
+ ```
146
+
147
+ **Docstrings:**
148
+ ```python
149
+ def my_function(param: str) -> Dict[str, Any]:
150
+ """
151
+ Brief description (one line).
152
+
153
+ Longer description if needed explaining the function's
154
+ purpose, behavior, and any important details.
155
+
156
+ Args:
157
+ param: Description of parameter
158
+
159
+ Returns:
160
+ Description of return value
161
+
162
+ Raises:
163
+ ValueError: When invalid input
164
+ FileNotFoundError: When file not found
165
+
166
+ Example:
167
+ >>> result = my_function("example")
168
+ >>> print(result['key'])
169
+ 'value'
170
+ """
171
+ ...
172
+ ```
173
+
174
+ ---
175
+
176
+ ## ✅ Testing Guidelines
177
+
178
+ ### Writing Tests
179
+
180
+ All new tools must include tests:
181
+
182
+ **1. Create Test File:**
183
+ ```python
184
+ # tests/test_your_tool.py
185
+ import pytest
186
+ from tools.your_tool import your_function
187
+
188
+ def test_your_function_success():
189
+ """Test successful operation"""
190
+ result = your_function("valid_input")
191
+ assert result['success'] is True
192
+ assert 'data' in result
193
+
194
+ def test_your_function_error():
195
+ """Test error handling"""
196
+ with pytest.raises(ValueError):
197
+ your_function("invalid_input")
198
+ ```
199
+
200
+ **2. Run Tests:**
201
+ ```bash
202
+ pytest tests/test_your_tool.py -v
203
+ ```
204
+
205
+ ### Test Coverage
206
+
207
+ Aim for 90%+ coverage:
208
+ ```bash
209
+ pytest --cov=tools tests/
210
+ ```
211
+
212
+ ### Test Categories
213
+
214
+ - **Unit Tests** - Test individual functions
215
+ - **Integration Tests** - Test tool combinations
216
+ - **MCP Tests** - Test MCP protocol integration
217
+
218
+ ---
219
+
220
+ ## 🔄 Pull Request Process
221
+
222
+ ### 1. Create Feature Branch
223
+
224
+ ```bash
225
+ git checkout -b feature/your-feature-name
226
+ # or
227
+ git checkout -b fix/bug-description
228
+ ```
229
+
230
+ ### 2. Make Changes
231
+
232
+ - Write code following style guide
233
+ - Add tests for new functionality
234
+ - Update documentation
235
+ - Run tests locally
236
+
237
+ ### 3. Commit Changes
238
+
239
+ Use clear commit messages:
240
+ ```bash
241
+ git add .
242
+ git commit -m "Add: New email sentiment analysis tool"
243
+ # or
244
+ git commit -m "Fix: PDF reader handling encrypted files"
245
+ # or
246
+ git commit -m "Docs: Update API reference for web fetcher"
247
+ ```
248
+
249
+ **Commit Message Format:**
250
+ - `Add:` - New features
251
+ - `Fix:` - Bug fixes
252
+ - `Docs:` - Documentation changes
253
+ - `Test:` - Test additions/changes
254
+ - `Refactor:` - Code refactoring
255
+ - `Perf:` - Performance improvements
256
+
257
+ ### 4. Push to Fork
258
+
259
+ ```bash
260
+ git push origin feature/your-feature-name
261
+ ```
262
+
263
+ ### 5. Create Pull Request
264
+
265
+ 1. Go to GitHub repository
266
+ 2. Click "New Pull Request"
267
+ 3. Select your branch
268
+ 4. Fill in PR template:
269
+
270
+ ```markdown
271
+ ## Description
272
+ Brief description of changes
273
+
274
+ ## Type of Change
275
+ - [ ] Bug fix
276
+ - [ ] New feature
277
+ - [ ] Documentation update
278
+ - [ ] Performance improvement
279
+
280
+ ## Testing
281
+ - [ ] All tests pass
282
+ - [ ] New tests added
283
+ - [ ] Manual testing completed
284
+
285
+ ## Checklist
286
+ - [ ] Code follows style guide
287
+ - [ ] Documentation updated
288
+ - [ ] Tests added/updated
289
+ - [ ] No breaking changes
290
+ ```
291
+
292
+ ### 6. Code Review
293
+
294
+ - Address reviewer feedback
295
+ - Make requested changes
296
+ - Push updates to same branch
297
+
298
+ ### 7. Merge
299
+
300
+ Once approved, maintainers will merge your PR.
301
+
302
+ ---
303
+
304
+ ## 🐛 Reporting Bugs
305
+
306
+ ### Before Submitting
307
+
308
+ 1. Check existing issues
309
+ 2. Verify bug in latest version
310
+ 3. Gather reproduction steps
311
+
312
+ ### Bug Report Template
313
+
314
+ ```markdown
315
+ **Bug Description**
316
+ Clear description of the bug
317
+
318
+ **To Reproduce**
319
+ Steps to reproduce:
320
+ 1. Run command '...'
321
+ 2. Call function '...'
322
+ 3. See error
323
+
324
+ **Expected Behavior**
325
+ What should happen
326
+
327
+ **Actual Behavior**
328
+ What actually happens
329
+
330
+ **Environment**
331
+ - OS: Windows 11
332
+ - Python: 3.12
333
+ - MCP Version: 1.0.0
334
+
335
+ **Error Messages**
336
+     Paste error messages here
337
+
338
+
339
+
340
+ **Additional Context**
341
+ Any other relevant information
342
+ ```
343
+
344
+ ---
345
+
346
+ ## 💡 Suggesting Features
347
+
348
+ ### Feature Request Template
349
+
350
+ ```markdown
351
+ **Feature Description**
352
+ What feature would you like to see?
353
+
354
+ **Use Case**
355
+ Why is this feature needed? How will it be used?
356
+
357
+ **Proposed Solution**
358
+ How should this feature work?
359
+
360
+ **Alternatives Considered**
361
+ What other approaches did you consider?
362
+
363
+ **Additional Context**
364
+ Any mockups, examples, or references
365
+ ```
366
+
367
+ ---
368
+
369
+ ## 🏗️ Adding New Tools
370
+
371
+ ### Tool Structure
372
+
373
+ ```python
374
+ # tools/my_new_tool.py
375
+ """
376
+ Tool Name - Brief description
377
+ """
378
+ import logging
379
+ from typing import Dict, Any
380
+
381
+ logger = logging.getLogger(__name__)
382
+
383
+ def my_tool_function(param: str) -> Dict[str, Any]:
384
+ """
385
+ Tool description.
386
+
387
+ Args:
388
+ param: Parameter description
389
+
390
+ Returns:
391
+ Dictionary with results
392
+ """
393
+ try:
394
+ # Implementation
395
+ result = process_data(param)
396
+
397
+ return {
398
+ "success": True,
399
+ "data": result,
400
+ "metadata": {}
401
+ }
402
+
403
+ except Exception as e:
404
+ logger.error(f"Error in my_tool: {e}")
405
+ raise
406
+ ```
407
+
408
+ ### Register Tool in MCP Server
409
+
410
+ ```python
411
+ # mcp_server.py
412
+ from tools.my_new_tool import my_tool_function
413
+
414
+ # In tool registration section:
415
+ server.register_tool(
416
+ name="my_tool",
417
+ description="What this tool does",
418
+ input_schema={
419
+ "type": "object",
420
+ "properties": {
421
+ "param": {"type": "string", "description": "Param description"}
422
+ },
423
+ "required": ["param"]
424
+ }
425
+ )
426
+ ```
427
+
428
+ ### Add Tests
429
+
430
+ ```python
431
+ # tests/test_my_tool.py
432
+ def test_my_tool():
433
+ result = my_tool_function("test_input")
434
+ assert result['success'] is True
435
+ ```
436
+
437
+ ### Update Documentation
438
+
439
+ 1. Add to README.md tool list
440
+ 2. Add to API.md reference
441
+ 3. Add to EXAMPLES.md with use case
442
+ 4. Add sample files to examples/
443
+
444
+ ---
445
+
446
+ ## 📚 Documentation Guidelines
447
+
448
+ ### What to Document
449
+
450
+ - **README.md** - Overview, setup, quick start
451
+ - **API.md** - Complete function signatures
452
+ - **EXAMPLES.md** - Real-world use cases
453
+ - **TESTING.md** - How to test
454
+ - **Code Comments** - Complex logic explanation
455
+
456
+ ### Documentation Style
457
+
458
+ ```python
459
+ # Good - Clear and concise
460
+ def calculate_total(items: List[float]) -> float:
461
+ """Calculate the sum of item prices."""
462
+ return sum(items)
463
+
464
+ # Bad - Over-documented
465
+ def calculate_total(items: List[float]) -> float:
466
+ """
467
+ This function takes a list of items and calculates the total
468
+ by iterating through each item and adding them together using
469
+ the built-in sum function and then returns the result.
470
+ """
471
+ return sum(items)
472
+ ```
473
+
474
+ ---
475
+
476
+ ## 🎯 Development Workflow
477
+
478
+ ### Typical Workflow
479
+
480
+ 1. **Check Issues** - Find or create issue
481
+ 2. **Discuss** - Comment on issue before starting
482
+ 3. **Branch** - Create feature branch
483
+ 4. **Develop** - Write code + tests
484
+ 5. **Test** - Run all tests locally
485
+ 6. **Document** - Update docs
486
+ 7. **Commit** - Clear commit messages
487
+ 8. **Push** - Push to your fork
488
+ 9. **PR** - Create pull request
489
+ 10. **Review** - Address feedback
490
+ 11. **Merge** - Maintainer merges
491
+
492
+ ### Stay in Sync
493
+
494
+ ```bash
495
+ # Pull latest changes from upstream
496
+ git fetch upstream
497
+ git checkout main
498
+ git merge upstream/main
499
+ git push origin main
500
+ ```
501
+
502
+ ---
503
+
504
+ ## 🏆 Recognition
505
+
506
+ Contributors will be:
507
+ - Listed in README.md contributors section
508
+ - Mentioned in release notes
509
+ - Credited in commit history
510
+
511
+ ---
512
+
513
+ ## 📞 Getting Help
514
+
515
+ - **Questions:** Open a GitHub Discussion
516
+ - **Chat:** Join our Discord (link in README)
517
+ - **Issues:** GitHub Issues for bugs/features
518
+
519
+ ---
520
+
521
+ ## 📄 License
522
+
523
+ By contributing, you agree that your contributions will be licensed under the MIT License.
524
+
525
+ ---
526
+
527
+ **Thank you for contributing to MissionControlMCP!** 🚀
528
+
529
+ Every contribution, no matter how small, helps make this project better for everyone.
EXAMPLES.md ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 💼 Real-World Use Cases & Examples
2
+
3
+ This document showcases practical, real-world applications of MissionControlMCP's tools.
4
+
5
+ ---
6
+
7
+ ## 🏢 Enterprise Use Cases
8
+
9
+ ### Use Case 1: Automated Report Generation
10
+ **Scenario:** Monthly business reporting automation
11
+
12
+ **Workflow:**
13
+ 1. **pdf_reader** → Extract data from quarterly reports
14
+ 2. **text_extractor** → Summarize key findings
15
+ 3. **kpi_generator** → Calculate business metrics
16
+ 4. **data_visualizer** → Create performance charts
17
+
18
+ **Business Value:** Saves 10+ hours per month of manual work
19
+
20
+ ---
21
+
22
+ ### Use Case 2: Customer Support Intelligence
23
+ **Scenario:** Automated email triage and routing
24
+
25
+ **Workflow:**
26
+ 1. **email_intent_classifier** → Categorize incoming emails
27
+ 2. Route based on intent:
28
+ - Complaints → Priority queue
29
+ - Inquiries → Sales team
30
+ - Urgent → Immediate escalation
31
+
32
+ **Business Value:** 80% faster email routing, improved response times
33
+
34
+ ---
35
+
36
+ ### Use Case 3: Market Research Automation
37
+ **Scenario:** Competitive analysis from web sources
38
+
39
+ **Workflow:**
40
+ 1. **web_fetcher** → Collect competitor website content
41
+ 2. **text_extractor** → Extract key information
42
+ 3. **rag_search** → Find relevant insights across sources
43
+ 4. **text_extractor** → Generate executive summary
44
+
45
+ **Business Value:** Real-time market intelligence, faster decision making
46
+
47
+ ---
48
+
49
+ ### Use Case 4: Knowledge Base Search
50
+ **Scenario:** Internal document search system
51
+
52
+ **Workflow:**
53
+ 1. **pdf_reader** → Index company documents
54
+ 2. **rag_search** → Semantic search across knowledge base
55
+ 3. Find relevant information even with different wording
56
+
57
+ **Business Value:** Instant access to company knowledge, reduced information silos
58
+
59
+ ---
60
+
61
+ ### Use Case 5: Data Analysis Pipeline
62
+ **Scenario:** Convert and visualize business data
63
+
64
+ **Workflow:**
65
+ 1. **file_converter** → Convert PDF reports to CSV
66
+ 2. **data_visualizer** → Generate trend charts
67
+ 3. **kpi_generator** → Calculate performance metrics
68
+
69
+ **Business Value:** Automated data transformation, visual insights
70
+
71
+ ---
72
+
73
+ ## 🎯 Specific Examples
74
+
75
+ ### Example 1: Text Processing Chain
76
+
77
+ **Input:**
78
+ ```
79
+ Long technical document with 5000 words about machine learning algorithms...
80
+ ```
81
+
82
+ **Processing:**
83
+ ```python
84
+ # Step 1: Clean the text
85
+ cleaned = text_extractor(text, operation="clean")
86
+
87
+ # Step 2: Extract keywords
88
+ keywords = text_extractor(text, operation="keywords")
89
+
90
+ # Step 3: Create summary
91
+ summary = text_extractor(text, operation="summarize", max_length=300)
92
+ ```
93
+
94
+ **Output:**
95
+ - Clean text: Formatted, ready for analysis
96
+ - Keywords: "machine learning, neural networks, algorithms, training, optimization"
97
+ - Summary: 300-word executive summary
98
+
99
+ ---
100
+
101
+ ### Example 2: Business Intelligence Dashboard
102
+
103
+ **Input Data:**
104
+ ```json
105
+ {
106
+ "revenue": 5000000,
107
+ "costs": 3000000,
108
+ "customers": 2500,
109
+ "current_revenue": 5000000,
110
+ "previous_revenue": 4200000,
111
+ "employees": 50
112
+ }
113
+ ```
114
+
115
+ **Processing:**
116
+ ```python
117
+ # Generate KPIs
118
+ kpis = kpi_generator(data, metrics=["revenue", "growth", "efficiency"])
119
+
120
+ # Visualize monthly trends
121
+ chart = data_visualizer(monthly_data, chart_type="line", title="Revenue Trends")
122
+ ```
123
+
124
+ **Output:**
125
+ - Profit margin: 40%
126
+ - Revenue growth: 19%
127
+ - Revenue per employee: $100,000
128
+ - Interactive chart showing trends
129
+
130
+ ---
131
+
132
+ ### Example 3: Email Routing System
133
+
134
+ **Sample Emails:**
135
+
136
+ 1. **"I need help with my order #12345 that hasn't arrived"**
137
+ - Intent: `complaint` + `order` (Confidence: 0.8)
138
+ - Action: Route to support + Priority flag
139
+
140
+ 2. **"Can we schedule a meeting to discuss the proposal?"**
141
+ - Intent: `meeting` (Confidence: 0.9)
142
+ - Action: Route to calendar system
143
+
144
+ 3. **"URGENT: Server down, customers can't access site"**
145
+ - Intent: `urgent` + `complaint` (Confidence: 1.0)
146
+ - Action: Immediate escalation to DevOps
147
+
148
+ ---
149
+
150
+ ### Example 4: Research Assistant Workflow
151
+
152
+ **Task:** Research "AI safety frameworks"
153
+
154
+ **Automated Process:**
155
+ ```python
156
+ # 1. Fetch relevant articles
157
+ urls = ["https://ai-safety-org.com/frameworks",
158
+ "https://research-institute.edu/ai-ethics"]
159
+ articles = [web_fetcher(url) for url in urls]
160
+
161
+ # 2. Extract content
162
+ summaries = [text_extractor(article, operation="summarize")
163
+ for article in articles]
164
+
165
+ # 3. Semantic search across all content
166
+ insights = rag_search("governance frameworks", summaries, top_k=5)
167
+
168
+ # 4. Generate final report
169
+ report = text_extractor(combined_insights, operation="summarize")
170
+ ```
171
+
172
+ **Result:** Comprehensive research report in minutes
173
+
174
+ ---
175
+
176
+ ### Example 5: Document Processing Pipeline
177
+
178
+ **Scenario:** Process 100 contract PDFs
179
+
180
+ **Automated Workflow:**
181
+ ```python
182
+ for contract in contracts:
183
+ # Extract text from PDF
184
+ text = pdf_reader(contract)
185
+
186
+ # Extract key terms
187
+ keywords = text_extractor(text, operation="keywords")
188
+
189
+ # Search for specific clauses
190
+ results = rag_search("termination clause", [text], top_k=1)
191
+
192
+ # Store in database
193
+ save_to_database(contract_id, text, keywords, results)
194
+ ```
195
+
196
+ **Business Impact:**
197
+ - Manual processing: 5 minutes/contract = 8.3 hours
198
+ - Automated: 10 seconds/contract = 17 minutes
199
+ - Time saved: ~97%
200
+
201
+ ---
202
+
203
+ ## 📊 ROI Examples
204
+
205
+ ### Small Business (10 employees)
206
+ **Monthly Automation Savings:**
207
+ - Email classification: 20 hours → $600
208
+ - Report generation: 15 hours → $450
209
+ - Data analysis: 10 hours → $300
210
+ - **Total: 45 hours/$1,350 per month**
211
+
212
+ ### Enterprise (500 employees)
213
+ **Annual Automation Value:**
214
+ - Customer support efficiency: $500K
215
+ - Knowledge management: $300K
216
+ - Business intelligence: $400K
217
+ - **Total: $1.2M annually**
218
+
219
+ ---
220
+
221
+ ## 🎓 Learning Path
222
+
223
+ ### Beginner: Start Simple
224
+ 1. Try **text_extractor** with a sample document
225
+ 2. Use **email_intent_classifier** on sample emails
226
+ 3. Create a basic chart with **data_visualizer**
227
+
228
+ ### Intermediate: Build Workflows
229
+ 1. Combine **web_fetcher** + **text_extractor**
230
+ 2. Set up **rag_search** with your documents
231
+ 3. Create a KPI dashboard with **kpi_generator**
232
+
233
+ ### Advanced: Full Automation
234
+ 1. Build complete document processing pipelines
235
+ 2. Implement intelligent email routing systems
236
+ 3. Create real-time business intelligence dashboards
237
+
238
+ ---
239
+
240
+ ## 🔗 Integration Examples
241
+
242
+ ### With Claude Desktop
243
+ ```json
244
+ {
245
+ "mcpServers": {
246
+ "mission-control": {
247
+ "command": "python",
248
+ "args": ["path/to/mcp_server.py"]
249
+ }
250
+ }
251
+ }
252
+ ```
253
+
254
+ **Usage in Claude:**
255
+ - "Extract text from this PDF and summarize it"
256
+ - "Fetch this website and find information about pricing"
257
+ - "Calculate KPIs from this business data"
258
+
259
+ ---
260
+
261
+ ## 🚀 Quick Start Templates
262
+
263
+ ### Template 1: Document Summarizer
264
+ ```python
265
+ from tools.pdf_reader import read_pdf
266
+ from tools.text_extractor import extract_text
267
+
268
+ # Read PDF
269
+ content = read_pdf("document.pdf")
270
+
271
+ # Generate summary
272
+ summary = extract_text(content["text"],
273
+ operation="summarize",
274
+ max_length=500)
275
+
276
+ print(summary["result"])
277
+ ```
278
+
279
+ ### Template 2: Web Research Assistant
280
+ ```python
281
+ from tools.web_fetcher import fetch_web_content
282
+ from tools.rag_search import search_documents
283
+
284
+ # Fetch multiple sources
285
+ urls = ["url1", "url2", "url3"]
286
+ docs = [fetch_web_content(url)["content"] for url in urls]
287
+
288
+ # Search for specific information
289
+ results = search_documents("your query", docs, top_k=3)
290
+ ```
291
+
292
+ ### Template 3: Business Dashboard
293
+ ```python
294
+ from tools.kpi_generator import generate_kpis
295
+ from tools.data_visualizer import visualize_data
296
+
297
+ # Calculate KPIs
298
+ kpis = generate_kpis(business_data,
299
+ metrics=["revenue", "growth"])
300
+
301
+ # Visualize trends
302
+ chart = visualize_data(trend_data,
303
+ chart_type="line",
304
+ title="Q4 Performance")
305
+ ```
306
+
307
+ ---
308
+
309
+ ## 💡 Tips for Success
310
+
311
+ 1. **Chain Tools Together** - Combine multiple tools for powerful workflows
312
+ 2. **Use RAG Search** - Best for finding information across documents
313
+ 3. **Automate Repetitive Tasks** - Perfect for daily/weekly operations
314
+ 4. **Start Small** - Test individual tools before building complex systems
315
+ 5. **Monitor Performance** - Track time/cost savings from automation
316
+
317
+ ---
318
+
319
+ **Ready to automate your enterprise workflows? Start with these examples!** 🚀
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 AlBaraa-1
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,545 @@
1
  ---
2
- title: MissionControlMCP
3
- emoji: 🐢
4
- colorFrom: green
5
- colorTo: red
6
  sdk: gradio
7
- sdk_version: 5.49.1
8
  app_file: app.py
9
  pinned: false
 
 
 
 
 
 
 
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: MissionControlMCP - Enterprise Automation Tools
3
+ emoji: 🚀
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: "5.48.0"
8
  app_file: app.py
9
  pinned: false
10
+ tags:
11
+ - building-mcp-track-enterprise
12
+ - mcp-in-action-track-enterprise
13
+ - mcp
14
+ - anthropic
15
+ - enterprise-automation
16
+ - gradio-hackathon
17
+ - ai-agents
18
+ - mcp-server
19
  ---
20
 
21
+ # 🚀 MissionControlMCP
22
+
23
+ **Enterprise Automation MCP Server for Document Analysis, Data Processing & Business Intelligence**
24
+
25
+ A fully functional Model Context Protocol (MCP) server providing 8 powerful enterprise automation tools for document processing, web scraping, semantic search, data visualization, and business analytics.
26
+
27
+ Built for the **MCP 1st Birthday Hackathon – Winter 2025** (Tracks: Building MCP + MCP in Action - Enterprise).
28
+
29
+ 🏆 **Hackathon Submission** | 🔧 **Both Tracks** | 🏢 **Enterprise Category**
30
+
31
+ ---
32
+
33
+ ## 📱 Social Media & Links
34
+
35
+ - 🔗 **LinkedIn Post:** [View Announcement](https://www.linkedin.com/posts/albaraa-alolabi_mcphackathon-gradiohackathon-huggingface-activity-7395722042223886336-kp7K?utm_source=share&utm_medium=member_desktop)
36
+ - 🚀 **Live Demo:** [Try on Hugging Face](https://huggingface.co/spaces/AlBaraa63/8_tools)
37
+ - 💻 **GitHub Repository:** [Source Code](https://github.com/AlBaraa-1/CleanEye-Hackathon)
38
+
39
+ ---
40
+
41
+ ## 📋 Table of Contents
42
+
43
+ - [Overview](#overview)
44
+ - [Features](#features)
45
+ - [Tools](#tools)
46
+ - [Installation](#installation)
47
+ - [Usage](#usage)
48
+ - [Tool Examples](#tool-examples)
49
+ - [Claude Desktop Integration](#claude-desktop-integration)
50
+ - [Development](#development)
51
+ - [Testing](#testing)
52
+ - [Architecture](#architecture)
53
+ - [Hackathon Submission](#hackathon-submission)
54
+
55
+ ---
56
+
57
+ ## 🎯 Overview
58
+
59
+ **MissionControlMCP** is an enterprise-grade MCP server that provides intelligent automation capabilities through 8 specialized tools. It enables AI assistants like Claude to perform complex document processing, data analysis, web research, and business intelligence tasks.
60
+
61
+ ### Key Capabilities
62
+
63
+ - **📄 Document Processing**: Extract text from PDFs, process and summarize content
64
+ - **🌐 Web Intelligence**: Fetch and parse web content with clean text extraction
65
+ - **🔍 Semantic Search**: RAG-based vector search using FAISS and sentence transformers
66
+ - **📊 Data Visualization**: Generate charts from CSV/JSON data
67
+ - **🔄 File Conversion**: Convert between PDF, TXT, and CSV formats
68
+ - **📧 Email Classification**: Classify email intents using NLP
69
+ - **📈 KPI Generation**: Calculate business metrics and generate insights
70
+
71
+ ---
72
+
73
+ ## 🧪 Quick Test
74
+
75
+ ```bash
76
+ # Test all tools with sample files
77
+ python demo.py
78
+ ```
79
+
80
+ **See [TESTING.md](TESTING.md) for complete testing guide with examples!**
81
+
82
+ ---
83
+
84
+ ## ✨ Features
85
+
86
+ - ✅ **8 Production-Ready Tools** for enterprise automation
87
+ - ✅ **MCP Compliant** - Works with Claude Desktop and any MCP client
88
+ - ✅ **Type-Safe** - Built with Python 3.11+ and type hints
89
+ - ✅ **Modular Architecture** - Clean separation of concerns
90
+ - ✅ **Comprehensive Testing** - Test suite included
91
+ - ✅ **Well Documented** - Clear schemas and examples
92
+ - ✅ **Vector Search** - RAG implementation with FAISS
93
+ - ✅ **Data Visualization** - Base64 encoded chart generation
94
+ - ✅ **NLP Classification** - Rule-based intent detection
95
+
96
+ ---
97
+
98
+ ## 🛠️ Tools
99
+
100
+ ### 1. **pdf_reader**
101
+ Extract text and metadata from PDF files.
102
+
103
+ **Input:**
104
+ - `file_path`: Path to PDF file
105
+
106
+ **Output:**
107
+ - Extracted text from all pages
108
+ - Page count
109
+ - Document metadata (author, title, dates)
110
+
111
+ ---
112
+
113
+ ### 2. **text_extractor**
114
+ Process and extract information from text.
115
+
116
+ **Input:**
117
+ - `text`: Raw text to process
118
+ - `operation`: 'clean', 'summarize', 'chunk', or 'keywords'
119
+ - `max_length`: Max length for summaries (default: 500)
120
+
121
+ **Output:**
122
+ - Processed text
123
+ - Word count
124
+ - Operation metadata
125
+
126
+ ---
127
+
128
+ ### 3. **web_fetcher**
129
+ Fetch and extract content from web URLs.
130
+
131
+ **Input:**
132
+ - `url`: URL to fetch
133
+ - `extract_text_only`: Extract text only (default: true)
134
+
135
+ **Output:**
136
+ - Clean text content or HTML
137
+ - HTTP status code
138
+ - Response metadata
139
+
140
+ ---
141
+
142
+ ### 4. **rag_search**
143
+ Semantic search using RAG (Retrieval Augmented Generation).
144
+
145
+ **Input:**
146
+ - `query`: Search query
147
+ - `documents`: List of documents to search
148
+ - `top_k`: Number of results (default: 3)
149
+
150
+ **Output:**
151
+ - Ranked search results with similarity scores
152
+ - Document snippets
153
+ - Relevance rankings
154
+
155
+ ---
156
+
157
+ ### 5. **data_visualizer**
158
+ Create data visualizations and charts.
159
+
160
+ **Input:**
161
+ - `data`: JSON or CSV string data
162
+ - `chart_type`: 'bar', 'line', 'pie', or 'scatter'
163
+ - `x_column`, `y_column`: Column names
164
+ - `title`: Chart title
165
+
166
+ **Output:**
167
+ - Base64 encoded PNG image
168
+ - Chart dimensions
169
+ - Column information
170
+
171
+ ---
172
+
173
+ ### 6. **file_converter**
174
+ Convert files between formats.
175
+
176
+ **Input:**
177
+ - `input_path`: Path to input file
178
+ - `output_format`: 'txt', 'csv', or 'pdf'
179
+ - `output_path`: Optional output path
180
+
181
+ **Output:**
182
+ - Output file path
183
+ - Conversion status
184
+ - File size
185
+
186
+ **Supported Conversions:**
187
+ - PDF → TXT
188
+ - TXT → CSV
189
+ - CSV → TXT
190
+
191
+ ---
192
+
193
+ ### 7. **email_intent_classifier**
194
+ Classify email intent using NLP.
195
+
196
+ **Input:**
197
+ - `email_text`: Email content to classify
198
+
199
+ **Output:**
200
+ - Primary intent (inquiry, complaint, request, feedback, meeting, order, urgent, follow_up, thank_you, application)
201
+ - Confidence score
202
+ - Secondary intents
203
+
204
+ ---
205
+
206
+ ### 8. **kpi_generator**
207
+ Generate business KPIs and insights.
208
+
209
+ **Input:**
210
+ - `data`: JSON string with business data
211
+ - `metrics`: List of metrics - 'revenue', 'growth', 'efficiency', 'customer', 'operational'
212
+
213
+ **Output:**
214
+ - Calculated KPIs
215
+ - Executive summary
216
+ - Key trends and insights
217
+
218
+ ---
219
+
220
+ ## 📦 Installation
221
+
222
+ ### Prerequisites
223
+
224
+ - Python 3.11 or higher
225
+ - pip or uv package manager
226
+
227
+ ### Setup
228
+
229
+ 1. **Clone or download the repository:**
230
+
231
+ ```bash
232
+ cd mission_control_mcp
233
+ ```
234
+
235
+ 2. **Install dependencies:**
236
+
237
+ ```bash
238
+ pip install -r requirements.txt
239
+ ```
240
+
241
+ Or using `uv`:
242
+
243
+ ```bash
244
+ uv pip install -r requirements.txt
245
+ ```
246
+
247
+ ### Dependencies
248
+
249
+ - `mcp` - Model Context Protocol SDK
250
+ - `pypdf2` - PDF processing
251
+ - `requests` + `beautifulsoup4` - Web scraping
252
+ - `pandas` + `numpy` - Data processing
253
+ - `faiss-cpu` + `sentence-transformers` - Vector search
254
+ - `matplotlib` + `seaborn` - Data visualization
255
+ - `scikit-learn` + `nltk` - NLP and ML
256
+
257
+ ---
258
+
259
+ ## 🚀 Usage
260
+
261
+ ### Running the Server
262
+
263
+ #### For Development/Testing:
264
+
265
+ ```bash
266
+ uvx mcp dev mission_control_mcp/mcp_server.py
267
+ ```
268
+
269
+ Or with Python directly:
270
+
271
+ ```bash
272
+ python mcp_server.py
273
+ ```
274
+
275
+ #### For Production:
276
+
277
+ The server runs via stdio and is designed to be integrated with MCP clients like Claude Desktop.
278
+
279
+ ---
280
+
281
+ ## 💡 Tool Examples
282
+
283
+ ### Example 1: Text Extraction & Summarization
284
+
285
+ ```json
286
+ {
287
+ "tool": "text_extractor",
288
+ "arguments": {
289
+ "text": "Your long document text here...",
290
+ "operation": "summarize",
291
+ "max_length": 200
292
+ }
293
+ }
294
+ ```
295
+
296
+ ### Example 2: Web Content Fetching
297
+
298
+ ```json
299
+ {
300
+ "tool": "web_fetcher",
301
+ "arguments": {
302
+ "url": "https://example.com/article",
303
+ "extract_text_only": true
304
+ }
305
+ }
306
+ ```
307
+
308
+ ### Example 3: Semantic Search
309
+
310
+ ```json
311
+ {
312
+ "tool": "rag_search",
313
+ "arguments": {
314
+ "query": "machine learning algorithms",
315
+ "documents": [
316
+ "Document 1 about neural networks...",
317
+ "Document 2 about decision trees...",
318
+ "Document 3 about clustering..."
319
+ ],
320
+ "top_k": 3
321
+ }
322
+ }
323
+ ```
324
+
325
+ ### Example 4: Data Visualization
326
+
327
+ ```json
328
+ {
329
+ "tool": "data_visualizer",
330
+ "arguments": {
331
+ "data": "{\"month\": [\"Jan\", \"Feb\", \"Mar\"], \"sales\": [1000, 1500, 1200]}",
332
+ "chart_type": "bar",
333
+ "x_column": "month",
334
+ "y_column": "sales",
335
+ "title": "Q1 Sales Report"
336
+ }
337
+ }
338
+ ```
339
+
340
+ ### Example 5: Email Intent Classification
341
+
342
+ ```json
343
+ {
344
+ "tool": "email_intent_classifier",
345
+ "arguments": {
346
+ "email_text": "Hi, I need help with my recent order. It hasn't arrived yet and I'm wondering about the tracking status."
347
+ }
348
+ }
349
+ ```
350
+
351
+ ### Example 6: KPI Generation
352
+
353
+ ```json
354
+ {
355
+ "tool": "kpi_generator",
356
+ "arguments": {
357
+ "data": "{\"revenue\": 1000000, \"costs\": 600000, \"customers\": 500, \"current_revenue\": 1000000, \"previous_revenue\": 800000}",
358
+ "metrics": ["revenue", "growth", "efficiency"]
359
+ }
360
+ }
361
+ ```
362
+
363
+ ---
364
+
365
+ ## 🖥️ Claude Desktop Integration
366
+
367
+ ### Configuration
368
+
369
+ Add to your Claude Desktop config file (`claude_desktop_config.json`):
370
+
371
+ **Windows:** `%APPDATA%\Claude\claude_desktop_config.json`
372
+ **macOS:** `~/Library/Application Support/Claude/claude_desktop_config.json`
373
+
374
+ ```json
375
+ {
376
+ "mcpServers": {
377
+ "mission-control": {
378
+ "command": "python",
379
+ "args": [
380
+ "C:/Users/YourUser/path/to/mission_control_mcp/mcp_server.py"
381
+ ]
382
+ }
383
+ }
384
+ }
385
+ ```
386
+
387
+ Or with `uvx`:
388
+
389
+ ```json
390
+ {
391
+ "mcpServers": {
392
+ "mission-control": {
393
+ "command": "uvx",
394
+ "args": [
395
+ "mcp",
396
+ "run",
397
+ "C:/Users/YourUser/path/to/mission_control_mcp/mcp_server.py"
398
+ ]
399
+ }
400
+ }
401
+ }
402
+ ```
403
+
404
+ ### Usage in Claude
405
+
406
+ After configuration, restart Claude Desktop. You can then ask Claude to:
407
+
408
+ - "Extract text from this PDF file"
409
+ - "Fetch content from this website and summarize it"
410
+ - "Search these documents for information about X"
411
+ - "Create a bar chart from this sales data"
412
+ - "Classify the intent of this email"
413
+ - "Generate KPIs from this business data"
414
+
415
+ ---
416
+
417
+ ## 🧪 Testing
418
+
419
+ Run the comprehensive demo:
420
+
421
+ ```bash
422
+ python demo.py
423
+ ```
424
+
425
+ The demo includes:
426
+ - Text extraction and processing tests
427
+ - Web fetching tests
428
+ - RAG search demonstrations
429
+ - Data visualization generation
430
+ - Email classification examples
431
+ - KPI calculation tests
432
+ - Example JSON inputs for all tools
433
+
434
+ ---
435
+
436
+ ## 🏗️ Architecture
437
+
438
+ ```
439
+ mission_control_mcp/
440
+ ├── mcp_server.py # Main MCP server
441
+ ├── app.py # Gradio web interface
442
+ ├── demo.py # Demo & test suite
443
+ ├── requirements.txt # Dependencies
444
+ ├── README.md # Documentation
445
+
446
+ ├── tools/ # Tool implementations
447
+ │ ├── pdf_reader.py
448
+ │ ├── text_extractor.py
449
+ │ ├── web_fetcher.py
450
+ │ ├── rag_search.py
451
+ │ ├── data_visualizer.py
452
+ │ ├── file_converter.py
453
+ │ ├── email_intent_classifier.py
454
+ │ └── kpi_generator.py
455
+
456
+ ├── models/ # Data schemas
457
+ │ └── schemas.py
458
+
459
+ └── utils/ # Utilities
460
+ ├── helpers.py # Helper functions
461
+ └── rag_utils.py # RAG/vector search utilities
462
+ ```
463
+
464
+ ### Design Principles
465
+
466
+ - **Modularity**: Each tool is independently implemented
467
+ - **Type Safety**: Pydantic schemas for validation
468
+ - **Error Handling**: Comprehensive error catching and logging
469
+ - **Clean Code**: Well-documented with docstrings
470
+ - **Testability**: Easy to test individual components
471
+
472
+ ---
473
+
474
+ ## 🎖️ Hackathon Submission
475
+
476
+ ### Track 1: MCP Server
477
+
478
+ **Server Name:** MissionControlMCP
479
+
480
+ **Description:** Enterprise automation MCP server providing 8 specialized tools for document processing, web intelligence, semantic search, data visualization, and business analytics.
481
+
482
+ ### Key Features for Judges
483
+
484
+ 1. **Production-Ready**: All 8 tools are fully implemented and tested
485
+ 2. **MCP Compliant**: Follows MCP specification precisely
486
+ 3. **Real-World Value**: Solves actual enterprise automation needs
487
+ 4. **Clean Architecture**: Modular, maintainable, well-documented code
488
+ 5. **Advanced Features**: RAG search with FAISS, data visualization, NLP classification
489
+ 6. **Comprehensive Testing**: Full test suite with examples
490
+ 7. **Easy Integration**: Works seamlessly with Claude Desktop
491
+
492
+ ### Technical Highlights
493
+
494
+ - **Vector Search**: FAISS-based semantic search with sentence transformers
495
+ - **NLP Classification**: Rule-based email intent classifier with confidence scoring
496
+ - **Data Visualization**: Dynamic chart generation with matplotlib
497
+ - **File Processing**: Multi-format support (PDF, TXT, CSV)
498
+ - **Web Intelligence**: Smart web scraping with clean text extraction
499
+ - **Business Intelligence**: KPI calculation with trend analysis
500
+
501
+ ---
502
+
503
+ ## 📝 Documentation & Examples
504
+
505
+ - **[EXAMPLES.md](EXAMPLES.md)** - Real-world use cases, workflows, and ROI examples
506
+ - **[TESTING.md](TESTING.md)** - Complete testing guide with examples
507
+ - **[ARCHITECTURE.md](ARCHITECTURE.md)** - System design and architecture details
508
+ - **[API.md](API.md)** - Complete API documentation
509
+ - **[examples/](examples/)** - Sample files for testing all tools:
510
+ - `sample_report.txt` - Business report for text extraction
511
+ - `business_data.csv` - Financial data for visualization & KPIs
512
+ - `sample_email_*.txt` - Email samples for intent classification
513
+ - `sample_documents.txt` - Documents for RAG search testing
514
+
515
+ ---
516
+
517
+ ## 📝 License
518
+
519
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
520
+
521
+ Created for the MCP 1st Birthday Hackathon – Winter 2025.
522
+
523
+ ---
524
+
525
+ ## 🤝 Contributing
526
+
527
+ This project was built for the hackathon, but improvements and suggestions are welcome! Check out [EXAMPLES.md](EXAMPLES.md) for usage patterns and best practices.
528
+
529
+ ---
530
+
531
+ ## 📧 Contact
532
+
533
+ For questions about this MCP server, please reach out through the hackathon channels.
534
+
535
+ ---
536
+
537
+ ## 🌟 Acknowledgments
538
+
539
+ - Built with the [Model Context Protocol SDK](https://github.com/modelcontextprotocol)
540
+ - Powered by sentence-transformers, FAISS, and other open-source libraries
541
+ - Created for the MCP 1st Birthday Hackathon 2025
542
+
543
+ ---
544
+
545
+ **Happy Automating! 🚀**
TESTING.md ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🧪 Testing Guide
2
+
3
+ ## Quick Start: Test with Sample Files
4
+
5
+ We've created sample files in the `examples/` directory to demonstrate all MissionControlMCP tools.
6
+
7
+ ### Run All Tests
8
+
9
+ ```bash
10
+ python demo.py
11
+ ```
12
+
13
+ This will test:
14
+ - ✅ **Text Extraction** - Keywords & summarization from business report
15
+ - ✅ **Email Classification** - Intent detection on 3 sample emails
16
+ - ✅ **Data Visualization** - Line and bar charts from CSV data
17
+ - ✅ **KPI Generation** - Calculate business metrics
18
+ - ✅ **RAG Semantic Search** - Semantic search across documents
19
+
20
+ ---
21
+
22
+ ## Test Individual Tools
23
+
24
+ ### 1. Text Extractor
25
+ ```python
26
+ from tools.text_extractor import extract_text
27
+
28
+ # Read sample report
29
+ with open("examples/sample_report.txt", "r") as f:
30
+ text = f.read()
31
+
32
+ # Extract keywords
33
+ keywords = extract_text(text, operation="keywords")
34
+ print(keywords)
35
+
36
+ # Generate summary
37
+ summary = extract_text(text, operation="summarize", max_length=200)
38
+ print(summary['result'])
39
+ ```
40
+
41
+ ### 2. Email Intent Classifier
42
+ ```python
43
+ from tools.email_intent_classifier import classify_email_intent
44
+
45
+ # Test complaint email
46
+ with open("examples/sample_email_complaint.txt", "r") as f:
47
+ email = f.read()
48
+
49
+ result = classify_email_intent(email)
50
+ print(f"Intent: {result['intent']} (confidence: {result['confidence']})")
51
+ ```
52
+
53
+ ### 3. Data Visualizer
54
+ ```python
55
+ from tools.data_visualizer import visualize_data
56
+
57
+ # Load CSV data
58
+ with open("examples/business_data.csv", "r") as f:
59
+ data = f.read()
60
+
61
+ # Create revenue trend chart
62
+ chart = visualize_data(
63
+ data=data,
64
+ chart_type="line",
65
+ x_column="month",
66
+ y_column="revenue",
67
+ title="Revenue Trends"
68
+ )
69
+
70
+ # Save chart
71
+ import base64
72
+ with open("revenue_chart.png", "wb") as f:
73
+ f.write(base64.b64decode(chart['image_base64']))
74
+ ```
75
+
76
+ ### 4. KPI Generator
77
+ ```python
78
+ from tools.kpi_generator import generate_kpis
79
+ import json
80
+
81
+ data = {
82
+ "revenue": 5500000,
83
+ "costs": 3400000,
84
+ "customers": 2700,
85
+ "current_revenue": 5500000,
86
+ "previous_revenue": 5400000,
87
+ "employees": 50
88
+ }
89
+
90
+ result = generate_kpis(json.dumps(data), metrics=["revenue", "growth", "efficiency"])
91
+ print(f"Generated {len(result['kpis'])} KPIs")
92
+ print(result['summary'])
93
+ ```
94
+
95
+ ### 5. RAG Semantic Search
96
+ ```python
97
+ from tools.rag_search import search_documents
98
+
99
+ # Load sample documents
100
+ with open("examples/sample_documents.txt", "r") as f:
101
+ content = f.read()
102
+
103
+ documents = [doc.strip() for doc in content.split("##") if doc.strip()]
104
+
105
+ # Search
106
+ results = search_documents("What is machine learning?", documents, top_k=3)
107
+ for res in results['results']:
108
+ print(f"Score: {res['score']:.4f} - {res['document'][:100]}...")
109
+ ```
110
+
111
+ ---
112
+
113
+ ## Test with Claude Desktop
114
+
115
+ ### 1. Configure Claude Desktop
116
+
117
+ Edit `%APPDATA%\Claude\claude_desktop_config.json`:
118
+
119
+ ```json
120
+ {
121
+ "mcpServers": {
122
+ "mission-control": {
123
+ "command": "python",
124
+ "args": ["C:/path/to/mission_control_mcp/mcp_server.py"]
125
+ }
126
+ }
127
+ }
128
+ ```
129
+
130
+ ### 2. Restart Claude Desktop
131
+
132
+ ### 3. Try These Prompts
133
+
134
+ **Text Processing:**
135
+ ```
136
+ Extract keywords from this text: [paste sample_report.txt content]
137
+ ```
138
+
139
+ **Email Classification:**
140
+ ```
141
+ Classify this email: [paste sample_email_complaint.txt content]
142
+ ```
143
+
144
+ **Data Visualization:**
145
+ ```
146
+ Create a line chart showing revenue trends from this data: [paste business_data.csv]
147
+ ```
148
+
149
+ **KPI Generation:**
150
+ ```
151
+ Calculate KPIs from this business data: {"revenue": 5000000, "costs": 3000000, "customers": 2500}
152
+ ```
153
+
154
+ **Semantic Search:**
155
+ ```
156
+ Search these documents for information about AI: [paste sample_documents.txt]
157
+ ```
158
+
159
+ ---
160
+
161
+ ## Test MCP Server Directly
162
+
163
+ ### Run the MCP Server
164
+
165
+ ```bash
166
+ python mcp_server.py
167
+ ```
168
+
169
+ ### Test Individual Tools
170
+
171
+ ```bash
172
+ python test_individual.py
173
+ ```
174
+
175
+ This runs isolated tests on each tool (8 total).
176
+
177
+ ### MCP Server Tests
178
+
179
+ ```bash
180
+ python demo.py
181
+ ```
182
+
183
+ Tests all MCP tool handlers and server integration.
184
+
185
+ ---
186
+
187
+ ## Sample Files Overview
188
+
189
+ | File | Purpose | Tool |
190
+ |------|---------|------|
191
+ | `sample_report.txt` | Business report (2,200 chars) | Text Extractor |
192
+ | `business_data.csv` | 12 months financial data | Data Visualizer, KPI Generator |
193
+ | `sample_email_complaint.txt` | Customer complaint | Email Classifier |
194
+ | `sample_email_inquiry.txt` | Sales inquiry | Email Classifier |
195
+ | `sample_email_urgent.txt` | Urgent system alert | Email Classifier |
196
+ | `sample_documents.txt` | 5 topic documents | RAG Search |
197
+
198
+ ---
199
+
200
+ ## Expected Results
201
+
202
+ ### Text Extraction
203
+ - **Keywords:** customer, revenue, growth, operational, market, performance
204
+ - **Summary:** ~200 character executive summary
205
+
206
+ ### Email Classification
207
+ - **Complaint:** request + order intents (confidence: 1.00)
208
+ - **Inquiry:** meeting + inquiry intents (confidence: 1.00)
209
+ - **Urgent:** urgent intent (confidence: 1.00)
210
+
211
+ ### Data Visualization
212
+ - **Line Chart:** 48KB base64 PNG (1000x600px)
213
+ - **Bar Chart:** 26KB base64 PNG (1000x600px)
214
+
215
+ ### KPI Generation
216
+ - **9 KPIs calculated:** total_revenue, profit, profit_margin_percent, revenue_growth, etc.
217
+ - **Summary:** Executive insights on revenue growth and profitability
218
+
219
+ ### RAG Search
220
+ - **Query:** "What is machine learning?"
221
+ - **Top Result:** Document 1 (AI Overview) - Score: 0.56
222
+ - **Semantic matching:** Finds relevant content even with different wording
223
+
224
+ ---
225
+
226
+ ## Troubleshooting
227
+
228
+ ### FAISS Errors
229
+ ```bash
230
+ pip install faiss-cpu sentence-transformers
231
+ ```
232
+
233
+ ### Import Errors
234
+ ```bash
235
+ cd mission_control_mcp
236
+ pip install -r requirements.txt
237
+ ```
238
+
239
+ ### Python Version
240
+ Requires Python 3.11+. Check with:
241
+ ```bash
242
+ python --version
243
+ ```
244
+
245
+ ---
246
+
247
+ ## Performance Benchmarks
248
+
249
+ | Tool | Sample File | Execution Time |
250
+ |------|-------------|----------------|
251
+ | Text Extractor | 2,200 chars | ~0.5s |
252
+ | Email Classifier | 500 chars | ~0.1s |
253
+ | Data Visualizer | 12 data points | ~1.2s |
254
+ | KPI Generator | 10 metrics | ~0.3s |
255
+ | RAG Search | 6 documents | ~2.5s (first run, includes model load) |
256
+
257
+ ---
258
+
259
+ ## Next Steps
260
+
261
+ 1. ✅ Run `python demo.py` to verify all tools work
262
+ 2. ✅ Try individual tool tests with your own data
263
+ 3. ✅ Configure Claude Desktop integration
264
+ 4. ✅ Test with Claude using sample prompts
265
+ 5. ✅ Create custom workflows combining multiple tools
266
+
267
+ **Happy Testing!** 🚀
app.py ADDED
@@ -0,0 +1,864 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 🚀 MissionControlMCP - Gradio Web Interface
3
+ Beautiful GUI demo for all 8 tools!
4
+
5
+ Run: python app.py
6
+ Then share the public URL on LinkedIn!
7
+ """
8
+
9
+ import gradio as gr
10
+ import sys
11
+ import os
12
+ import json
13
+ import base64
14
+ from io import BytesIO
15
+ from PIL import Image
16
+
17
+ # Setup paths
18
+ SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
19
+ sys.path.append(SCRIPT_DIR)
20
+ EXAMPLES_DIR = os.path.join(SCRIPT_DIR, "examples")
21
+
22
+ # Import tools
23
+ from tools.pdf_reader import read_pdf
24
+ from tools.text_extractor import extract_text
25
+ from tools.web_fetcher import fetch_web_content
26
+ from tools.rag_search import search_documents
27
+ from tools.data_visualizer import visualize_data
28
+ from tools.file_converter import convert_file
29
+ from tools.email_intent_classifier import classify_email_intent
30
+ from tools.kpi_generator import generate_kpis
31
+
32
+
33
+ # ============================================================================
34
+ # TOOL FUNCTIONS
35
+ # ============================================================================
36
+
37
+ def tool_pdf_reader(pdf_file):
38
+ """PDF Reader tool"""
39
+ try:
40
+ if pdf_file is None:
41
+ return "❌ Please upload a PDF file!", None
42
+
43
+ result = read_pdf(pdf_file.name)
44
+
45
+ output = f"""✅ **PDF Analysis Complete!**
46
+
47
+ 📄 **Metadata:**
48
+ - Pages: {result['pages']}
49
+ - Characters: {len(result['text']):,}
50
+ - Author: {result['metadata'].get('author', 'N/A')}
51
+ - Title: {result['metadata'].get('title', 'N/A')}
52
+
53
+ 📝 **Extracted Text (first 1000 chars):**
54
+ {result['text'][:1000]}...
55
+ """
56
+
57
+ # Extract keywords
58
+ keywords = extract_text(result['text'], operation="keywords")
59
+ output += f"\n\n🔑 **Keywords:** {keywords['result']}"
60
+
61
+ return output, None
62
+
63
+ except Exception as e:
64
+ return f"❌ Error: {str(e)}", None
65
+
66
+
67
+ def tool_text_extractor(text, operation, max_length):
68
+ """Text Extractor tool"""
69
+ try:
70
+ if not text.strip():
71
+ return "❌ Please enter some text!"
72
+
73
+ result = extract_text(text, operation=operation, max_length=max_length)
74
+
75
+ output = f"""✅ **Text Processing Complete!**
76
+
77
+ 📊 **Operation:** {operation.upper()}
78
+ 📏 **Word Count:** {result['word_count']}
79
+
80
+ 📝 **Result:**
81
+ {result['result']}
82
+ """
83
+
84
+ return output
85
+
86
+ except Exception as e:
87
+ return f"❌ Error: {str(e)}"
88
+
89
+
90
+ def tool_web_fetcher(url):
91
+ """Web Fetcher tool"""
92
+ try:
93
+ if not url.strip():
94
+ return "❌ Please enter a URL!"
95
+
96
+ result = fetch_web_content(url)
97
+
98
+ if result['status_code'] == 999:
99
+ return f"""⚠️ **Status 999 - Bot Detection**
100
+
101
+ The website is blocking automated requests.
102
+ This is common for LinkedIn, Facebook, etc.
103
+
104
+ Try a different website!"""
105
+
106
+ output = f"""✅ **Website Fetched Successfully!**
107
+
108
+ 🌐 **URL:** {url}
109
+ 📊 **Status:** {result['status_code']}
110
+ 📄 **Title:** {result.get('title', 'N/A')}
111
+ 📏 **Content Length:** {len(result['content']):,} characters
112
+ 🔗 **Links Found:** {len(result.get('links', []))}
113
+
114
+ 📝 **Content Preview (first 1000 chars):**
115
+ {result['content'][:1000]}...
116
+ """
117
+
118
+ # Extract keywords
119
+ if len(result['content']) > 50:
120
+ keywords = extract_text(result['content'], operation="keywords")
121
+ output += f"\n\n🔑 **Keywords:** {keywords['result']}"
122
+
123
+ return output
124
+
125
+ except Exception as e:
126
+ return f"❌ Error: {str(e)}"
127
+
128
+
129
+ def tool_rag_search(query):
130
+ """RAG Search tool"""
131
+ try:
132
+ if not query.strip():
133
+ return "❌ Please enter a search query!"
134
+
135
+ # Load sample documents
136
+ docs_file = os.path.join(EXAMPLES_DIR, "sample_documents.txt")
137
+ with open(docs_file, "r", encoding="utf-8") as f:
138
+ content = f.read()
139
+
140
+ documents = [doc.strip() for doc in content.split("##") if doc.strip()]
141
+
142
+ result = search_documents(query, documents, top_k=3)
143
+
144
+ output = f"""✅ **Search Complete!**
145
+
146
+ 🔍 **Query:** "{query}"
147
+ 📚 **Documents Searched:** {len(documents)}
148
+ 📊 **Results Found:** {len(result['results'])}
149
+
150
+ 🎯 **Top Results:**
151
+
152
+ """
153
+
154
+ for i, res in enumerate(result['results'], 1):
155
+ preview = res['document'][:200].replace('\n', ' ')
156
+ output += f"""
157
+ **Result {i}** (Score: {res['score']:.4f})
158
+ {preview}...
159
+
160
+ """
161
+
162
+ return output
163
+
164
+ except Exception as e:
165
+ return f"❌ Error: {str(e)}"
166
+
167
+
168
+ def tool_data_visualizer(csv_data, chart_type, x_col, y_col, title):
169
+ """Data Visualizer tool"""
170
+ try:
171
+ if not csv_data.strip():
172
+ return "❌ Please enter CSV data!", None
173
+
174
+ result = visualize_data(
175
+ data=csv_data,
176
+ chart_type=chart_type,
177
+ x_column=x_col,
178
+ y_column=y_col,
179
+ title=title
180
+ )
181
+
182
+ # Convert base64 to image
183
+ img_data = base64.b64decode(result['image_base64'])
184
+ image = Image.open(BytesIO(img_data))
185
+
186
+ output = f"""✅ **Chart Created!**
187
+
188
+ 📊 **Chart Type:** {chart_type.upper()}
189
+ 📏 **Dimensions:** {result['dimensions']}
190
+ 📈 **Title:** {title}
191
+ """
192
+
193
+ return output, image
194
+
195
+ except Exception as e:
196
+ return f"❌ Error: {str(e)}", None
197
+
198
+
199
+ def tool_email_classifier(email_text):
200
+ """Email Intent Classifier tool"""
201
+ try:
202
+ if not email_text.strip():
203
+ return "❌ Please enter email text!"
204
+
205
+ result = classify_email_intent(email_text)
206
+
207
+ output = f"""✅ **Email Classified!**
208
+
209
+ 🎯 **Primary Intent:** {result['intent'].upper()}
210
+ 📊 **Confidence:** {result['confidence']:.2%}
211
+
212
+ 💬 **Explanation:**
213
+ {result['explanation']}
214
+ """
215
+
216
+ if result['secondary_intents']:
217
+ output += "\n\n📋 **Secondary Intents:**\n"
218
+ for intent in result['secondary_intents'][:3]:
219
+ output += f"- {intent['intent']}: {intent['confidence']:.2%}\n"
220
+
221
+ return output
222
+
223
+ except Exception as e:
224
+ return f"❌ Error: {str(e)}"
225
+
226
+
227
+ def tool_kpi_generator(business_json, metrics):
228
+ """KPI Generator tool"""
229
+ try:
230
+ if not business_json.strip():
231
+ return "❌ Please enter business data!"
232
+
233
+ # Validate JSON
234
+ json.loads(business_json)
235
+
236
+ result = generate_kpis(business_json, metrics=metrics)
237
+
238
+ output = f"""✅ **KPIs Generated!**
239
+
240
+ 📊 **Total KPIs Calculated:** {len(result['kpis'])}
241
+
242
+ 📈 **Key Metrics:**
243
+
244
+ """
245
+
246
+ # Display top 15 KPIs
247
+ for i, (name, value) in enumerate(list(result['kpis'].items())[:15], 1):
248
+ # Format based on metric type
249
+ if 'percent' in name or 'rate' in name or 'margin' in name:
250
+ formatted = f"{value:.1f}%"
251
+ elif 'revenue' in name or 'profit' in name or 'cost' in name:
252
+ formatted = f"${value:,.0f}"
253
+ else:
254
+ formatted = f"{value:,.2f}"
255
+
256
+ display_name = name.replace('_', ' ').title()
257
+ output += f"{i}. **{display_name}:** {formatted}\n"
258
+
259
+ output += f"\n\n📝 **Executive Summary:**\n{result['summary']}"
260
+
261
+ if result.get('trends'):
262
+ output += "\n\n📊 **Key Trends:**\n"
263
+ for trend in result['trends'][:5]:
264
+ output += f"- {trend}\n"
265
+
266
+ return output
267
+
268
+ except json.JSONDecodeError:
269
+ return "❌ Invalid JSON format! Please check your data."
270
+ except Exception as e:
271
+ return f"❌ Error: {str(e)}"
272
+
273
+
274
+ # ============================================================================
275
+ # LOAD SAMPLE DATA
276
+ # ============================================================================
277
+
278
+ def load_sample_csv():
279
+ csv_file = os.path.join(EXAMPLES_DIR, "business_data.csv")
280
+ with open(csv_file, "r") as f:
281
+ return f.read()
282
+
283
+ def load_sample_email():
284
+ email_file = os.path.join(EXAMPLES_DIR, "sample_email_complaint.txt")
285
+ with open(email_file, "r", encoding="utf-8") as f:
286
+ return f.read()
287
+
288
+ def load_sample_json():
289
+ return """{
290
+ "revenue": 5500000,
291
+ "costs": 3400000,
292
+ "customers": 2700,
293
+ "current_revenue": 5500000,
294
+ "previous_revenue": 5400000,
295
+ "current_customers": 2700,
296
+ "previous_customers": 2650,
297
+ "employees": 50,
298
+ "marketing_spend": 500000,
299
+ "sales": 5500000,
300
+ "cogs": 2000000
301
+ }"""
302
+
303
+
304
+ # ============================================================================
305
+ # GRADIO INTERFACE
306
+ # ============================================================================
307
+
308
+ # Custom CSS for beautiful UI
309
+ custom_css = """
310
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
311
+
312
+ .gradio-container {
313
+ font-family: 'Inter', sans-serif !important;
314
+ max-width: 1400px !important;
315
+ margin: 0 auto !important;
316
+ }
317
+
318
+ /* Header styling */
319
+ .gradio-container h1 {
320
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
321
+ -webkit-background-clip: text;
322
+ -webkit-text-fill-color: transparent;
323
+ background-clip: text;
324
+ font-size: 3em !important;
325
+ font-weight: 700 !important;
326
+ text-align: center;
327
+ margin-bottom: 0.5em;
328
+ }
329
+
330
+ /* Tab styling */
331
+ .tab-nav {
332
+ border-radius: 12px !important;
333
+ background: linear-gradient(to right, #f8f9fa, #e9ecef) !important;
334
+ padding: 8px !important;
335
+ margin-bottom: 20px !important;
336
+ }
337
+
338
+ button.selected {
339
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
340
+ color: white !important;
341
+ border-radius: 8px !important;
342
+ font-weight: 600 !important;
343
+ box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4) !important;
344
+ }
345
+
346
+ /* Button styling */
347
+ .primary-btn {
348
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
349
+ border: none !important;
350
+ color: white !important;
351
+ font-weight: 600 !important;
352
+ border-radius: 10px !important;
353
+ padding: 12px 24px !important;
354
+ font-size: 16px !important;
355
+ transition: all 0.3s ease !important;
356
+ box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
357
+ }
358
+
359
+ .primary-btn:hover {
360
+ transform: translateY(-2px) !important;
361
+ box-shadow: 0 6px 20px rgba(102, 126, 234, 0.6) !important;
362
+ }
363
+
364
+ /* Input fields */
365
+ textarea, input[type="text"] {
366
+ border-radius: 10px !important;
367
+ border: 2px solid #e9ecef !important;
368
+ padding: 12px !important;
369
+ font-size: 15px !important;
370
+ transition: border-color 0.3s ease !important;
371
+ }
372
+
373
+ textarea:focus, input[type="text"]:focus {
374
+ border-color: #667eea !important;
375
+ box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
376
+ }
377
+
378
+ /* Output boxes */
379
+ .output-class {
380
+ background: linear-gradient(to bottom, #ffffff, #f8f9fa) !important;
381
+ border-radius: 12px !important;
382
+ padding: 20px !important;
383
+ border: 2px solid #e9ecef !important;
384
+ }
385
+
386
+ /* Cards and containers */
387
+ .gr-box {
388
+ border-radius: 12px !important;
389
+ border: 1px solid #e9ecef !important;
390
+ box-shadow: 0 2px 8px rgba(0,0,0,0.05) !important;
391
+ }
392
+
393
+ /* Labels */
394
+ label {
395
+ font-weight: 600 !important;
396
+ color: #495057 !important;
397
+ font-size: 14px !important;
398
+ margin-bottom: 8px !important;
399
+ }
400
+
401
+ /* Examples */
402
+ .gr-samples-table {
403
+ border-radius: 10px !important;
404
+ overflow: hidden !important;
405
+ }
406
+
407
+ /* Footer */
408
+ .footer {
409
+ text-align: center;
410
+ padding: 30px;
411
+ background: linear-gradient(to right, #f8f9fa, #e9ecef);
412
+ border-radius: 12px;
413
+ margin-top: 30px;
414
+ }
415
+
416
+ /* Image display */
417
+ .gr-image {
418
+ border-radius: 12px !important;
419
+ border: 2px solid #e9ecef !important;
420
+ box-shadow: 0 4px 15px rgba(0,0,0,0.1) !important;
421
+ }
422
+
423
+ /* Radio buttons and checkboxes */
424
+ .gr-radio, .gr-checkbox {
425
+ padding: 10px !important;
426
+ border-radius: 8px !important;
427
+ }
428
+
429
+ /* File upload */
430
+ .gr-file {
431
+ border: 2px dashed #667eea !important;
432
+ border-radius: 12px !important;
433
+ background: linear-gradient(to bottom, #ffffff, #f8f9fa) !important;
434
+ padding: 30px !important;
435
+ }
436
+
437
+ .gr-file:hover {
438
+ border-color: #764ba2 !important;
439
+ background: #f8f9fa !important;
440
+ }
441
+ """
442
+
443
+ # Create Gradio interface
444
+ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="MissionControlMCP Demo") as demo:
445
+
446
+ gr.Markdown("# 🚀 MissionControlMCP")
447
+ gr.Markdown("### Enterprise Automation Tools - Powered by AI")
448
+
449
+ gr.HTML("""
450
+ <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px; color: white; margin-bottom: 30px;">
451
+ <h3 style="color: white; margin: 0;">✨ Try all 8 powerful tools in your browser - No installation needed! ✨</h3>
452
+ <p style="margin: 10px 0 0 0; opacity: 0.9;">Built for HuggingFace Gradio Hackathon | Claude MCP Integration</p>
453
+ </div>
454
+ """)
455
+
456
+ with gr.Tabs():
457
+
458
+ # ====== TAB 1: PDF READER ======
459
+ with gr.Tab("📄 PDF Reader"):
460
+ gr.Markdown("""
461
+ ### 📄 Extract Text and Metadata from PDF Documents
462
+ Upload any PDF file to extract its content, metadata, and keywords instantly.
463
+ """)
464
+
465
+ with gr.Row():
466
+ with gr.Column(scale=1):
467
+ pdf_input = gr.File(
468
+ label="📎 Upload PDF File",
469
+ file_types=[".pdf"],
470
+ elem_classes=["file-upload"]
471
+ )
472
+ pdf_btn = gr.Button(
473
+ "🔍 Extract Text from PDF",
474
+ variant="primary",
475
+ size="lg",
476
+ elem_classes=["primary-btn"]
477
+ )
478
+ gr.Markdown("""
479
+ **💡 Tips:**
480
+ - Supports multi-page PDFs
481
+ - Extracts metadata (author, title)
482
+ - Automatically generates keywords
483
+ """)
484
+
485
+ with gr.Column(scale=2):
486
+ pdf_output = gr.Textbox(
487
+ label="📊 Extraction Results",
488
+ lines=20,
489
+ elem_classes=["output-class"]
490
+ )
491
+ pdf_img = gr.Image(label="Preview", visible=False)
492
+
493
+ pdf_btn.click(tool_pdf_reader, inputs=[pdf_input], outputs=[pdf_output, pdf_img])
494
+
495
+ gr.Markdown("*💡 Try uploading your resume, research paper, or any PDF document!*")
496
+
497
+ # ====== TAB 2: TEXT EXTRACTOR ======
498
+ with gr.Tab("📝 Text Extractor"):
499
+ gr.Markdown("""
500
+ ### 📝 AI-Powered Text Analysis
501
+ Extract keywords, generate summaries, clean text, or split into chunks.
502
+ """)
503
+
504
+ with gr.Row():
505
+ with gr.Column(scale=1):
506
+ text_input = gr.Textbox(
507
+ label="✍️ Enter Your Text",
508
+ lines=10,
509
+ placeholder="Paste any text here - articles, reports, emails, etc...",
510
+ elem_classes=["input-field"]
511
+ )
512
+ text_operation = gr.Radio(
513
+ ["keywords", "summarize", "clean", "chunk"],
514
+ label="🛠️ Select Operation",
515
+ value="keywords",
516
+ info="Choose what to do with your text"
517
+ )
518
+ text_length = gr.Slider(
519
+ 100, 1000, 300,
520
+ label="📏 Max Length (for summarize/chunk)",
521
+ info="Adjust output length"
522
+ )
523
+ text_btn = gr.Button(
524
+ "✨ Process Text",
525
+ variant="primary",
526
+ size="lg",
527
+ elem_classes=["primary-btn"]
528
+ )
529
+
530
+ with gr.Column(scale=2):
531
+ text_output = gr.Textbox(
532
+ label="📊 Processing Results",
533
+ lines=20,
534
+ elem_classes=["output-class"]
535
+ )
536
+
537
+ text_btn.click(
538
+ tool_text_extractor,
539
+ inputs=[text_input, text_operation, text_length],
540
+ outputs=[text_output]
541
+ )
542
+
543
+ gr.Examples([
544
+ ["Artificial Intelligence is transforming businesses worldwide. Companies are leveraging AI for automation, decision-making, and customer service. Machine learning models can now process vast amounts of data and provide actionable insights.", "keywords", 300],
545
+ ["Climate change is one of the most pressing challenges of our time. Rising temperatures, extreme weather events, and environmental degradation require urgent action.", "summarize", 300]
546
+ ], inputs=[text_input, text_operation, text_length], label="📚 Try These Examples")
547
+
548
+ # ====== TAB 3: WEB FETCHER ======
549
+ with gr.Tab("🌐 Web Fetcher"):
550
+ gr.Markdown("""
551
+ ### 🌐 Scrape and Analyze Web Content
552
+ Fetch content from any website, extract clean text, and analyze it.
553
+ """)
554
+
555
+ with gr.Row():
556
+ with gr.Column(scale=1):
557
+ web_input = gr.Textbox(
558
+ label="🔗 Website URL",
559
+ placeholder="https://example.com",
560
+ value="https://example.com",
561
+ info="Enter any public website URL"
562
+ )
563
+ web_btn = gr.Button(
564
+ "🌐 Fetch Website",
565
+ variant="primary",
566
+ size="lg",
567
+ elem_classes=["primary-btn"]
568
+ )
569
+ gr.Markdown("""
570
+ **💡 Tips:**
571
+ - Works with most public websites
572
+ - Extracts clean text (no HTML)
573
+ - Finds all page links
574
+ - Some sites block bots (e.g., LinkedIn)
575
+ """)
576
+
577
+ with gr.Column(scale=2):
578
+ web_output = gr.Textbox(
579
+ label="📊 Website Content",
580
+ lines=20,
581
+ elem_classes=["output-class"]
582
+ )
583
+
584
+ web_btn.click(tool_web_fetcher, inputs=[web_input], outputs=[web_output])
585
+
586
+ gr.Examples([
587
+ ["https://example.com"],
588
+ ["https://python.org"],
589
+ ["https://github.com"]
590
+ ], inputs=[web_input], label="📚 Try These Examples")
591
+
592
+ # ====== TAB 4: RAG SEARCH ======
593
+ with gr.Tab("🔍 RAG Search"):
594
+ gr.Markdown("""
595
+ ### 🔍 Semantic Document Search with AI
596
+ Search through documents using AI-powered semantic understanding (RAG - Retrieval Augmented Generation).
597
+ """)
598
+
599
+ with gr.Row():
600
+ with gr.Column(scale=1):
601
+ rag_input = gr.Textbox(
602
+ label="🔎 Search Query",
603
+ placeholder="What are you looking for?",
604
+ value="What is machine learning?",
605
+ lines=3,
606
+ info="Ask questions in natural language"
607
+ )
608
+ rag_btn = gr.Button(
609
+ "🔍 Search Documents",
610
+ variant="primary",
611
+ size="lg",
612
+ elem_classes=["primary-btn"]
613
+ )
614
+ gr.Markdown("""
615
+ **💡 How it works:**
616
+ - Uses AI embeddings (FAISS)
617
+ - Understands meaning, not just keywords
618
+ - Searches 5 sample documents
619
+ - Returns relevance scores
620
+ """)
621
+
622
+ with gr.Column(scale=2):
623
+ rag_output = gr.Textbox(
624
+ label="📊 Search Results",
625
+ lines=20,
626
+ elem_classes=["output-class"]
627
+ )
628
+
629
+ rag_btn.click(tool_rag_search, inputs=[rag_input], outputs=[rag_output])
630
+
631
+ gr.Examples([
632
+ ["What is machine learning?"],
633
+ ["How to reduce carbon emissions?"],
634
+ ["What are modern web frameworks?"],
635
+ ["Digital marketing strategies"]
636
+ ], inputs=[rag_input], label="📚 Try These Searches")
637
+
638
+ # ====== TAB 5: DATA VISUALIZER ======
639
+ with gr.Tab("📊 Data Visualizer"):
640
+ gr.Markdown("""
641
+ ### 📊 Create Beautiful Charts from Your Data
642
+ Transform CSV data into stunning visualizations - line charts, bar charts, pie charts, and scatter plots.
643
+ """)
644
+
645
+ with gr.Row():
646
+ with gr.Column(scale=1):
647
+ viz_csv = gr.Textbox(
648
+ label="📋 CSV Data",
649
+ lines=10,
650
+ value=load_sample_csv(),
651
+ placeholder="month,revenue,costs\nJan,100000,60000",
652
+ info="Paste your CSV data here"
653
+ )
654
+ viz_chart = gr.Radio(
655
+ ["line", "bar", "pie", "scatter"],
656
+ label="📈 Chart Type",
657
+ value="line",
658
+ info="Select visualization style"
659
+ )
660
+ viz_x = gr.Textbox(label="📍 X-Axis Column", value="month")
661
+ viz_y = gr.Textbox(label="📍 Y-Axis Column", value="revenue")
662
+ viz_title = gr.Textbox(label="📝 Chart Title", value="Monthly Revenue")
663
+ viz_btn = gr.Button(
664
+ "📊 Create Chart",
665
+ variant="primary",
666
+ size="lg",
667
+ elem_classes=["primary-btn"]
668
+ )
669
+
670
+ with gr.Column(scale=2):
671
+ viz_output = gr.Textbox(
672
+ label="📊 Chart Status",
673
+ lines=5,
674
+ elem_classes=["output-class"]
675
+ )
676
+ viz_img = gr.Image(label="📈 Generated Chart", elem_classes=["chart-output"])
677
+
678
+ viz_btn.click(
679
+ tool_data_visualizer,
680
+ inputs=[viz_csv, viz_chart, viz_x, viz_y, viz_title],
681
+ outputs=[viz_output, viz_img]
682
+ )
683
+
684
+ gr.Markdown("*💡 Sample data is already loaded! Just click 'Create Chart' to see it in action.*")
685
+
686
+ # ====== TAB 6: EMAIL CLASSIFIER ======
687
+ with gr.Tab("📧 Email Classifier"):
688
+ gr.Markdown("""
689
+ ### 📧 AI-Powered Email Intent Detection
690
+ Automatically classify email intent and detect sentiment - complaint, inquiry, urgent, etc.
691
+ """)
692
+
693
+ with gr.Row():
694
+ with gr.Column(scale=1):
695
+ email_input = gr.Textbox(
696
+ label="✉️ Email Content",
697
+ lines=12,
698
+ value=load_sample_email(),
699
+ placeholder="Paste email content here...",
700
+ info="Paste any email text for analysis"
701
+ )
702
+ email_btn = gr.Button(
703
+ "🎯 Classify Email",
704
+ variant="primary",
705
+ size="lg",
706
+ elem_classes=["primary-btn"]
707
+ )
708
+ gr.Markdown("""
709
+ **💡 Detects 10 intents:**
710
+ - Complaint
711
+ - Inquiry
712
+ - Request
713
+ - Feedback
714
+ - Order
715
+ - Meeting
716
+ - Urgent
717
+ - Application
718
+ - Sales
719
+ - Other
720
+ """)
721
+
722
+ with gr.Column(scale=2):
723
+ email_output = gr.Textbox(
724
+ label="📊 Classification Results",
725
+ lines=20,
726
+ elem_classes=["output-class"]
727
+ )
728
+
729
+ email_btn.click(tool_email_classifier, inputs=[email_input], outputs=[email_output])
730
+
731
+ gr.Examples([
732
+ ["I am writing to complain about the poor service I received at your store yesterday."],
733
+ ["Could you please send me more information about your pricing plans?"],
734
+ ["URGENT: The server is down and customers cannot access the website!"]
735
+ ], inputs=[email_input], label="📚 Try These Examples")
736
+
737
+ # ====== TAB 7: KPI GENERATOR ======
738
+ with gr.Tab("📈 KPI Generator"):
739
+ gr.Markdown("""
740
+ ### 📈 Business KPI & Analytics Dashboard
741
+ Generate comprehensive business metrics and KPIs from your data automatically.
742
+ """)
743
+
744
+ with gr.Row():
745
+ with gr.Column(scale=1):
746
+ kpi_json = gr.Textbox(
747
+ label="📊 Business Data (JSON Format)",
748
+ lines=14,
749
+ value=load_sample_json(),
750
+ placeholder='{"revenue": 1000000, "costs": 600000}',
751
+ info="Enter your business metrics in JSON"
752
+ )
753
+ kpi_metrics = gr.CheckboxGroup(
754
+ ["revenue", "growth", "efficiency", "customer", "operational"],
755
+ label="📋 Metrics to Calculate",
756
+ value=["revenue", "growth", "efficiency"],
757
+ info="Select which KPI categories to generate"
758
+ )
759
+ kpi_btn = gr.Button(
760
+ "📈 Generate KPIs",
761
+ variant="primary",
762
+ size="lg",
763
+ elem_classes=["primary-btn"]
764
+ )
765
+ gr.Markdown("""
766
+ **💡 Generates:**
767
+ - Revenue metrics
768
+ - Growth rates
769
+ - Efficiency ratios
770
+ - Customer metrics
771
+ - Operational KPIs
772
+ - Executive summary
773
+ """)
774
+
775
+ with gr.Column(scale=2):
776
+ kpi_output = gr.Textbox(
777
+ label="📊 KPI Report",
778
+ lines=25,
779
+ elem_classes=["output-class"]
780
+ )
781
+
782
+ kpi_btn.click(
783
+ tool_kpi_generator,
784
+ inputs=[kpi_json, kpi_metrics],
785
+ outputs=[kpi_output]
786
+ )
787
+
788
+ gr.Markdown("*💡 Sample business data is already loaded! Just click 'Generate KPIs' to see results.*")
789
+
790
+ # Footer
791
+ gr.HTML("""
792
+ <div class="footer">
793
+ <h2 style="margin-bottom: 20px;">🎯 About MissionControlMCP</h2>
794
+
795
+ <p style="font-size: 18px; margin-bottom: 20px;">
796
+ <strong>8 enterprise-grade automation tools</strong> integrated with Claude Desktop via Model Context Protocol (MCP)
797
+ </p>
798
+
799
+ <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; margin: 30px 0;">
800
+ <div style="padding: 15px; background: white; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
801
+ <strong>📄 PDF Reader</strong><br/>
802
+ <small>Extract text from documents</small>
803
+ </div>
804
+ <div style="padding: 15px; background: white; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
805
+ <strong>📝 Text Extractor</strong><br/>
806
+ <small>Keywords & summaries</small>
807
+ </div>
808
+ <div style="padding: 15px; background: white; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
809
+ <strong>🌐 Web Fetcher</strong><br/>
810
+ <small>Scrape websites</small>
811
+ </div>
812
+ <div style="padding: 15px; background: white; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
813
+ <strong>🔍 RAG Search</strong><br/>
814
+ <small>Semantic search</small>
815
+ </div>
816
+ <div style="padding: 15px; background: white; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
817
+ <strong>📊 Data Visualizer</strong><br/>
818
+ <small>Create charts</small>
819
+ </div>
820
+ <div style="padding: 15px; background: white; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
821
+ <strong>🔄 File Converter</strong><br/>
822
+ <small>Format conversions</small>
823
+ </div>
824
+ <div style="padding: 15px; background: white; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
825
+ <strong>📧 Email Classifier</strong><br/>
826
+ <small>Intent detection</small>
827
+ </div>
828
+ <div style="padding: 15px; background: white; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
829
+ <strong>📈 KPI Generator</strong><br/>
830
+ <small>Business analytics</small>
831
+ </div>
832
+ </div>
833
+
834
+ <div style="margin-top: 30px; padding-top: 20px; border-top: 2px solid #e9ecef;">
835
+ <p style="font-size: 16px; margin: 10px 0;">
836
+ 🔗 <a href="https://github.com/AlBaraa-1/CleanEye-Hackathon" target="_blank" style="color: #667eea; text-decoration: none; font-weight: 600;">View on GitHub</a>
837
+ </p>
838
+ <p style="margin: 10px 0; color: #6c757d;">
839
+ 🏆 Built for HuggingFace Gradio x BuildWithMCP Hackathon
840
+ </p>
841
+ <p style="margin: 10px 0; color: #6c757d;">
842
+ Made with ❤️ using Python, Gradio, Claude MCP, FAISS, and Sentence Transformers
843
+ </p>
844
+ </div>
845
+ </div>
846
+ """)
847
+
848
+
849
+ # ============================================================================
850
+ # LAUNCH
851
+ # ============================================================================
852
+
853
+ if __name__ == "__main__":
854
+ print("\n" + "="*80)
855
+ print("🚀 Launching MissionControlMCP Web Interface...")
856
+ print("="*80)
857
+
858
+ # Launch with public sharing enabled
859
+ demo.launch(
860
+ share=True, # Creates public URL!
861
+ server_name="0.0.0.0",
862
+ server_port=7860,
863
+ show_error=True
864
+ )
demo.py ADDED
@@ -0,0 +1,907 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 🚀 MissionControlMCP - Interactive Demo
3
+ Try all 8 tools with real examples!
4
+
5
+ Run: python demo.py
6
+ """
7
+
8
+ import sys
9
+ import os
10
+ import json
11
+ import base64
12
+ from pathlib import Path
13
+
14
+ # Setup paths
15
+ SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
16
+ sys.path.append(SCRIPT_DIR)
17
+ EXAMPLES_DIR = os.path.join(SCRIPT_DIR, "examples")
18
+ OUTPUT_DIR = os.path.join(SCRIPT_DIR, "demo_output")
19
+
20
+ # Create output directory
21
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
22
+
23
+ # Import tools
24
+ from tools.pdf_reader import read_pdf
25
+ from tools.text_extractor import extract_text
26
+ from tools.web_fetcher import fetch_web_content
27
+ from tools.rag_search import search_documents
28
+ from tools.data_visualizer import visualize_data
29
+ from tools.file_converter import convert_file
30
+ from tools.email_intent_classifier import classify_email_intent
31
+ from tools.kpi_generator import generate_kpis
32
+
33
+
34
def print_header(title):
    """Print *title* between two full-width '=' rules on stdout."""
    rule = "=" * 80
    print("\n" + rule)
    print(f" {title}")
    print(rule)
39
+
40
+
41
def print_section(title):
    """Print *title* framed by light horizontal rules with a 📌 marker."""
    rule = "─" * 80
    print("\n" + rule)
    print("📌 " + title)
    print(rule)
46
+
47
+
48
+ def pause(message="Press Enter to continue..."):
49
+ """Pause and wait for user input"""
50
+ input(f"\n{message}")
51
+
52
+
53
def save_chart(image_base64, filename):
    """Decode a base64-encoded chart image and write it into OUTPUT_DIR.

    Returns the path of the file that was written.
    """
    target = os.path.join(OUTPUT_DIR, filename)
    raw = base64.b64decode(image_base64)
    with open(target, "wb") as handle:
        handle.write(raw)
    print(f"💾 Chart saved: {target}")
    return target
60
+
61
+
62
+ # ============================================================================
63
+ # TOOL 1: PDF READER
64
+ # ============================================================================
65
+
66
def demo_pdf_reader() -> None:
    """Demo: PDF Reader - Extract text from PDFs.

    Interactive walkthrough: explains the tool, then either reads a
    user-supplied PDF (path typed at the prompt) or, when the user
    skips, prints a canned code example instead of calling the tool.
    """
    print_header("TOOL 1: PDF READER 📄")

    print("\n📖 What it does:")
    print(" • Extracts all text from PDF files")
    print(" • Gets metadata (author, title, pages)")
    print(" • Perfect for reading reports, contracts, invoices")

    print("\n💡 Real-world uses:")
    print(" • Extract data from invoices")
    print(" • Read research papers")
    print(" • Process legal contracts")
    print(" • Analyze business reports")

    pause("\nReady to see it in action? Press Enter...")

    # Check if user has their own PDF
    print("\n" + "─"*80)
    custom_pdf = input("Enter PDF file path (or press Enter to skip): ").strip()

    if custom_pdf and os.path.exists(custom_pdf):
        print(f"\n📄 Reading your PDF: {custom_pdf}")
        try:
            # read_pdf returns a dict with 'pages', 'text' and 'metadata'
            # keys (keys used below) — see tools/pdf_reader.
            result = read_pdf(custom_pdf)
            print(f"\n✅ Successfully extracted:")
            print(f" • Pages: {result['pages']}")
            print(f" • Characters: {len(result['text']):,}")
            print(f" • Author: {result['metadata'].get('author', 'N/A')}")
            print(f"\n📝 First 300 characters:")
            print(result['text'][:300] + "...")

            # Extract keywords from PDF — chains into the text extractor
            # tool to show the tools composing.
            print("\n🔑 Extracting keywords from PDF...")
            keywords = extract_text(result['text'], operation="keywords")
            print(f"Keywords: {keywords['result']}")

        except Exception as e:
            # Broad catch is intentional here: this is a best-effort demo
            # and any failure should be reported, not crash the tour.
            print(f"❌ Error: {e}")
    else:
        # No (valid) path supplied: show a static usage example instead.
        print("\n📝 Example: How it works")
        print("```python")
        print("result = read_pdf('document.pdf')")
        print("print(f'Pages: {result[\"pages\"]}')")
        print("print(result['text'][:500]) # First 500 chars")
        print("```")
        print("\n💬 Output:")
        print(" Pages: 16")
        print(" Text: College Of Engineering - System Analysis Project...")

    pause()
117
+
118
+
119
+ # ============================================================================
120
+ # TOOL 2: TEXT EXTRACTOR
121
+ # ============================================================================
122
+
123
def demo_text_extractor() -> None:
    """Demo: Text Extractor - Process and analyze text.

    Walks through the four extract_text operations (keywords, summarize,
    clean, chunk) on a bundled sample report, then optionally repeats
    keywords + summary on text the user pastes in.
    """
    print_header("TOOL 2: TEXT EXTRACTOR 📝")

    print("\n📖 What it does:")
    print(" • Extract keywords from any text")
    print(" • Generate summaries (any length)")
    print(" • Clean messy text")
    print(" • Split text into chunks")

    print("\n💡 Real-world uses:")
    print(" • Summarize long documents")
    print(" • Find main topics in articles")
    print(" • Clean data before analysis")
    print(" • Prepare text for processing")

    pause("\nReady to try it? Press Enter...")

    # Load sample report shipped under examples/.
    print_section("Using sample business report")
    sample_file = os.path.join(EXAMPLES_DIR, "sample_report.txt")

    try:
        with open(sample_file, "r", encoding="utf-8") as f:
            text = f.read()

        print(f"📄 Loaded text: {len(text)} characters")
        print(f"\nPreview: {text[:200]}...")

        pause("\nPress Enter to extract keywords...")

        # Operation 1: Keywords
        print_section("Operation 1: Extract Keywords")
        keywords = extract_text(text, operation="keywords")
        print(f"🔑 Keywords: {keywords['result']}")

        pause("\nPress Enter to generate summary...")

        # Operation 2: Summarize
        print_section("Operation 2: Generate Summary")
        summary = extract_text(text, operation="summarize", max_length=300)
        print(f"📝 Summary ({len(summary['result'])} chars):")
        print(summary['result'])

        pause("\nPress Enter to clean text...")

        # Operation 3: Clean — uses a deliberately messy literal so the
        # before/after contrast is visible.
        print_section("Operation 3: Clean Text")
        messy_text = " This has extra spaces\n\n\nand newlines "
        cleaned = extract_text(messy_text, operation="clean")
        print(f"Before: '{messy_text}'")
        print(f"After: '{cleaned['result']}'")

        # Operation 4: Chunk
        print_section("Operation 4: Split into Chunks")
        chunks = extract_text(text[:500], operation="chunk", max_length=100)
        # NOTE(review): assumes extract_text joins chunks with this exact
        # delimiter — keep in sync with tools/text_extractor.
        chunk_list = chunks['result'].split("\n\n---CHUNK---\n\n")
        print(f"✂️ Split into {len(chunk_list)} chunks (100 chars each)")
        print(f"Chunk 1: {chunk_list[0][:80]}...")

        # Try custom text supplied interactively.
        print("\n" + "─"*80)
        custom_text = input("\n✏️ Want to try your own text? Enter it (or press Enter to skip): ").strip()
        if custom_text:
            print("\n🔑 Keywords from your text:")
            result = extract_text(custom_text, operation="keywords")
            print(result['result'])

            print("\n📝 Summary of your text:")
            result = extract_text(custom_text, operation="summarize", max_length=300)
            if result['result']:
                print(result['result'])
            else:
                # If summary is empty, show first 300 chars as fallback
                print(custom_text[:300] + ("..." if len(custom_text) > 300 else ""))

    except Exception as e:
        # Best-effort demo: report and continue to the next tool.
        print(f"❌ Error: {e}")

    pause()
203
+
204
+
205
+ # ============================================================================
206
+ # TOOL 3: WEB FETCHER
207
+ # ============================================================================
208
+
209
def demo_web_fetcher():
    """Demo: Web Fetcher - Scrape web content.

    Interactive loop: prompts for a URL (defaulting to example.com),
    fetches it with fetch_web_content, reports status/title/links,
    warns on bot-blocked (status 999) or empty pages, and optionally
    chains the page text into the keyword extractor. Repeats until the
    user answers anything other than 'y' to the retry prompt.
    """
    print_header("TOOL 3: WEB FETCHER 🌐")

    print("\n📖 What it does:")
    print(" • Fetches content from any website")
    print(" • Extracts clean text (no HTML tags)")
    print(" • Finds all links on the page")
    print(" • Gets page title and metadata")

    print("\n💡 Real-world uses:")
    print(" • Monitor competitor websites")
    print(" • Collect research data")
    print(" • Track price changes")
    print(" • Gather news articles")

    pause("\nReady to fetch a website? Press Enter...")

    # Allow retry loop
    while True:
        # Get URL from user; empty input falls back to a safe default.
        print("\n" + "─"*80)
        url = input("Enter URL to fetch (or press Enter for example.com): ").strip()
        if not url:
            url = "https://example.com"

        print(f"\n🌐 Fetching: {url}")
        print("⏳ Please wait...")

        # (Fixed: removed dead local `success` that was assigned but never read.)
        try:
            result = fetch_web_content(url)

            print(f"\n✅ Success!")
            print(f" • Status: {result['status_code']}")
            print(f" • Title: {result.get('title', 'N/A')}")
            print(f" • Content length: {len(result['content']):,} characters")
            print(f" • Links found: {len(result.get('links', []))}")

            # Check if content is available
            if result['status_code'] == 999:
                # 999 is a non-standard status some sites return to bots.
                print(f"\n⚠️ Status 999 detected - Website is blocking automated requests")
                print(" This is common for LinkedIn, Facebook, and other sites with bot protection")
                print(" Try a different website!")
            elif not result['content'].strip():
                print(f"\n⚠️ No content extracted - the page might be dynamic (JavaScript-based)")
            else:
                print(f"\n📄 Content preview (first 500 chars):")
                print(result['content'][:500] + "...")

                if result.get('links'):
                    print(f"\n🔗 First 5 links:")
                    for link in result['links'][:5]:
                        print(f" • {link[:80]}")  # Truncate long URLs

                # Extract keywords from webpage only when there is enough
                # text for the keyword tool to be meaningful.
                if len(result['content']) > 50:
                    pause("\nPress Enter to extract keywords from this page...")
                    keywords = extract_text(result['content'], operation="keywords")
                    print(f"\n🔑 Keywords from webpage:")
                    print(f" {keywords['result']}")

        except Exception as e:
            # Network/parse failures are expected with arbitrary URLs:
            # report and let the user retry.
            print(f"❌ Error fetching URL: {e}")
            print("Tip: Make sure the URL is valid and accessible!")

        # Ask if user wants to try another URL
        print("\n" + "─"*80)
        retry = input("Try another URL? (y/n): ").strip().lower()
        if retry != 'y':
            break

    pause()
283
+
284
+
285
+ # ============================================================================
286
+ # TOOL 4: RAG SEARCH
287
+ # ============================================================================
288
+
289
def demo_rag_search() -> None:
    """Demo: RAG Search - Semantic document search.

    Loads sample documents (split on '##' section markers), runs three
    canned semantic queries through search_documents, then offers one
    free-form query to the user.
    """
    print_header("TOOL 4: RAG SEARCH 🔍")

    print("\n📖 What it does:")
    print(" • Semantic search (understands meaning, not just keywords)")
    print(" • Finds relevant documents even with different words")
    print(" • Uses AI embeddings (sentence transformers)")
    print(" • Powered by FAISS vector database")

    print("\n💡 Real-world uses:")
    print(" • Search company knowledge base")
    print(" • Find similar documents")
    print(" • Answer questions from docs")
    print(" • Build smart FAQ systems")

    pause("\nReady to see semantic search in action? Press Enter...")

    # Load sample documents
    print_section("Loading sample documents")
    docs_file = os.path.join(EXAMPLES_DIR, "sample_documents.txt")

    try:
        with open(docs_file, "r", encoding="utf-8") as f:
            content = f.read()

        # Documents are delimited by '##' headings in the sample file.
        documents = [doc.strip() for doc in content.split("##") if doc.strip()]
        print(f"📚 Loaded {len(documents)} documents about:")
        # NOTE(review): topics list is hard-coded to mirror the sample
        # file's sections — keep in sync with examples/sample_documents.txt.
        topics = ["AI & Machine Learning", "Climate Change", "Web Development",
                  "Digital Marketing", "Financial Technology"]
        for i, topic in enumerate(topics, 1):
            print(f" {i}. {topic}")

        pause("\nPress Enter to search...")

        # Example searches: (query, description) pairs — each description
        # states which sample document the query should surface.
        queries = [
            ("What is machine learning?", "Testing: Does it find AI doc?"),
            ("How to reduce carbon emissions?", "Testing: Does it find climate doc?"),
            ("What are modern web frameworks?", "Testing: Does it find web dev doc?"),
        ]

        for query, description in queries:
            print_section(description)
            print(f"🔍 Query: '{query}'")
            print("⏳ Searching...")

            result = search_documents(query, documents, top_k=2)

            # result['results'] entries carry 'document' and 'score' keys.
            print(f"\n✅ Found {len(result['results'])} relevant results:")
            for i, res in enumerate(result['results'], 1):
                preview = res['document'][:120].replace('\n', ' ')
                print(f"\n {i}. Relevance Score: {res['score']:.4f}")
                print(f" {preview}...")

            pause()

        # Custom search supplied interactively.
        print("\n" + "─"*80)
        custom_query = input("\n✏️ Try your own search query (or press Enter to skip): ").strip()
        if custom_query:
            print(f"\n🔍 Searching for: '{custom_query}'")
            result = search_documents(custom_query, documents, top_k=3)
            print(f"\n📊 Top {len(result['results'])} results:")
            for i, res in enumerate(result['results'], 1):
                preview = res['document'][:100].replace('\n', ' ')
                print(f"\n {i}. Score: {res['score']:.4f}")
                print(f" {preview}...")

    except Exception as e:
        # Embedding/model errors are hard to diagnose from the message
        # alone, so dump the full traceback for this tool.
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()

    pause()
364
+
365
+
366
+ # ============================================================================
367
+ # TOOL 5: DATA VISUALIZER
368
+ # ============================================================================
369
+
370
def demo_data_visualizer() -> None:
    """Demo: Data Visualizer - Create charts.

    Builds three charts (line, bar, pie) from bundled CSV data via
    visualize_data and saves each PNG into OUTPUT_DIR through save_chart.
    """
    print_header("TOOL 5: DATA VISUALIZER 📊")

    print("\n📖 What it does:")
    print(" • Creates beautiful charts from data")
    print(" • Supports: Bar, Line, Pie, Scatter plots")
    print(" • Accepts CSV or JSON data")
    print(" • Exports as PNG images")

    print("\n💡 Real-world uses:")
    print(" • Visualize sales trends")
    print(" • Create financial reports")
    print(" • Compare performance metrics")
    print(" • Present data insights")

    pause("\nReady to create charts? Press Enter...")

    # Load sample data
    print_section("Loading business data")
    csv_file = os.path.join(EXAMPLES_DIR, "business_data.csv")

    try:
        with open(csv_file, "r") as f:
            csv_data = f.read()

        print("📁 Sample data (12 months):")
        print(csv_data[:200] + "...")

        pause("\nPress Enter to create LINE CHART (Revenue Trends)...")

        # Chart 1: Line chart — visualize_data returns a dict carrying a
        # base64-encoded PNG under 'image_base64' (keys used below).
        print_section("Creating Chart 1: Revenue Line Chart")
        result1 = visualize_data(
            data=csv_data,
            chart_type="line",
            x_column="month",
            y_column="revenue",
            title="Monthly Revenue Trends 2024"
        )
        filepath1 = save_chart(result1['image_base64'], "revenue_trends.png")
        print(f"✅ Line chart created!")
        print(f" Size: {len(result1['image_base64']):,} bytes (base64)")
        print(f" Dimensions: {result1['dimensions']}")

        pause("\nPress Enter to create BAR CHART (Monthly Costs)...")

        # Chart 2: Bar chart
        print_section("Creating Chart 2: Costs Bar Chart")
        result2 = visualize_data(
            data=csv_data,
            chart_type="bar",
            x_column="month",
            y_column="costs",
            title="Monthly Costs 2024"
        )
        filepath2 = save_chart(result2['image_base64'], "monthly_costs.png")
        print(f"✅ Bar chart created!")

        pause("\nPress Enter to create PIE CHART (Customer Distribution)...")

        # Chart 3: Pie chart
        print_section("Creating Chart 3: Customers Pie Chart")
        # Create sample pie data (inline CSV; left-aligned because it is
        # the literal content of the string, not code indentation).
        pie_data = """category,value
Q1,650
Q2,600
Q3,550
Q4,500"""
        result3 = visualize_data(
            data=pie_data,
            chart_type="pie",
            x_column="category",
            y_column="value",
            title="Customers by Quarter"
        )
        filepath3 = save_chart(result3['image_base64'], "customer_pie.png")
        print(f"✅ Pie chart created!")

        print(f"\n📊 All charts saved in: {OUTPUT_DIR}")
        print(f" • {os.path.basename(filepath1)}")
        print(f" • {os.path.basename(filepath2)}")
        print(f" • {os.path.basename(filepath3)}")

        print("\n💡 You can open these PNG files to view the charts!")

    except Exception as e:
        # Plotting failures can originate deep in the stack; show the
        # full traceback to make them debuggable.
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()

    pause()
462
+
463
+
464
+ # ============================================================================
465
+ # TOOL 6: FILE CONVERTER
466
+ # ============================================================================
467
+
468
def demo_file_converter():
    """Demo: File Converter - convert files between formats (CSV/TXT/PDF).

    Walks through two canned conversions (CSV→TXT, TXT→CSV) using the
    bundled example files, then optionally converts a user-supplied file.
    Interactive: blocks on pause()/input(); returns nothing.
    """
    print_header("TOOL 6: FILE CONVERTER 🔄")

    print("\n📖 What it does:")
    print(" • Convert PDF ↔ TXT")
    print(" • Convert TXT ↔ CSV")
    print(" • Batch file processing")
    print(" • Preserves data integrity")

    print("\n💡 Real-world uses:")
    print(" • Extract text from PDFs")
    print(" • Convert reports to CSV for analysis")
    print(" • Prepare data for databases")
    print(" • Archive documents in different formats")

    print("\n🔧 Available conversions:")
    print(" • pdf_to_txt - Extract text from PDF")
    print(" • txt_to_pdf - Create PDF from text")
    print(" • csv_to_txt - Convert CSV to plain text")
    print(" • txt_to_csv - Structure text as CSV")

    pause("\nReady to see file conversions? Press Enter...")

    try:
        # Demo 1: CSV to TXT
        print_section("Demo 1: CSV → TXT Conversion")
        csv_file = os.path.join(EXAMPLES_DIR, "business_data.csv")
        txt_output = os.path.join(OUTPUT_DIR, "business_data.txt")

        print(f"📂 Converting: business_data.csv → business_data.txt")
        print("⏳ Processing...")

        # NOTE(review): this demo passes conversion_type=..., but
        # mcp_server.py calls convert_file(..., output_format=...) —
        # confirm which keyword tools.file_converter.convert_file accepts.
        result1 = convert_file(
            input_path=csv_file,
            output_path=txt_output,
            conversion_type="csv_to_txt"
        )

        if result1['success']:
            print(f"✅ Conversion successful!")
            print(f" Output: {result1['output_file']}")

            # Show preview (first 300 chars only, keeps the screen readable)
            with open(txt_output, 'r', encoding='utf-8') as f:
                preview = f.read()[:300]
            print(f"\n📄 Preview of converted file:")
            print(preview + "...")

        pause("\nPress Enter for next conversion...")

        # Demo 2: TXT to CSV
        print_section("Demo 2: TXT → CSV Conversion")
        txt_input = os.path.join(EXAMPLES_DIR, "sample_report.txt")
        csv_output = os.path.join(OUTPUT_DIR, "sample_report.csv")

        print(f"📂 Converting: sample_report.txt → sample_report.csv")
        print("⏳ Processing...")

        result2 = convert_file(
            input_path=txt_input,
            output_path=csv_output,
            conversion_type="txt_to_csv"
        )

        if result2['success']:
            print(f"✅ Conversion successful!")
            print(f" Output: {result2['output_file']}")

            # Show preview of the structured output
            with open(csv_output, 'r', encoding='utf-8') as f:
                lines = f.readlines()[:5]
            print(f"\n📄 First 5 lines of CSV:")
            for line in lines:
                print(f" {line.strip()}")

        # BUGFIX: this line previously contained a mojibake replacement
        # character (�); restored the intended folder emoji.
        print(f"\n📁 Converted files saved in: {OUTPUT_DIR}")
        print(f" • business_data.txt")
        print(f" • sample_report.csv")

        # Offer custom conversion on a user-supplied file
        print("\n" + "─"*80)
        print("\n🔧 Want to convert your own file?")
        print("Supported conversions: pdf_to_txt, txt_to_pdf, csv_to_txt, txt_to_csv")

        custom_input = input("\nEnter input file path (or press Enter to skip): ").strip()
        if custom_input and os.path.exists(custom_input):
            custom_output = input("Enter output file path: ").strip()
            conversion_type = input("Enter conversion type (e.g., pdf_to_txt): ").strip()

            if custom_output and conversion_type:
                print(f"\n🔄 Converting {os.path.basename(custom_input)}...")
                try:
                    result = convert_file(custom_input, custom_output, conversion_type)
                    if result['success']:
                        print(f"✅ Success! File saved: {result['output_file']}")
                except Exception as e:
                    # Keep the demo alive on a bad custom conversion
                    print(f"❌ Conversion failed: {e}")

    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()

    pause()
573
+
574
+
575
+ # ============================================================================
576
+ # TOOL 7: EMAIL INTENT CLASSIFIER
577
+ # ============================================================================
578
+
579
def demo_email_classifier() -> None:
    """Demo: Email Intent Classifier - Understand email purpose.

    Classifies three bundled sample emails with classify_email_intent(),
    showing primary/secondary intents and confidences, then optionally
    classifies an email pasted by the user. Interactive: blocks on
    pause()/input(); returns nothing.
    """
    print_header("TOOL 7: EMAIL INTENT CLASSIFIER 📧")

    print("\n📖 What it does:")
    print(" • Automatically classifies email intent")
    print(" • Detects 10 different types")
    print(" • Gives confidence scores")
    print(" • Finds secondary intents too")

    print("\n📬 Detects these intents:")
    # The 10 intent labels advertised by the demo (display order only —
    # presumably matches the classifier's label set; verify against
    # tools.email_intent_classifier).
    intents = [
        "complaint", "inquiry", "request", "feedback", "order",
        "meeting", "urgent", "application", "sales", "other"
    ]
    for i, intent in enumerate(intents, 1):
        print(f" {i:2d}. {intent.title()}")

    print("\n💡 Real-world uses:")
    print(" • Auto-route customer emails")
    print(" • Prioritize urgent messages")
    print(" • Organize inbox automatically")
    print(" • Track complaint patterns")

    pause("\nReady to classify emails? Press Enter...")

    # Test with sample emails: (filename in EXAMPLES_DIR, display label)
    email_files = [
        ("sample_email_complaint.txt", "Customer Complaint"),
        ("sample_email_inquiry.txt", "Sales Inquiry"),
        ("sample_email_urgent.txt", "Urgent Issue"),
    ]

    for filename, label in email_files:
        print_section(f"Email: {label}")
        filepath = os.path.join(EXAMPLES_DIR, filename)

        try:
            with open(filepath, "r", encoding="utf-8") as f:
                email_text = f.read()

            # Show only the first 200 chars so the screen stays readable
            print(f"📧 Email content:")
            print(email_text[:200] + "...\n")

            result = classify_email_intent(email_text)

            print(f"🎯 Classification Results:")
            print(f" Primary Intent: {result['intent'].upper()}")
            print(f" Confidence: {result['confidence']:.2%}")

            # Up to three runner-up intents, if any were reported
            if result['secondary_intents']:
                print(f"\n Secondary Intents:")
                for intent in result['secondary_intents'][:3]:
                    print(f" • {intent['intent']}: {intent['confidence']:.2%}")

            print(f"\n💬 {result['explanation']}")

            pause()

        except Exception as e:
            # A missing/unreadable sample file skips to the next email
            # instead of aborting the whole demo.
            print(f"❌ Error: {e}")

    # Custom email: let the user classify their own text
    print("\n" + "─"*80)
    print("\n✏️ Want to try your own email?")
    custom_email = input("Paste email text (or press Enter to skip): ").strip()

    if custom_email:
        print("\n🔍 Analyzing your email...")
        result = classify_email_intent(custom_email)
        print(f"\n🎯 Intent: {result['intent'].upper()}")
        print(f" Confidence: {result['confidence']:.2%}")
        if result['secondary_intents']:
            print(f" Also detected: {result['secondary_intents'][0]['intent']}")

    pause()
655
+
656
+
657
+ # ============================================================================
658
+ # TOOL 8: KPI GENERATOR
659
+ # ============================================================================
660
+
661
def demo_kpi_generator() -> None:
    """Demo: KPI Generator - Calculate business metrics.

    Feeds a canned business dataset to generate_kpis(), pretty-prints the
    resulting KPIs, executive summary and trends, then optionally runs on
    user-supplied JSON. Interactive: blocks on pause()/input().
    """
    print_header("TOOL 8: KPI GENERATOR 📈")

    print("\n📖 What it does:")
    print(" • Calculates business KPIs automatically")
    print(" • Analyzes 5 metric categories")
    print(" • Identifies trends and insights")
    print(" • Generates executive summaries")

    print("\n📊 Metric categories:")
    print(" 1. Revenue - Total revenue, profit, margins")
    print(" 2. Growth - Growth rates, trends over time")
    print(" 3. Efficiency - Revenue per employee/customer")
    print(" 4. Customer - Customer acquisition, retention")
    print(" 5. Operational - Operational efficiency metrics")

    print("\n💡 Real-world uses:")
    print(" • Monthly performance reports")
    print(" • Executive dashboards")
    print(" • Investor presentations")
    print(" • Business health monitoring")

    pause("\nReady to generate KPIs? Press Enter...")

    # Sample business data. The current_/previous_ pairs presumably feed
    # the growth metrics and employees/marketing_spend the efficiency
    # metrics — confirm against tools.kpi_generator.
    print_section("Sample Business Data")
    business_data = {
        "revenue": 5500000,
        "costs": 3400000,
        "customers": 2700,
        "current_revenue": 5500000,
        "previous_revenue": 5400000,
        "current_customers": 2700,
        "previous_customers": 2650,
        "employees": 50,
        "marketing_spend": 500000,
        "sales": 5500000,
        "cogs": 2000000
    }

    print("📊 Input data:")
    for key, value in business_data.items():
        # Money-like keys get a $ prefix; everything else prints as a count
        if 'revenue' in key or 'cost' in key or 'spend' in key or 'sales' in key or 'cogs' in key:
            print(f" • {key}: ${value:,}")
        else:
            print(f" • {key}: {value:,}")

    pause("\nPress Enter to calculate KPIs...")

    try:
        # Generate KPIs
        print_section("Calculating KPIs")
        print("⏳ Analyzing data...")

        # The tool takes a JSON *string*, hence the dumps() here
        result = generate_kpis(
            json.dumps(business_data),
            metrics=["revenue", "growth", "efficiency"]
        )

        print(f"\n✅ Generated {len(result['kpis'])} KPIs:")
        print("\n📈 Key Metrics:")

        # Display KPIs nicely (capped at 10 so the screen doesn't scroll away)
        kpi_items = list(result['kpis'].items())
        for i, (name, value) in enumerate(kpi_items[:10], 1):  # Show top 10
            # Format based on metric type, inferred from the KPI's name.
            # Percent-ish names win over money-ish names (e.g. profit_margin
            # renders as a percentage because 'margin' is checked first).
            if 'percent' in name or 'rate' in name or 'margin' in name:
                formatted = f"{value:.1f}%"
            elif 'revenue' in name or 'profit' in name or 'cost' in name:
                formatted = f"${value:,.0f}"
            else:
                formatted = f"{value:,.2f}"

            # Clean name: snake_case -> Title Case for display
            display_name = name.replace('_', ' ').title()
            print(f" {i:2d}. {display_name}: {formatted}")

        if len(kpi_items) > 10:
            print(f" ... and {len(kpi_items) - 10} more")

        pause("\nPress Enter to see executive summary...")

        # Summary
        print_section("Executive Summary")
        print(result['summary'])

        # Trends ('trends' is an optional key in the tool's result)
        if result.get('trends'):
            print("\n📊 Key Trends Identified:")
            for i, trend in enumerate(result['trends'], 1):
                print(f" {i}. {trend}")

        # Try custom data
        print("\n" + "─"*80)
        print("\n✏️ Want to calculate KPIs for your own data?")
        print("Enter JSON data (or press Enter to skip):")
        print("Example: {\"revenue\": 1000000, \"costs\": 600000, \"customers\": 500}")

        custom_data = input("\nYour data: ").strip()
        if custom_data:
            try:
                # Validate JSON up front so the user gets a clear error
                json.loads(custom_data)
                result = generate_kpis(custom_data, metrics=["revenue"])
                print(f"\n✅ Your KPIs:")
                for name, value in list(result['kpis'].items())[:5]:
                    print(f" • {name}: {value}")
            except json.JSONDecodeError:
                print("❌ Invalid JSON format!")
            except Exception as e:
                print(f"❌ Error: {e}")

    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()

    pause()
780
+
781
+
782
+ # ============================================================================
783
+ # MAIN MENU
784
+ # ============================================================================
785
+
786
def show_menu():
    """Render the top-level menu of demo options to stdout."""
    # Framed banner at the top of every menu display
    banner = [
        "\n" + "╔" + "═"*78 + "╗",
        "║" + " "*20 + "🚀 MissionControlMCP Demo" + " "*33 + "║",
        "║" + " "*25 + "Try All 8 Tools!" + " "*36 + "║",
        "╚" + "═"*78 + "╝",
    ]
    for row in banner:
        print(row)

    print("\n📋 MENU - Choose a tool to try:")

    # One entry per demo, in the same order main() dispatches them
    entries = [
        "\n [1] 📄 PDF Reader - Extract text from PDFs",
        " [2] 📝 Text Extractor - Keywords, summaries, cleaning",
        " [3] 🌐 Web Fetcher - Scrape website content",
        " [4] 🔍 RAG Search - Semantic document search",
        " [5] 📊 Data Visualizer - Create beautiful charts",
        " [6] 🔄 File Converter - Convert file formats",
        " [7] 📧 Email Classifier - Detect email intent",
        " [8] 📈 KPI Generator - Business metrics & insights",
        "\n [9] 🎯 Run ALL Tools - Full demo (recommended!)",
        " [0] 🚪 Exit",
    ]
    for row in entries:
        print(row)

    print("\n" + "─"*80)
807
+
808
+
809
def run_all_tools():
    """Walk through every tool demo in order, then print a wrap-up summary."""
    print_header("🎯 RUNNING ALL TOOLS - COMPLETE DEMO")
    print("\nThis will walk you through all 8 tools with examples.")
    print("You can pause, try your own data, and explore each tool.")

    pause("\nReady to start? Press Enter...")

    # Demos run in the same order as the numbered menu entries
    demo_sequence = (
        demo_pdf_reader,
        demo_text_extractor,
        demo_web_fetcher,
        demo_rag_search,
        demo_data_visualizer,
        demo_file_converter,
        demo_email_classifier,
        demo_kpi_generator,
    )

    total = len(demo_sequence)
    separator = "=" * 80
    for position, run_demo in enumerate(demo_sequence, start=1):
        print("\n\n" + separator)
        print(f" TOOL {position} OF {total}")
        print(separator)
        run_demo()

    print_header("🎉 DEMO COMPLETE!")
    print("\n✅ You've explored all 8 MissionControlMCP tools!")
    print(f"\n📁 Generated files saved in: {OUTPUT_DIR}")
    print("\n💡 Next steps:")
    print(" • Try the tools with your own data")
    print(" • Integrate with Claude Desktop")
    print(" • Build custom workflows")
    print(" • Check out the documentation (README.md)")
    print("\n🚀 Happy automating!")
843
+
844
+
845
def main() -> None:
    """Main program loop.

    Shows the menu, dispatches the chosen demo, and after each run (except
    the run-everything option and exit) asks whether to return to the menu.
    """

    print("\n" + "╔" + "═"*78 + "╗")
    print("║" + " "*15 + "Welcome to MissionControlMCP Demo!" + " "*29 + "║")
    print("╚" + "═"*78 + "╝")

    print("\n👋 This interactive demo lets you:")
    print(" ✅ Try all 8 enterprise automation tools")
    print(" ✅ See real examples with sample data")
    print(" ✅ Test with your own data")
    print(" ✅ Understand what each tool does")

    pause("\nPress Enter to continue...")

    while True:
        show_menu()

        choice = input("\n👉 Enter your choice (0-9): ").strip()

        if choice == "1":
            demo_pdf_reader()
        elif choice == "2":
            demo_text_extractor()
        elif choice == "3":
            demo_web_fetcher()
        elif choice == "4":
            demo_rag_search()
        elif choice == "5":
            demo_data_visualizer()
        elif choice == "6":
            demo_file_converter()
        elif choice == "7":
            demo_email_classifier()
        elif choice == "8":
            demo_kpi_generator()
        elif choice == "9":
            run_all_tools()
        elif choice == "0":
            print("\n👋 Thanks for trying MissionControlMCP!")
            print("🚀 Check out the docs for more: README.md")
            break
        else:
            print("\n❌ Invalid choice! Please enter 0-9")

        # Ask if user wants to continue.
        # NOTE(review): this prompt also fires after an *invalid* choice
        # (anything != "9"); confirm that is intended.
        if choice != "9":  # Don't ask after running all tools
            print("\n" + "─"*80)
            continue_choice = input("Return to menu? (y/n): ").strip().lower()
            if continue_choice != 'y':
                print("\n👋 Thanks for trying MissionControlMCP!")
                break
897
+
898
+
899
if __name__ == "__main__":
    # Top-level entry point: run the interactive demo loop.
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl+C during any input()/pause() prompt exits gracefully.
        print("\n\n👋 Demo interrupted. See you next time!")
    except Exception as e:
        # Last-resort handler so the demo never dies without an explanation.
        print(f"\n\n❌ Unexpected error: {e}")
        import traceback
        traceback.print_exc()
mcp_server.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MissionControlMCP - Enterprise Automation MCP Server
3
+ Main server implementation using MCP SDK
4
+ """
5
+ import logging
6
+ from typing import Any
7
+ import sys
8
+ import os
9
+
10
+ # Setup paths
11
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
12
+
13
+ # Import MCP SDK
14
+ from mcp.server import Server
15
+ from mcp.types import Tool, TextContent
16
+
17
+ # Import tool functions
18
+ from tools.pdf_reader import read_pdf
19
+ from tools.text_extractor import extract_text
20
+ from tools.web_fetcher import fetch_web_content
21
+ from tools.rag_search import search_documents
22
+ from tools.data_visualizer import visualize_data
23
+ from tools.file_converter import convert_file
24
+ from tools.email_intent_classifier import classify_email_intent
25
+ from tools.kpi_generator import generate_kpis
26
+
27
# Setup logging. basicConfig writes to stderr by default, which matters
# here: stdout is reserved for the MCP stdio protocol stream.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Create MCP server instance (the name is advertised to MCP clients)
app = Server("mission-control-mcp")
33
+
34
+
35
# Tool definitions
# Static MCP Tool registry: one entry per automation tool. Each inputSchema
# is JSON Schema that clients use to construct/validate call arguments; the
# "default" values here are advisory — call_tool() applies its own defaults
# via arguments.get(...).
TOOLS = [
    Tool(
        name="pdf_reader",
        description="Extract text and metadata from PDF files. Reads all pages and extracts document information.",
        inputSchema={
            "type": "object",
            "properties": {
                "file_path": {
                    "type": "string",
                    "description": "Path to the PDF file to read"
                }
            },
            "required": ["file_path"]
        }
    ),
    Tool(
        name="text_extractor",
        description="Process and extract information from text. Supports cleaning, summarization, chunking, and keyword extraction.",
        inputSchema={
            "type": "object",
            "properties": {
                "text": {
                    "type": "string",
                    "description": "Raw text to process"
                },
                "operation": {
                    "type": "string",
                    "description": "Operation: 'clean', 'summarize', 'chunk', or 'keywords'",
                    "enum": ["clean", "summarize", "chunk", "keywords"],
                    "default": "clean"
                },
                "max_length": {
                    "type": "integer",
                    "description": "Maximum length for summary or chunk size",
                    "default": 500
                }
            },
            "required": ["text"]
        }
    ),
    Tool(
        name="web_fetcher",
        description="Fetch and extract content from web URLs. Returns clean text or HTML content with metadata.",
        inputSchema={
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "URL to fetch content from"
                },
                "extract_text_only": {
                    "type": "boolean",
                    "description": "Extract only text content (removes HTML)",
                    "default": True
                }
            },
            "required": ["url"]
        }
    ),
    Tool(
        name="rag_search",
        description="Semantic search using RAG (Retrieval Augmented Generation). Finds relevant documents using vector embeddings.",
        inputSchema={
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Search query"
                },
                "documents": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "List of documents to search in"
                },
                "top_k": {
                    "type": "integer",
                    "description": "Number of top results to return",
                    "default": 3
                }
            },
            "required": ["query", "documents"]
        }
    ),
    Tool(
        name="data_visualizer",
        description="Create data visualizations and charts. Supports bar, line, pie, and scatter charts from JSON or CSV data.",
        inputSchema={
            "type": "object",
            "properties": {
                "data": {
                    "type": "string",
                    "description": "JSON or CSV string data"
                },
                "chart_type": {
                    "type": "string",
                    "description": "Chart type",
                    "enum": ["bar", "line", "pie", "scatter"],
                    "default": "bar"
                },
                "x_column": {
                    "type": "string",
                    "description": "X-axis column name"
                },
                "y_column": {
                    "type": "string",
                    "description": "Y-axis column name"
                },
                "title": {
                    "type": "string",
                    "description": "Chart title",
                    "default": "Data Visualization"
                }
            },
            "required": ["data"]
        }
    ),
    # NOTE(review): this schema exposes output_format (and call_tool passes
    # output_format= through), while the demo script calls convert_file with
    # a conversion_type= keyword — confirm the real signature of
    # tools.file_converter.convert_file; one of the two call sites is stale.
    Tool(
        name="file_converter",
        description="Convert files between formats. Supports PDF↔TXT, TXT↔CSV conversions.",
        inputSchema={
            "type": "object",
            "properties": {
                "input_path": {
                    "type": "string",
                    "description": "Path to input file"
                },
                "output_format": {
                    "type": "string",
                    "description": "Desired output format",
                    "enum": ["txt", "csv", "pdf"]
                },
                "output_path": {
                    "type": "string",
                    "description": "Optional output file path"
                }
            },
            "required": ["input_path", "output_format"]
        }
    ),
    Tool(
        name="email_intent_classifier",
        description="Classify email intent using NLP. Identifies inquiry, complaint, request, feedback, meeting, order, urgent, follow-up, thank you, and application intents.",
        inputSchema={
            "type": "object",
            "properties": {
                "email_text": {
                    "type": "string",
                    "description": "Email text to classify"
                }
            },
            "required": ["email_text"]
        }
    ),
    Tool(
        name="kpi_generator",
        description="Generate business KPIs and insights from data. Calculates revenue, growth, efficiency, customer, and operational metrics.",
        inputSchema={
            "type": "object",
            "properties": {
                "data": {
                    "type": "string",
                    "description": "JSON string with business data"
                },
                "metrics": {
                    "type": "array",
                    "items": {
                        "type": "string",
                        "enum": ["revenue", "growth", "efficiency", "customer", "operational"]
                    },
                    "description": "List of metrics to calculate",
                    "default": ["revenue", "growth", "efficiency"]
                }
            },
            "required": ["data"]
        }
    )
]
213
+
214
+
215
@app.list_tools()
async def list_tools() -> list[Tool]:
    """Handle the MCP tools/list request: return the static TOOLS registry."""
    return TOOLS
219
+
220
+
221
@app.call_tool()
async def call_tool(name: str, arguments: Any) -> list[TextContent]:
    """
    Handle tool execution requests.

    Args:
        name: Tool name
        arguments: Tool arguments

    Returns:
        List of TextContent responses; on failure a single TextContent
        carrying the error message (errors never propagate to the caller).
    """
    try:
        logger.info(f"Executing tool: {name}")

        # Dispatch table of thunks: each entry extracts its own required
        # and optional arguments from the request payload. A missing
        # required key raises KeyError, caught by the outer handler.
        dispatch = {
            "pdf_reader": lambda: read_pdf(arguments["file_path"]),
            "text_extractor": lambda: extract_text(
                text=arguments["text"],
                operation=arguments.get("operation", "clean"),
                max_length=arguments.get("max_length", 500),
            ),
            "web_fetcher": lambda: fetch_web_content(
                url=arguments["url"],
                extract_text_only=arguments.get("extract_text_only", True),
            ),
            "rag_search": lambda: search_documents(
                query=arguments["query"],
                documents=arguments["documents"],
                top_k=arguments.get("top_k", 3),
            ),
            "data_visualizer": lambda: visualize_data(
                data=arguments["data"],
                chart_type=arguments.get("chart_type", "bar"),
                x_column=arguments.get("x_column"),
                y_column=arguments.get("y_column"),
                title=arguments.get("title", "Data Visualization"),
            ),
            "file_converter": lambda: convert_file(
                input_path=arguments["input_path"],
                output_format=arguments["output_format"],
                output_path=arguments.get("output_path"),
            ),
            "email_intent_classifier": lambda: classify_email_intent(
                arguments["email_text"]
            ),
            "kpi_generator": lambda: generate_kpis(
                data=arguments["data"],
                metrics=arguments.get("metrics", ["revenue", "growth", "efficiency"]),
            ),
        }

        handler = dispatch.get(name)
        if handler is None:
            raise ValueError(f"Unknown tool: {name}")
        result = handler()

        # Format result as JSON string
        import json
        result_text = json.dumps(result, indent=2, default=str)

        return [TextContent(type="text", text=result_text)]

    except Exception as e:
        logger.error(f"Error executing tool {name}: {e}", exc_info=True)
        error_msg = f"Error executing {name}: {str(e)}"
        return [TextContent(type="text", text=error_msg)]
299
+
300
+
301
async def main():
    """Main entry point for the MCP server.

    Serves over stdio: the MCP client owns this process's stdin/stdout,
    so stdout carries the protocol stream and must not be printed to.
    """
    from mcp.server.stdio import stdio_server

    async with stdio_server() as (read_stream, write_stream):
        logger.info("MissionControlMCP server starting...")
        # Blocks until the client closes the connection
        await app.run(
            read_stream,
            write_stream,
            app.create_initialization_options()
        )
312
+
313
+
314
if __name__ == "__main__":
    # Script entry: drive the async server on a fresh event loop.
    import asyncio
    asyncio.run(main())
tools/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ MissionControlMCP Tools Package
3
+ """
tools/data_visualizer.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Data Visualizer Tool - Create charts from data
3
+ """
4
+ import logging
5
+ from typing import Dict, Any
6
+ import io
7
+ import base64
8
+ import sys
9
+ import os
10
+
11
# Add parent directory to path for imports (lets `utils` resolve when this
# module is loaded directly rather than as part of an installed package)
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# NOTE(review): parse_json_safe is imported but never used in this module —
# confirm before removing.
from utils.helpers import parse_json_safe

logger = logging.getLogger(__name__)
17
+
18
+
19
def visualize_data(
    data: str,
    chart_type: str = "bar",
    x_column: str = None,
    y_column: str = None,
    title: str = "Data Visualization"
) -> Dict[str, Any]:
    """
    Create a chart visualization from data.

    Args:
        data: JSON or CSV string data (JSON is tried first, then CSV)
        chart_type: Type of chart - 'bar', 'line', 'pie', 'scatter'
        x_column: X-axis column name (defaults to the first data column)
        y_column: Y-axis column name (defaults to the second data column)
        title: Chart title

    Returns:
        Dictionary with base64-encoded PNG image and metadata

    Raises:
        ValueError: empty data, unknown chart type, or missing column
        Exception: re-raised parse/render errors (logged first)
    """
    try:
        import matplotlib.pyplot as plt
        import pandas as pd
        import json

        # Parse data
        try:
            # Try JSON first
            data_dict = json.loads(data)
            df = pd.DataFrame(data_dict)
        except json.JSONDecodeError:
            # Try CSV
            from io import StringIO
            df = pd.read_csv(StringIO(data))

        if df.empty:
            raise ValueError("Data is empty")

        # Auto-select columns if not specified
        if x_column is None and len(df.columns) > 0:
            x_column = df.columns[0]
        if y_column is None and len(df.columns) > 1:
            y_column = df.columns[1]
        elif y_column is None:
            y_column = df.columns[0]

        # Validate columns exist
        if x_column not in df.columns:
            raise ValueError(f"Column '{x_column}' not found in data")
        if y_column not in df.columns:
            raise ValueError(f"Column '{y_column}' not found in data")

        # Create figure.
        # BUGFIX: the figure is now closed in a finally block — previously an
        # exception while plotting/encoding skipped plt.close(), and pyplot
        # keeps every open figure alive, so repeated failures leaked memory.
        fig = plt.figure(figsize=(10, 6))
        try:
            # Generate chart based on type
            if chart_type == "bar":
                plt.bar(df[x_column], df[y_column])
                plt.xlabel(x_column)
                plt.ylabel(y_column)

            elif chart_type == "line":
                plt.plot(df[x_column], df[y_column], marker='o')
                plt.xlabel(x_column)
                plt.ylabel(y_column)
                plt.grid(True, alpha=0.3)

            elif chart_type == "pie":
                plt.pie(df[y_column], labels=df[x_column], autopct='%1.1f%%')

            elif chart_type == "scatter":
                plt.scatter(df[x_column], df[y_column], alpha=0.6)
                plt.xlabel(x_column)
                plt.ylabel(y_column)
                plt.grid(True, alpha=0.3)

            else:
                raise ValueError(f"Unknown chart type: {chart_type}")

            plt.title(title)
            plt.tight_layout()

            # Convert the rendered figure to base64 PNG
            buffer = io.BytesIO()
            plt.savefig(buffer, format='png', dpi=100, bbox_inches='tight')
            buffer.seek(0)
            image_base64 = base64.b64encode(buffer.read()).decode('utf-8')
        finally:
            plt.close(fig)

        return {
            "image_base64": image_base64,
            # Nominal size: 10x6 inches at dpi=100; bbox_inches='tight'
            # may trim the actual PNG slightly.
            "dimensions": {"width": 1000, "height": 600},
            "chart_type": chart_type,
            "title": title,
            "columns_used": {"x": x_column, "y": y_column}
        }

    except Exception as e:
        logger.error(f"Error creating visualization: {e}")
        raise
119
+
120
+
121
def create_multi_chart(data: str, chart_configs: list) -> Dict[str, Any]:
    """
    Create multiple charts from the same dataset.

    Args:
        data: JSON or CSV string data
        chart_configs: List of chart configuration dictionaries; recognised
            keys per config: chart_type, x_column, y_column, title

    Returns:
        Dictionary with total_charts and a charts list. A config that fails
        contributes an {"error": ...} entry instead of aborting the batch.

    Raises:
        Exception: only when the dataset itself cannot be parsed at all
    """
    try:
        import pandas as pd
        import json

        # Fail fast if the dataset is unparseable, before rendering anything.
        # Each chart re-parses `data` inside visualize_data, so this pass is
        # validation only; the parsed frame is deliberately discarded.
        # (FIX: dropped the unused matplotlib import and dead `df` binding.)
        try:
            data_dict = json.loads(data)
            pd.DataFrame(data_dict)
        except json.JSONDecodeError:
            from io import StringIO
            pd.read_csv(StringIO(data))

        charts = []
        for idx, config in enumerate(chart_configs):
            try:
                result = visualize_data(
                    data,
                    chart_type=config.get("chart_type", "bar"),
                    x_column=config.get("x_column"),
                    y_column=config.get("y_column"),
                    title=config.get("title", f"Chart {idx+1}")
                )
                charts.append(result)
            except Exception as e:
                # One bad chart config does not abort the remaining charts
                logger.error(f"Error creating chart {idx+1}: {e}")
                charts.append({"error": str(e)})

        return {
            "total_charts": len(charts),
            "charts": charts
        }

    except Exception as e:
        logger.error(f"Error creating multi-chart: {e}")
        raise
168
+
169
+
170
def generate_statistics_chart(data: str) -> Dict[str, Any]:
    """
    Generate a statistical summary chart (box plot + histogram) from numeric data.

    Args:
        data: JSON or CSV string with numeric data

    Returns:
        Dictionary with base64 PNG image, describe() statistics, and the
        list of numeric column names that were plotted

    Raises:
        ValueError: if the data contains no numeric columns
        Exception: re-raised parse/render errors (logged first)
    """
    try:
        import matplotlib.pyplot as plt
        import pandas as pd
        import json

        # Parse data: JSON first, CSV fallback
        try:
            data_dict = json.loads(data)
            df = pd.DataFrame(data_dict)
        except json.JSONDecodeError:
            from io import StringIO
            df = pd.read_csv(StringIO(data))

        # Only numeric columns can be summarized
        numeric_cols = df.select_dtypes(include=['number']).columns

        if len(numeric_cols) == 0:
            raise ValueError("No numeric columns found in data")

        # Create statistics summary figure.
        # BUGFIX: close *this* figure on every path — previously plt.close()
        # (no argument, current figure only) was skipped entirely when
        # plotting or encoding raised, leaking the figure.
        fig, axes = plt.subplots(1, 2, figsize=(14, 6))
        try:
            # Box plot
            df[numeric_cols].boxplot(ax=axes[0])
            axes[0].set_title("Distribution (Box Plot)")
            axes[0].set_ylabel("Values")

            # Histogram
            df[numeric_cols].hist(ax=axes[1], bins=20, alpha=0.7)
            axes[1].set_title("Distribution (Histogram)")

            plt.tight_layout()

            # Convert to base64 PNG
            buffer = io.BytesIO()
            plt.savefig(buffer, format='png', dpi=100, bbox_inches='tight')
            buffer.seek(0)
            image_base64 = base64.b64encode(buffer.read()).decode('utf-8')
        finally:
            plt.close(fig)

        # Calculate per-column summary statistics
        stats = df[numeric_cols].describe().to_dict()

        return {
            "image_base64": image_base64,
            "statistics": stats,
            "numeric_columns": list(numeric_cols)
        }

    except Exception as e:
        logger.error(f"Error generating statistics chart: {e}")
        raise
tools/email_intent_classifier.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Email Intent Classifier Tool - Classify email intents using NLP
3
+ """
4
+ import logging
5
+ from typing import Dict, Any, List
6
+ import re
7
+ import sys
8
+ import os
9
+
10
+ # Add parent directory to path for imports
11
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class EmailIntentClassifier:
    """
    Keyword/regex-based email intent classifier.

    Each intent owns a set of regex patterns; every match contributes one
    point, and scores are normalised into [0, 1] (three matches saturate).
    """

    # Regex patterns per intent; each findall hit adds one point to that intent.
    INTENT_PATTERNS = {
        "inquiry": [
            r'\b(question|wondering|curious|clarification|information|details|help)\b',
            r'\b(what|when|where|who|why|how)\b.*\?',
            r'\b(could you|can you|would you).*\b(explain|tell|provide|share)\b'
        ],
        "complaint": [
            r'\b(complaint|issue|problem|disappointed|frustrated|unhappy|angry)\b',
            r'\b(not working|broken|failed|error|mistake)\b',
            r'\b(terrible|awful|worst|horrible|unacceptable)\b'
        ],
        "request": [
            r'\b(please|kindly|request|need|require|would like)\b',
            r'\b(send|provide|share|give|deliver|forward)\b.*\b(me|us)\b',
            r'\b(need|want|looking for)\b'
        ],
        "feedback": [
            r'\b(feedback|suggestion|recommend|improve|enhancement)\b',
            r'\b(think|believe|feel|opinion)\b.*\b(should|could|would)\b',
            r'\b(great|excellent|good|nice|appreciate|love)\b'
        ],
        "meeting": [
            r'\b(meeting|schedule|appointment|call|discuss|conference)\b',
            r'\b(available|availability|free time|calendar)\b',
            r'\b(reschedule|postpone|cancel|confirm)\b'
        ],
        "order": [
            r'\b(order|purchase|buy|payment|invoice|receipt)\b',
            r'\b(shipping|delivery|tracking|status)\b',
            r'\b(product|item|package)\b'
        ],
        "urgent": [
            r'\b(urgent|asap|immediately|critical|emergency|priority)\b',
            r'\b(time-sensitive|deadline|due)\b',
            r'!!+|\bIMPORTANT\b'
        ],
        "follow_up": [
            r'\b(follow up|following up|checking in|reminder)\b',
            r'\b(haven\'t heard|waiting for|still pending)\b',
            r'\b(previous|earlier|sent|mentioned)\b.*\b(email|message)\b'
        ],
        "thank_you": [
            r'\b(thank|thanks|grateful|appreciate|gratitude)\b',
            r'\b(wonderful|excellent|helpful)\b.*\b(work|help|support)\b'
        ],
        "application": [
            r'\b(apply|application|position|job|role|opportunity)\b',
            r'\b(resume|cv|cover letter|portfolio)\b',
            r'\b(interested in|applying for)\b'
        ]
    }

    def classify(self, email_text: str) -> Dict[str, Any]:
        """
        Classify the intent of an email with confidence scores.

        Args:
            email_text: Email text to classify

        Returns:
            Dictionary with primary intent, confidence, secondary intents
            (up to three runners-up), and a short explanation

        Raises:
            ValueError: If the input text is empty or whitespace only.
        """
        if not email_text or not email_text.strip():
            raise ValueError("Email text cannot be empty")

        lowered = email_text.lower()

        # Score each intent: one point per regex hit, normalised so that
        # three hits (or more) yield full confidence.
        scores: Dict[str, float] = {}
        for label, regexes in self.INTENT_PATTERNS.items():
            hits = sum(len(re.findall(rx, lowered, re.IGNORECASE)) for rx in regexes)
            if hits > 0:
                scores[label] = min(hits / 3.0, 1.0)

        # Nothing matched: fall back to a neutral "general" label.
        if not scores:
            return {
                "intent": "general",
                "confidence": 0.5,
                "secondary_intents": [],
                "explanation": "No specific intent patterns detected"
            }

        ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
        top_label, top_score = ranked[0]

        # Up to three runners-up after the winner.
        runners_up = [
            {"intent": label, "confidence": round(value, 3)}
            for label, value in ranked[1:4]
        ]

        return {
            "intent": top_label,
            "confidence": round(top_score, 3),
            "secondary_intents": runners_up,
            "explanation": f"Detected {top_label} intent based on keyword analysis"
        }
135
+
136
+
137
def classify_email_intent(email_text: str) -> Dict[str, Any]:
    """
    Classify a single email's intent and attach basic size metadata.

    Args:
        email_text: Email text to classify

    Returns:
        Classification dictionary augmented with 'email_length' and
        'word_count'
    """
    try:
        outcome = EmailIntentClassifier().classify(email_text)

        # Attach simple size metadata alongside the classification.
        outcome["email_length"] = len(email_text)
        outcome["word_count"] = len(email_text.split())

        return outcome

    except Exception as e:
        logger.error(f"Error classifying email intent: {e}")
        raise
160
+
161
+
162
def classify_batch_emails(emails: List[str]) -> Dict[str, Any]:
    """
    Classify a batch of emails and summarise the intent distribution.

    Args:
        emails: List of email text strings

    Returns:
        Dictionary with per-email results (indexed by position) and a
        frequency map of detected intents
    """
    try:
        classifier = EmailIntentClassifier()
        outcomes = []

        for position, body in enumerate(emails):
            try:
                outcome = classifier.classify(body)
                outcome["email_index"] = position
            except Exception as e:
                # A single bad email must not abort the whole batch.
                logger.error(f"Error classifying email {position}: {e}")
                outcome = {
                    "email_index": position,
                    "error": str(e),
                    "intent": "error",
                    "confidence": 0.0
                }
            outcomes.append(outcome)

        # Tally how many emails landed on each intent label.
        distribution: Dict[str, int] = {}
        for outcome in outcomes:
            label = outcome.get("intent", "unknown")
            distribution[label] = distribution.get(label, 0) + 1

        return {
            "total_emails": len(emails),
            "results": outcomes,
            "intent_distribution": distribution
        }

    except Exception as e:
        logger.error(f"Error in batch email classification: {e}")
        raise
205
+
206
+
207
def extract_email_features(email_text: str) -> Dict[str, Any]:
    """
    Extract shallow structural features from an email for analysis.

    Args:
        email_text: Email text

    Returns:
        Dictionary of counts (length, words, sentences, '?', '!') and
        boolean flags (greeting, closing, URL, email address)
    """
    try:
        lowered = email_text.lower()

        return {
            "length": len(email_text),
            "word_count": len(email_text.split()),
            "sentence_count": len(re.split(r'[.!?]+', email_text)),
            "has_greeting": bool(re.search(r'\b(hi|hello|dear|hey)\b', lowered)),
            "has_closing": bool(re.search(r'\b(regards|sincerely|thanks|best)\b', lowered)),
            "question_count": len(re.findall(r'\?', email_text)),
            "exclamation_count": len(re.findall(r'!', email_text)),
            "has_url": bool(re.search(r'https?://', email_text)),
            "has_email_address": bool(re.search(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', email_text))
        }

    except Exception as e:
        logger.error(f"Error extracting email features: {e}")
        raise
tools/file_converter.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ File Converter Tool - Convert between different file formats
3
+ """
4
+ import logging
5
+ from typing import Dict, Any
6
+ from pathlib import Path
7
+ import sys
8
+ import os
9
+
10
+ # Add parent directory to path for imports
11
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
def convert_file(input_path: str, output_format: str, output_path: str = None) -> Dict[str, Any]:
    """
    Convert a file from one format to another.

    Supported conversions: PDF→TXT, TXT→CSV, CSV→TXT, and copies between
    text-based formats (txt/md/log).

    Args:
        input_path: Path to input file
        output_format: Desired output format ('txt', 'csv', 'pdf')
        output_path: Optional output path; auto-generated if not provided

    Returns:
        Dictionary with conversion results (path, success flag, message,
        formats, and resulting file size)

    Raises:
        FileNotFoundError: If the input file does not exist.
        ValueError: If the format pair is not supported.
    """
    try:
        source = Path(input_path)
        if not source.exists():
            raise FileNotFoundError(f"Input file not found: {input_path}")

        # Infer the source format from the file extension.
        src_format = source.suffix.lower().replace('.', '')

        # Default the destination next to the source, same stem.
        if output_path is None:
            output_path = str(source.parent / f"{source.stem}.{output_format}")
        target = Path(output_path)

        text_like = ['txt', 'md', 'log']

        # Route to the matching conversion helper.
        if (src_format, output_format) == ('pdf', 'txt'):
            success, message = _pdf_to_txt(input_path, output_path)
        elif (src_format, output_format) == ('txt', 'csv'):
            success, message = _txt_to_csv(input_path, output_path)
        elif (src_format, output_format) == ('csv', 'txt'):
            success, message = _csv_to_txt(input_path, output_path)
        elif src_format in text_like and output_format in text_like:
            success, message = _text_to_text(input_path, output_path)
        else:
            raise ValueError(f"Conversion from {src_format} to {output_format} not supported")

        return {
            "output_path": str(target),
            "success": success,
            "message": message,
            "input_format": src_format,
            "output_format": output_format,
            "file_size_bytes": target.stat().st_size if target.exists() else 0
        }

    except Exception as e:
        logger.error(f"Error converting file: {e}")
        raise
77
+
78
+
79
def _pdf_to_txt(input_path: str, output_path: str) -> tuple:
    """Extract text from every PDF page and write it to a UTF-8 text file.

    Returns a (success, message) tuple; failures are logged and reported
    rather than raised.
    """
    try:
        from PyPDF2 import PdfReader

        reader = PdfReader(input_path)

        # Keep only pages that yielded text; join with blank lines.
        extracted = [
            content
            for content in (page.extract_text() for page in reader.pages)
            if content
        ]

        with open(output_path, 'w', encoding='utf-8') as out:
            out.write("\n\n".join(extracted))

        return True, f"Successfully converted PDF to TXT ({len(reader.pages)} pages)"

    except Exception as e:
        logger.error(f"PDF to TXT conversion error: {e}")
        return False, str(e)
102
+
103
+
104
+ def _txt_to_csv(input_path: str, output_path: str) -> tuple:
105
+ """Convert TXT to CSV (assumes tab or comma separated values)"""
106
+ try:
107
+ import pandas as pd
108
+
109
+ # Try to read as CSV with different delimiters
110
+ try:
111
+ df = pd.read_csv(input_path, sep='\t')
112
+ except:
113
+ try:
114
+ df = pd.read_csv(input_path, sep=',')
115
+ except:
116
+ # If not structured, create simple CSV with one column
117
+ with open(input_path, 'r', encoding='utf-8') as f:
118
+ lines = f.readlines()
119
+
120
+ df = pd.DataFrame({'text': [line.strip() for line in lines if line.strip()]})
121
+
122
+ df.to_csv(output_path, index=False)
123
+
124
+ return True, f"Successfully converted TXT to CSV ({len(df)} rows)"
125
+
126
+ except Exception as e:
127
+ logger.error(f"TXT to CSV conversion error: {e}")
128
+ return False, str(e)
129
+
130
+
131
+ def _csv_to_txt(input_path: str, output_path: str) -> tuple:
132
+ """Convert CSV to TXT"""
133
+ try:
134
+ import pandas as pd
135
+
136
+ df = pd.read_csv(input_path)
137
+
138
+ # Convert to formatted text
139
+ text = df.to_string(index=False)
140
+
141
+ with open(output_path, 'w', encoding='utf-8') as f:
142
+ f.write(text)
143
+
144
+ return True, f"Successfully converted CSV to TXT ({len(df)} rows)"
145
+
146
+ except Exception as e:
147
+ logger.error(f"CSV to TXT conversion error: {e}")
148
+ return False, str(e)
149
+
150
+
151
+ def _text_to_text(input_path: str, output_path: str) -> tuple:
152
+ """Convert between text-based formats"""
153
+ try:
154
+ with open(input_path, 'r', encoding='utf-8') as f:
155
+ content = f.read()
156
+
157
+ with open(output_path, 'w', encoding='utf-8') as f:
158
+ f.write(content)
159
+
160
+ return True, "Successfully converted text file"
161
+
162
+ except Exception as e:
163
+ logger.error(f"Text to text conversion error: {e}")
164
+ return False, str(e)
165
+
166
+
167
def batch_convert(input_files: list, output_format: str) -> Dict[str, Any]:
    """
    Convert several files to one target format, collecting per-file results.

    Args:
        input_files: List of input file paths
        output_format: Desired output format for all files

    Returns:
        Dictionary with success/failure counts and per-file results
    """
    outcomes = []

    for path in input_files:
        try:
            entry = convert_file(path, output_format)
            entry["input_file"] = path
        except Exception as e:
            # Record the failure and continue with the remaining files.
            logger.error(f"Error converting {path}: {e}")
            entry = {
                "input_file": path,
                "success": False,
                "message": str(e)
            }
        outcomes.append(entry)

    ok_count = len([r for r in outcomes if r.get("success", False)])

    return {
        "total_files": len(input_files),
        "successful": ok_count,
        "failed": len(input_files) - ok_count,
        "results": outcomes
    }
tools/kpi_generator.py ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ KPI Generator Tool - Generate business KPIs from data
3
+ """
4
+ import logging
5
+ from typing import Dict, Any, List
6
+ import sys
7
+ import os
8
+
9
+ # Add parent directory to path for imports
10
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
11
+
12
+ from utils.helpers import parse_json_safe, safe_divide
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def generate_kpis(data: str, metrics: List[str] = None) -> Dict[str, Any]:
    """
    Generate a KPI report from business data.

    Args:
        data: JSON string containing business data
        metrics: Metric groups to calculate ('revenue', 'growth',
            'efficiency', 'customer', 'operational'); defaults to
            revenue/growth/efficiency. Unknown names are ignored.

    Returns:
        Dictionary with calculated KPIs, executive summary, trends,
        the metrics analyzed, and a data point count

    Raises:
        ValueError: If `data` is not valid JSON.
    """
    try:
        import json

        # Parse input data
        try:
            business_data = json.loads(data)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON data: {e}")

        if metrics is None:
            metrics = ["revenue", "growth", "efficiency"]

        # Dispatch table: metric group -> calculator function.
        calculators = {
            "revenue": _calculate_revenue_kpis,
            "growth": _calculate_growth_kpis,
            "efficiency": _calculate_efficiency_kpis,
            "customer": _calculate_customer_kpis,
            "operational": _calculate_operational_kpis,
        }

        kpis: Dict[str, Any] = {}
        for metric in metrics:
            calculator = calculators.get(metric)
            if calculator is not None:
                kpis.update(calculator(business_data))

        # Derive trends and an executive summary from the computed KPIs.
        trends = _identify_trends(kpis, business_data)
        summary = _generate_summary(kpis, trends)

        return {
            "kpis": kpis,
            "summary": summary,
            "trends": trends,
            "metrics_analyzed": metrics,
            "data_points": len(business_data) if isinstance(business_data, list) else len(business_data.keys())
        }

    except Exception as e:
        logger.error(f"Error generating KPIs: {e}")
        raise
82
+
83
+
84
+ def _calculate_revenue_kpis(data: Dict[str, Any]) -> Dict[str, Any]:
85
+ """Calculate revenue-related KPIs"""
86
+ kpis = {}
87
+
88
+ try:
89
+ # Total Revenue
90
+ if "revenue" in data:
91
+ if isinstance(data["revenue"], list):
92
+ kpis["total_revenue"] = sum(data["revenue"])
93
+ kpis["average_revenue"] = sum(data["revenue"]) / len(data["revenue"])
94
+ kpis["min_revenue"] = min(data["revenue"])
95
+ kpis["max_revenue"] = max(data["revenue"])
96
+ else:
97
+ kpis["total_revenue"] = data["revenue"]
98
+
99
+ # Revenue per customer
100
+ if "revenue" in data and "customers" in data:
101
+ revenue = data["revenue"] if not isinstance(data["revenue"], list) else sum(data["revenue"])
102
+ customers = data["customers"] if not isinstance(data["customers"], list) else sum(data["customers"])
103
+ kpis["revenue_per_customer"] = safe_divide(revenue, customers)
104
+
105
+ # Profit margin
106
+ if "revenue" in data and "costs" in data:
107
+ revenue = data["revenue"] if not isinstance(data["revenue"], list) else sum(data["revenue"])
108
+ costs = data["costs"] if not isinstance(data["costs"], list) else sum(data["costs"])
109
+ profit = revenue - costs
110
+ kpis["profit"] = profit
111
+ kpis["profit_margin_percent"] = safe_divide(profit * 100, revenue)
112
+
113
+ except Exception as e:
114
+ logger.warning(f"Error calculating revenue KPIs: {e}")
115
+
116
+ return kpis
117
+
118
+
119
+ def _calculate_growth_kpis(data: Dict[str, Any]) -> Dict[str, Any]:
120
+ """Calculate growth-related KPIs"""
121
+ kpis = {}
122
+
123
+ try:
124
+ # Year-over-year growth
125
+ if "current_revenue" in data and "previous_revenue" in data:
126
+ growth = data["current_revenue"] - data["previous_revenue"]
127
+ growth_rate = safe_divide(growth * 100, data["previous_revenue"])
128
+ kpis["revenue_growth"] = growth
129
+ kpis["revenue_growth_rate_percent"] = growth_rate
130
+
131
+ # Customer growth
132
+ if "current_customers" in data and "previous_customers" in data:
133
+ customer_growth = data["current_customers"] - data["previous_customers"]
134
+ customer_growth_rate = safe_divide(customer_growth * 100, data["previous_customers"])
135
+ kpis["customer_growth"] = customer_growth
136
+ kpis["customer_growth_rate_percent"] = customer_growth_rate
137
+
138
+ # Monthly growth rate (if time series data provided)
139
+ if "monthly_revenue" in data and isinstance(data["monthly_revenue"], list):
140
+ revenues = data["monthly_revenue"]
141
+ if len(revenues) >= 2:
142
+ recent_growth = safe_divide((revenues[-1] - revenues[-2]) * 100, revenues[-2])
143
+ kpis["recent_monthly_growth_percent"] = recent_growth
144
+
145
+ except Exception as e:
146
+ logger.warning(f"Error calculating growth KPIs: {e}")
147
+
148
+ return kpis
149
+
150
+
151
+ def _calculate_efficiency_kpis(data: Dict[str, Any]) -> Dict[str, Any]:
152
+ """Calculate efficiency-related KPIs"""
153
+ kpis = {}
154
+
155
+ try:
156
+ # Cost per acquisition
157
+ if "marketing_costs" in data and "new_customers" in data:
158
+ kpis["cost_per_acquisition"] = safe_divide(data["marketing_costs"], data["new_customers"])
159
+
160
+ # Operational efficiency
161
+ if "revenue" in data and "operational_costs" in data:
162
+ revenue = data["revenue"] if not isinstance(data["revenue"], list) else sum(data["revenue"])
163
+ kpis["operational_efficiency_ratio"] = safe_divide(revenue, data["operational_costs"])
164
+
165
+ # Employee productivity
166
+ if "revenue" in data and "employees" in data:
167
+ revenue = data["revenue"] if not isinstance(data["revenue"], list) else sum(data["revenue"])
168
+ kpis["revenue_per_employee"] = safe_divide(revenue, data["employees"])
169
+
170
+ # ROI
171
+ if "revenue" in data and "investment" in data:
172
+ revenue = data["revenue"] if not isinstance(data["revenue"], list) else sum(data["revenue"])
173
+ roi = safe_divide((revenue - data["investment"]) * 100, data["investment"])
174
+ kpis["roi_percent"] = roi
175
+
176
+ except Exception as e:
177
+ logger.warning(f"Error calculating efficiency KPIs: {e}")
178
+
179
+ return kpis
180
+
181
+
182
+ def _calculate_customer_kpis(data: Dict[str, Any]) -> Dict[str, Any]:
183
+ """Calculate customer-related KPIs"""
184
+ kpis = {}
185
+
186
+ try:
187
+ # Customer lifetime value
188
+ if "average_purchase_value" in data and "purchase_frequency" in data and "customer_lifespan" in data:
189
+ clv = data["average_purchase_value"] * data["purchase_frequency"] * data["customer_lifespan"]
190
+ kpis["customer_lifetime_value"] = clv
191
+
192
+ # Churn rate
193
+ if "churned_customers" in data and "total_customers" in data:
194
+ kpis["churn_rate_percent"] = safe_divide(data["churned_customers"] * 100, data["total_customers"])
195
+
196
+ # Retention rate
197
+ if "retained_customers" in data and "total_customers" in data:
198
+ kpis["retention_rate_percent"] = safe_divide(data["retained_customers"] * 100, data["total_customers"])
199
+
200
+ # Net Promoter Score (if provided)
201
+ if "nps_score" in data:
202
+ kpis["net_promoter_score"] = data["nps_score"]
203
+
204
+ except Exception as e:
205
+ logger.warning(f"Error calculating customer KPIs: {e}")
206
+
207
+ return kpis
208
+
209
+
210
+ def _calculate_operational_kpis(data: Dict[str, Any]) -> Dict[str, Any]:
211
+ """Calculate operational KPIs"""
212
+ kpis = {}
213
+
214
+ try:
215
+ # Inventory turnover
216
+ if "cost_of_goods_sold" in data and "average_inventory" in data:
217
+ kpis["inventory_turnover"] = safe_divide(data["cost_of_goods_sold"], data["average_inventory"])
218
+
219
+ # Order fulfillment rate
220
+ if "orders_fulfilled" in data and "total_orders" in data:
221
+ kpis["fulfillment_rate_percent"] = safe_divide(data["orders_fulfilled"] * 100, data["total_orders"])
222
+
223
+ # Average response time
224
+ if "total_response_time" in data and "ticket_count" in data:
225
+ kpis["average_response_time"] = safe_divide(data["total_response_time"], data["ticket_count"])
226
+
227
+ except Exception as e:
228
+ logger.warning(f"Error calculating operational KPIs: {e}")
229
+
230
+ return kpis
231
+
232
+
233
+ def _identify_trends(kpis: Dict[str, Any], data: Dict[str, Any]) -> List[str]:
234
+ """Identify key trends from KPIs"""
235
+ trends = []
236
+
237
+ try:
238
+ # Check growth trends
239
+ if "revenue_growth_rate_percent" in kpis:
240
+ rate = kpis["revenue_growth_rate_percent"]
241
+ if rate > 20:
242
+ trends.append(f"Strong revenue growth of {rate:.1f}%")
243
+ elif rate > 0:
244
+ trends.append(f"Positive revenue growth of {rate:.1f}%")
245
+ else:
246
+ trends.append(f"Revenue decline of {abs(rate):.1f}%")
247
+
248
+ # Check profitability
249
+ if "profit_margin_percent" in kpis:
250
+ margin = kpis["profit_margin_percent"]
251
+ if margin > 20:
252
+ trends.append(f"Healthy profit margin at {margin:.1f}%")
253
+ elif margin > 0:
254
+ trends.append(f"Modest profit margin at {margin:.1f}%")
255
+ else:
256
+ trends.append(f"Operating at a loss with {abs(margin):.1f}% negative margin")
257
+
258
+ # Check efficiency
259
+ if "roi_percent" in kpis:
260
+ roi = kpis["roi_percent"]
261
+ if roi > 100:
262
+ trends.append(f"Excellent ROI of {roi:.1f}%")
263
+ elif roi > 0:
264
+ trends.append(f"Positive ROI of {roi:.1f}%")
265
+
266
+ # Check customer metrics
267
+ if "churn_rate_percent" in kpis:
268
+ churn = kpis["churn_rate_percent"]
269
+ if churn > 10:
270
+ trends.append(f"High customer churn rate of {churn:.1f}%")
271
+ else:
272
+ trends.append(f"Healthy churn rate of {churn:.1f}%")
273
+
274
+ except Exception as e:
275
+ logger.warning(f"Error identifying trends: {e}")
276
+
277
+ return trends if trends else ["Insufficient data for trend analysis"]
278
+
279
+
280
+ def _generate_summary(kpis: Dict[str, Any], trends: List[str]) -> str:
281
+ """Generate executive summary"""
282
+ summary_parts = []
283
+
284
+ summary_parts.append("Executive KPI Summary:")
285
+ summary_parts.append(f"- Analyzed {len(kpis)} key performance indicators")
286
+
287
+ if trends:
288
+ summary_parts.append("- Key insights:")
289
+ for trend in trends[:3]: # Top 3 trends
290
+ summary_parts.append(f" • {trend}")
291
+
292
+ return "\n".join(summary_parts)
tools/pdf_reader.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PDF Reader Tool - Extract text and metadata from PDF files
3
+ """
4
+ import logging
5
+ from typing import Dict, Any
6
+ from pathlib import Path
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
def read_pdf(file_path: str) -> Dict[str, Any]:
    """
    Read and extract text from a PDF file.

    Args:
        file_path: Path to the PDF file

    Returns:
        Dictionary with 'text' (all pages, each prefixed with a page
        marker), 'pages' (page count), and 'metadata' (document info
        when present)

    Raises:
        FileNotFoundError: If the file does not exist.
        ImportError: If PyPDF2 is not installed.
    """
    try:
        from PyPDF2 import PdfReader

        if not Path(file_path).exists():
            raise FileNotFoundError(f"PDF file not found: {file_path}")

        reader = PdfReader(file_path)

        # Collect page texts, labelling each page and tolerating
        # per-page extraction failures.
        pages = []
        for number, page in enumerate(reader.pages, 1):
            try:
                content = page.extract_text()
            except Exception as e:
                logger.warning(f"Failed to extract text from page {number}: {e}")
                pages.append(f"--- Page {number} ---\n[Extraction failed]")
                continue
            if content:
                pages.append(f"--- Page {number} ---\n{content}")

        # Document info dictionary, when the PDF carries one.
        doc_info = {}
        if reader.metadata:
            doc_info = {
                "author": reader.metadata.get("/Author", "Unknown"),
                "creator": reader.metadata.get("/Creator", "Unknown"),
                "producer": reader.metadata.get("/Producer", "Unknown"),
                "subject": reader.metadata.get("/Subject", "Unknown"),
                "title": reader.metadata.get("/Title", "Unknown"),
                "creation_date": str(reader.metadata.get("/CreationDate", "Unknown"))
            }

        return {
            "text": "\n\n".join(pages),
            "pages": len(reader.pages),
            "metadata": doc_info
        }

    except ImportError:
        logger.error("PyPDF2 not installed. Install with: pip install pypdf2")
        raise
    except Exception as e:
        logger.error(f"Error reading PDF: {e}")
        raise
68
+
69
+
70
def get_pdf_info(file_path: str) -> Dict[str, Any]:
    """
    Get basic information about a PDF without extracting all text.

    Args:
        file_path: Path to the PDF file

    Returns:
        Dictionary with page count, encryption flag, file size, and name
    """
    try:
        from PyPDF2 import PdfReader

        document = PdfReader(file_path)
        source = Path(file_path)

        return {
            "page_count": len(document.pages),
            "is_encrypted": document.is_encrypted,
            "file_size_bytes": source.stat().st_size,
            "file_name": source.name
        }
    except Exception as e:
        logger.error(f"Error getting PDF info: {e}")
        raise
tools/rag_search.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RAG Search Tool - Semantic search using vector embeddings
3
+ """
4
+ import logging
5
+ from typing import Dict, Any, List
6
+ import sys
7
+ import os
8
+
9
+ # Add parent directory to path for imports
10
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
11
+
12
+ from utils.rag_utils import semantic_search, create_rag_store
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def search_documents(query: str, documents: List[str], top_k: int = 3) -> Dict[str, Any]:
    """
    Perform semantic search over a collection of documents.

    Args:
        query: Search query string
        documents: List of document strings to search
        top_k: Number of top results to return

    Returns:
        Dictionary with the query, corpus size, and scored results

    Raises:
        ValueError: If the query is blank or the document list is empty.
    """
    try:
        if not query or not query.strip():
            raise ValueError("Query cannot be empty")

        if not documents or len(documents) == 0:
            raise ValueError("Documents list cannot be empty")

        matches = semantic_search(query, documents, top_k)

        return {
            "query": query,
            "total_documents": len(documents),
            "returned_results": len(matches),
            "results": matches
        }

    except Exception as e:
        logger.error(f"Error performing RAG search: {e}")
        raise
49
+
50
+
51
def build_knowledge_base(documents: List[str]) -> Dict[str, Any]:
    """
    Build a semantic index over documents for later querying.

    Args:
        documents: List of documents to index

    Returns:
        Dictionary with a success flag, document count, message, and the
        in-memory store object

    Raises:
        ValueError: If the document list is empty.
    """
    try:
        if not documents:
            raise ValueError("Documents list cannot be empty")

        index = create_rag_store(documents)

        return {
            "success": True,
            "document_count": len(documents),
            "message": "Knowledge base built successfully",
            "store": index  # in-memory only; a real deployment would persist this
        }

    except Exception as e:
        logger.error(f"Error building knowledge base: {e}")
        raise
78
+
79
+
80
def multi_query_search(queries: List[str], documents: List[str], top_k: int = 3) -> Dict[str, Any]:
    """
    Run several queries against one document set, reusing a single index.

    Args:
        queries: List of query strings
        documents: List of documents to search
        top_k: Number of results per query

    Returns:
        Dictionary with one entry per query (keyed 'query_1', 'query_2', ...)

    Raises:
        ValueError: If either queries or documents is empty.
    """
    try:
        if not queries or not documents:
            raise ValueError("Both queries and documents must be provided")

        # Build the index once and reuse it for every query.
        index = create_rag_store(documents)

        per_query = {}
        for position, question in enumerate(queries, 1):
            key = f"query_{position}"
            try:
                per_query[key] = {
                    "query": question,
                    "results": index.search(question, top_k)
                }
            except Exception as e:
                # A failed query is recorded but does not abort the batch.
                logger.error(f"Error searching query {position}: {e}")
                per_query[key] = {
                    "query": question,
                    "error": str(e),
                    "results": []
                }

        return {
            "total_queries": len(queries),
            "total_documents": len(documents),
            "results": per_query
        }

    except Exception as e:
        logger.error(f"Error in multi-query search: {e}")
        raise
124
+
125
+
126
def find_similar_documents(target_doc: str, documents: List[str], top_k: int = 5) -> Dict[str, Any]:
    """
    Find the documents in a corpus most similar to a target document.

    Args:
        target_doc: The document to find similar ones for
        documents: Corpus of documents to search
        top_k: Number of similar documents to return

    Returns:
        Dictionary with a (possibly truncated) echo of the target, the
        corpus size, and the matching documents

    Raises:
        ValueError: If the target or the corpus is empty.
    """
    try:
        if not target_doc or not documents:
            raise ValueError("Target document and documents list must be provided")

        # Treat the target document itself as the search query.
        matches = semantic_search(target_doc, documents, top_k)

        # Echo at most 200 characters of the target for readability.
        preview = target_doc if len(target_doc) <= 200 else target_doc[:200] + "..."

        return {
            "target_document": preview,
            "corpus_size": len(documents),
            "similar_documents": matches
        }

    except Exception as e:
        logger.error(f"Error finding similar documents: {e}")
        raise
tools/text_extractor.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Text Extractor Tool - Clean, summarize, and process text
3
+ """
4
+ import logging
5
+ from typing import Dict, Any
6
+ import sys
7
+ import os
8
+
9
+ # Add parent directory to path for imports
10
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
11
+
12
+ from utils.helpers import clean_text, chunk_text, summarize_text, extract_keywords
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def extract_text(text: str, operation: str = "clean", max_length: int = 500) -> Dict[str, Any]:
    """
    Process text based on the specified operation.

    Args:
        text: Raw text to process
        operation: Operation to perform - 'clean', 'summarize', 'chunk', or 'keywords'
        max_length: Maximum length for summary operations (also the chunk size
            for the 'chunk' operation)

    Returns:
        Dictionary containing the processed text ("result"), its word count,
        and operation-specific metadata.

    Raises:
        ValueError: If the input text is empty or the operation is unknown.
    """
    try:
        if not text or not text.strip():
            raise ValueError("Input text is empty")

        if operation == "clean":
            processed = clean_text(text)
            info = {
                "operation": "clean",
                "original_length": len(text),
                "cleaned_length": len(processed),
            }

        elif operation == "summarize":
            processed = summarize_text(text, max_length)
            # Guard against division by zero for the ratio (empty text is
            # rejected above, but keep the check defensive).
            ratio = round(len(processed) / len(text), 2) if len(text) > 0 else 0
            info = {
                "operation": "summarize",
                "original_length": len(text),
                "summary_length": len(processed),
                "compression_ratio": ratio,
            }

        elif operation == "chunk":
            pieces = chunk_text(text, chunk_size=max_length, overlap=50)
            processed = "\n\n---CHUNK---\n\n".join(pieces)
            info = {
                "operation": "chunk",
                "total_chunks": len(pieces),
                "chunk_size": max_length,
            }

        elif operation == "keywords":
            found = extract_keywords(text, top_n=10)
            processed = ", ".join(found)
            info = {
                "operation": "keywords",
                "keyword_count": len(found),
                "keywords": found,
            }

        else:
            raise ValueError(f"Unknown operation: {operation}. Use 'clean', 'summarize', 'chunk', or 'keywords'")

        return {
            "result": processed,
            "word_count": len(processed.split()),
            "metadata": info,
        }

    except Exception as e:
        logger.error(f"Error extracting text: {e}")
        raise
86
+
87
+
88
def process_multiple_texts(texts: list, operation: str = "clean", max_length: int = 500) -> list:
    """
    Process multiple texts with the same operation.

    Args:
        texts: List of text strings to process
        operation: Operation to apply to all texts
        max_length: Maximum length forwarded to extract_text (summary length /
            chunk size). Previously the per-text max_length could not be
            customised; the default preserves the old behaviour.

    Returns:
        List of results, one per text, each tagged with its "index". A failing
        text yields an entry with an "error" key instead of aborting the batch.
    """
    results = []
    for idx, text in enumerate(texts):
        try:
            result = extract_text(text, operation, max_length)
            result["index"] = idx
            results.append(result)
        except Exception as e:
            # Best-effort batch: record the failure and keep going.
            logger.error(f"Error processing text at index {idx}: {e}")
            results.append({
                "index": idx,
                "error": str(e),
                "result": "",
                "word_count": 0,
            })

    return results
tools/web_fetcher.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Web Fetcher Tool - Fetch and extract content from web pages
3
+ """
4
+ import logging
5
+ from typing import Dict, Any
6
+ import sys
7
+ import os
8
+
9
+ # Add parent directory to path for imports
10
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
11
+
12
+ from utils.helpers import validate_url, clean_text, format_timestamp
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def fetch_web_content(url: str, extract_text_only: bool = True, timeout: int = 30) -> Dict[str, Any]:
    """
    Fetch content from a web URL.

    Args:
        url: URL to fetch
        extract_text_only: If True, extract only text content; if False, return HTML
        timeout: Request timeout in seconds

    Returns:
        Dictionary containing fetched content, status code, page title,
        extracted links, and response metadata.

    Raises:
        ValueError: If the URL is not a valid http(s) URL.
        requests.exceptions.RequestException: On network or HTTP errors.
    """
    # BUG FIX: these imports used to live inside the try block. If importing
    # `requests` failed there, the `except requests.exceptions.RequestException`
    # clause below would itself raise a NameError because `requests` was unbound.
    import requests
    from bs4 import BeautifulSoup

    try:
        # Validate URL before issuing any network request.
        if not validate_url(url):
            raise ValueError(f"Invalid URL format: {url}")

        # Mimic a browser so simple bot-blockers don't reject the request.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()

        content = ""
        content_type = response.headers.get('Content-Type', '')

        if extract_text_only and 'text/html' in content_type:
            soup = BeautifulSoup(response.text, 'html.parser')

            # Page title (may be absent on malformed pages).
            title = soup.title.string if soup.title else "No title"

            # Collect links before pruning elements; skip in-page anchors.
            links = []
            for link in soup.find_all('a', href=True):
                href = link.get('href', '')
                if href and not href.startswith('#'):
                    links.append(href)

            # Drop boilerplate elements that carry no article text.
            for script in soup(["script", "style", "nav", "footer", "header"]):
                script.decompose()

            text = soup.get_text()

            # Collapse whitespace-only lines/fragments left by the HTML layout.
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
            content = '\n'.join(chunk for chunk in chunks if chunk)

            # Final normalization pass.
            content = clean_text(content)

        else:
            # Non-HTML (or raw mode): return the body untouched.
            content = response.text
            title = "N/A (non-HTML content)"
            links = []

        metadata = {
            "url": url,
            "status_code": response.status_code,
            "content_type": content_type,
            "content_length": len(content),
            "encoding": response.encoding,
            "timestamp": format_timestamp(),
            "headers": dict(response.headers)
        }

        return {
            "content": content,
            "status_code": response.status_code,
            "title": title,
            "links": links,
            "metadata": metadata
        }

    except requests.exceptions.RequestException as e:
        logger.error(f"Request error fetching {url}: {e}")
        raise
    except Exception as e:
        logger.error(f"Error fetching web content: {e}")
        raise
109
+
110
+
111
def fetch_multiple_urls(urls: list, extract_text_only: bool = True) -> list:
    """
    Fetch content from multiple URLs, one at a time.

    Args:
        urls: List of URLs to fetch
        extract_text_only: Whether to extract text only

    Returns:
        List of per-URL results, each tagged with its "index" and a "success"
        flag; a failing URL produces an error entry instead of aborting the batch.
    """
    outcomes = []
    for idx, url in enumerate(urls):
        try:
            entry = fetch_web_content(url, extract_text_only)
            entry["index"] = idx
            entry["success"] = True
        except Exception as e:
            logger.error(f"Error fetching URL at index {idx} ({url}): {e}")
            entry = {
                "index": idx,
                "url": url,
                "success": False,
                "error": str(e),
                "content": "",
                "status_code": 0,
            }
        outcomes.append(entry)

    return outcomes
141
+
142
+
143
def extract_links(url: str, timeout: int = 30) -> Dict[str, Any]:
    """
    Extract all hyperlinks from a web page.

    Args:
        url: URL to extract links from
        timeout: Request timeout in seconds (previously hard-coded to 30;
            the default keeps the original behaviour)

    Returns:
        Dictionary with the source URL, the link count, and a list of
        {"text", "href"} entries; hrefs are resolved to absolute URLs.

    Raises:
        requests.exceptions.RequestException: On network or HTTP errors.
    """
    # Imports outside the try so a failed import cannot be mistaken for a
    # fetch/parse error by the handler below.
    import requests
    from bs4 import BeautifulSoup
    from urllib.parse import urljoin

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        links = []
        for link in soup.find_all('a', href=True):
            # Resolve relative hrefs against the page URL.
            absolute_url = urljoin(url, link['href'])
            links.append({
                "text": link.get_text(strip=True),
                "href": absolute_url
            })

        return {
            "url": url,
            "total_links": len(links),
            "links": links
        }

    except Exception as e:
        logger.error(f"Error extracting links: {e}")
        raise
utils/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ MissionControlMCP Utilities Package
3
+ """
utils/helpers.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Helper utility functions
3
+ """
4
+ import re
5
+ import logging
6
+ from typing import List, Dict, Any
7
+ from datetime import datetime
8
+
9
+ # Setup logging
10
+ logging.basicConfig(level=logging.INFO)
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def clean_text(text: str) -> str:
    """
    Normalize raw text: collapse all whitespace runs to single spaces,
    strip characters outside word characters and basic punctuation,
    and trim the ends.

    Args:
        text: Raw text to clean

    Returns:
        Cleaned text string
    """
    # Collapse any run of whitespace (tabs, newlines, ...) into one space.
    collapsed = re.sub(r'\s+', ' ', text)
    # Keep word chars, whitespace, and common punctuation; drop the rest.
    filtered = re.sub(r'[^\w\s.,!?;:\-\'\"()]', '', collapsed)
    return filtered.strip()
31
+
32
+
33
def chunk_text(text: str, chunk_size: int = 500, overlap: int = 50) -> List[str]:
    """
    Split text into overlapping chunks for processing.

    Args:
        text: Text to chunk
        chunk_size: Size of each chunk in characters (must be positive)
        overlap: Overlap between consecutive chunks (must be >= 0 and
            strictly smaller than chunk_size)

    Returns:
        List of text chunks; empty list for empty text.

    Raises:
        ValueError: If chunk_size is not positive or overlap >= chunk_size.
    """
    # BUG FIX 1: overlap >= chunk_size used to make `start = end - overlap`
    # never advance, causing an infinite loop. Validate up front.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if overlap < 0 or overlap >= chunk_size:
        raise ValueError("overlap must be non-negative and smaller than chunk_size")

    chunks: List[str] = []
    start = 0
    text_length = len(text)

    while start < text_length:
        end = start + chunk_size
        chunks.append(text[start:end])
        # BUG FIX 2: once the chunk reaches the end of the text, stop.
        # Previously the loop kept emitting ever-shorter duplicate tails
        # whenever `end - overlap` was still inside the text.
        if end >= text_length:
            break
        start = end - overlap

    return chunks
56
+
57
+
58
def summarize_text(text: str, max_length: int = 500) -> str:
    """
    Create a simple extractive summary by keeping leading sentences that
    fit within the length budget.

    Args:
        text: Text to summarize
        max_length: Maximum length of summary

    Returns:
        Summarized text; if not even the first sentence fits, the text is
        truncated at a word boundary and suffixed with "...".
    """
    parts: List[str] = []
    used = 0

    for raw in re.split(r'[.!?]+', text):
        sentence = raw.strip()
        if not sentence:
            continue
        # Each kept sentence costs its own length plus ". ".
        if used + len(sentence) + 2 > max_length:
            break
        parts.append(sentence + ". ")
        used += len(sentence) + 2

    summary = "".join(parts)

    # Nothing fit: fall back to a hard truncation at a word boundary.
    if not summary and text:
        summary = text[:max_length].rsplit(' ', 1)[0] + "..."

    return summary.strip()
86
+
87
+
88
def extract_keywords(text: str, top_n: int = 10) -> List[str]:
    """
    Extract top keywords from text using simple frequency analysis.

    Words are lowercased, must be at least 4 letters long, and a small set
    of common stop words is excluded.

    Args:
        text: Text to analyze
        top_n: Number of top keywords to return

    Returns:
        List of keywords, most frequent first (ties keep first-seen order).
    """
    # collections.Counter replaces the previous hand-rolled frequency dict +
    # manual sort; most_common() is stable for ties, matching the old order.
    from collections import Counter

    # Only alphabetic words of 4+ letters count as keyword candidates.
    words = re.findall(r'\b[a-zA-Z]{4,}\b', text.lower())

    # Remove common stop words.
    stop_words = {'that', 'this', 'with', 'from', 'have', 'been', 'were',
                  'will', 'would', 'could', 'should', 'about', 'their', 'there'}

    counts = Counter(w for w in words if w not in stop_words)
    return [word for word, _ in counts.most_common(top_n)]
115
+
116
+
117
def validate_url(url: str) -> bool:
    """
    Validate whether a string looks like a proper http(s) URL.

    Accepts domain names, ``localhost``, and dotted-quad IPs, with an
    optional port and path/query.

    Args:
        url: URL string to validate

    Returns:
        True if valid URL, False otherwise
    """
    pattern = re.compile(
        r'^https?://'  # http:// or https://
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|'  # domain...
        r'localhost|'  # localhost...
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
        r'(?::\d+)?'  # optional port
        r'(?:/?|[/?]\S+)$', re.IGNORECASE)
    return bool(pattern.match(url))
135
+
136
+
137
def format_timestamp() -> str:
    """
    Get the current local time as an ISO-8601 formatted string.

    Returns:
        ISO formatted timestamp string, e.g. '2024-01-31T12:34:56.789012'
        (naive local time, no timezone offset)
    """
    now = datetime.now()
    return now.isoformat()
145
+
146
+
147
def safe_divide(numerator: float, denominator: float, default: float = 0.0) -> float:
    """
    Safely divide two numbers, returning a default instead of raising.

    Args:
        numerator: Numerator value
        denominator: Denominator value
        default: Value returned on division by zero or a type error

    Returns:
        Division result, or ``default`` when the division cannot be performed.
    """
    try:
        if denominator == 0:
            return default
        return numerator / denominator
    except (TypeError, ZeroDivisionError):
        # Non-numeric operands (or exotic zero-like types) fall back here.
        return default
163
+
164
+
165
def parse_json_safe(json_str: str) -> Dict[str, Any]:
    """
    Safely parse a JSON string with error handling.

    Args:
        json_str: JSON string to parse

    Returns:
        The parsed value (a dict for JSON objects; note a JSON array yields a
        list), or an empty dict on any parse failure.
    """
    import json
    try:
        return json.loads(json_str)
    # BUG FIX: json.loads raises TypeError (not JSONDecodeError) for non-string
    # input such as None or bytes-like oddities; the old handler let that
    # escape a function whose contract is "never raise".
    except (TypeError, json.JSONDecodeError) as e:
        logger.error(f"JSON parse error: {e}")
        return {}
utils/rag_utils.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RAG (Retrieval Augmented Generation) utilities using FAISS and embeddings
3
+ """
4
+ import numpy as np
5
+ from typing import List, Dict, Any
6
+ import logging
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
class SimpleRAGStore:
    """
    Simple RAG store: embeds documents with a sentence-transformer model and
    answers similarity queries via a FAISS L2 index.

    Use add_documents() / search() / clear(); the embedding model is loaded
    lazily on first use.
    """

    def __init__(self):
        """Initialize an empty RAG store."""
        self.documents: List[str] = []          # raw document texts, insertion order
        self.embeddings: List[np.ndarray] = []  # one embedding vector per document
        self.index = None                       # FAISS index, created on first add
        self._model = None                      # lazily-loaded SentenceTransformer

    def _get_model(self):
        """Lazy-load and cache the sentence transformer model."""
        if self._model is None:
            try:
                from sentence_transformers import SentenceTransformer
                self._model = SentenceTransformer('all-MiniLM-L6-v2')
                logger.info("Loaded sentence transformer model")
            except Exception as e:
                logger.error(f"Failed to load sentence transformer: {e}")
                raise
        return self._model

    def add_documents(self, documents: List[str]) -> None:
        """
        Embed documents and add them to the FAISS index.

        Args:
            documents: List of document strings to add
        """
        import faiss

        if not documents:
            logger.warning("No documents provided to add")
            return

        # Embed only the NEW documents.
        model = self._get_model()
        new_embeddings = model.encode(documents, show_progress_bar=False)
        new_array = np.asarray(new_embeddings, dtype='float32')

        if self.index is None:
            self.index = faiss.IndexFlatL2(new_array.shape[1])

        # BUG FIX: the previous implementation rebuilt an array from ALL stored
        # embeddings and passed it to index.add(), so every call after the
        # first re-inserted earlier vectors into the index as duplicates
        # (their ids then exceeded len(self.documents) and were silently
        # dropped by search()). Only the new vectors are added now.
        self.index.add(new_array)

        # Record state only after the embedding/index update succeeded.
        self.documents.extend(documents)
        self.embeddings.extend(new_embeddings)
        logger.info(f"Added {len(documents)} documents to RAG store")

    def search(self, query: str, top_k: int = 3) -> List[Dict[str, Any]]:
        """
        Search for the documents most similar to the query.

        Args:
            query: Search query string
            top_k: Number of top results to return (capped at the store size)

        Returns:
            List of result dicts with rank, document, similarity score, and
            raw L2 distance; empty list if the store holds no documents.
        """
        if self.index is None or len(self.documents) == 0:
            logger.warning("No documents in RAG store")
            return []

        # Encode query
        model = self._get_model()
        query_embedding = model.encode([query], show_progress_bar=False)
        query_embedding = np.array(query_embedding).astype('float32')

        # Search FAISS index
        top_k = min(top_k, len(self.documents))
        distances, indices = self.index.search(query_embedding, top_k)

        # Format results
        results = []
        for i, (distance, idx) in enumerate(zip(distances[0], indices[0])):
            if idx < len(self.documents):
                # Map L2 distance to a (0, 1] similarity: smaller distance
                # means a higher score.
                similarity_score = 1.0 / (1.0 + float(distance))
                results.append({
                    "rank": i + 1,
                    "document": self.documents[idx],
                    "score": round(similarity_score, 4),
                    "distance": float(distance)
                })

        return results

    def clear(self) -> None:
        """Clear all documents and reset the index."""
        self.documents = []
        self.embeddings = []
        self.index = None
        logger.info("Cleared RAG store")
110
+
111
+
112
def create_rag_store(documents: List[str]) -> SimpleRAGStore:
    """
    Factory function: build a SimpleRAGStore and populate it.

    Args:
        documents: List of documents to add to the store (may be empty)

    Returns:
        Initialized SimpleRAGStore instance
    """
    rag_store = SimpleRAGStore()
    # An empty corpus yields an empty (but usable) store.
    if not documents:
        return rag_store
    rag_store.add_documents(documents)
    return rag_store
126
+
127
+
128
def semantic_search(query: str, documents: List[str], top_k: int = 3) -> List[Dict[str, Any]]:
    """
    One-shot semantic search over a list of documents.

    Builds a temporary RAG store for the documents and runs a single
    query against it.

    Args:
        query: Search query
        documents: List of documents to search
        top_k: Number of results to return

    Returns:
        List of search results (rank, document, score, distance)
    """
    return create_rag_store(documents).search(query, top_k)