bluestpanda commited on
Commit
4b7b107
·
1 Parent(s): 5b2c0c6
Files changed (2) hide show
  1. app.py +680 -0
  2. requirements.txt +6 -3
app.py ADDED
@@ -0,0 +1,680 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ File Upload Analyzer - Streamlit Frontend
4
+ This is a copy of file_upload_app.py for Hugging Face Spaces deployment.
5
+ """
6
+
7
+ import streamlit as st
8
+ import json
9
+ import sys
10
+ import os
11
+ from pathlib import Path
12
+ from typing import Dict, Any
13
+ import io
14
+
15
+ import requests
16
+
17
# Try to import structure_analysis, fallback to inline if not available
try:
    from structure_analysis import (
        detect_summary_fields,
        classify_data_structure,
        get_hierarchy_summary
    )
except ImportError:
    # Inline fallback implementations (same contract as structure_analysis).

    def detect_summary_fields(data: Any, path: str = "") -> list:
        """Collect dotted paths of keys that look like summary/aggregate fields."""
        indicators = ('total', 'count', 'percentage', 'summary', 'aggregate', 'statistics', 'percent')
        matches = []

        def walk(node, prefix=""):
            # Dicts: test every key name against the indicator list, then
            # recurse into any nested container values.
            if isinstance(node, dict):
                for name, child in node.items():
                    dotted = f"{prefix}.{name}" if prefix else name
                    lowered = name.lower()
                    if any(token in lowered for token in indicators):
                        matches.append(dotted)
                    if isinstance(child, (dict, list)):
                        walk(child, dotted)
            # Lists: sample only the first element (cheap heuristic — assumes
            # homogeneous arrays; keeps traversal linear in unique structure).
            elif isinstance(node, list) and node:
                walk(node[0], prefix)

        walk(data, path)
        return matches

    def classify_data_structure(data: Any) -> dict:
        """Fallback stub: report an empty classification for every category."""
        return {
            'summary_fields': [],
            'config_fields': [],
            'object_arrays': [],
            'object_fields': [],
        }

    def get_hierarchy_summary(data: Any) -> dict:
        """Fallback stub: report a hierarchy with nothing detected."""
        return {
            'has_summary': False,
            'has_config': False,
            'summary_fields': [],
            'config_fields': [],
            'levels_present': [],
        }
63
+
64
# Detect if running on Streamlit Cloud or Hugging Face.
# NOTE(review): relies on platform-injected env vars — STREAMLIT_SHARING_BASE_URL
# (presumably set by Streamlit's hosted platform) and SPACE_ID (set inside a
# Hugging Face Space container); confirm against current platform docs.
IS_STREAMLIT_CLOUD = os.getenv("STREAMLIT_SHARING_BASE_URL") is not None
IS_HUGGINGFACE = os.getenv("SPACE_ID") is not None
# True on any hosted platform, where a local Ollama server is unavailable.
IS_ONLINE = IS_STREAMLIT_CLOUD or IS_HUGGINGFACE


# Page config - must be first: Streamlit requires set_page_config to be the
# first Streamlit command executed in the script.
st.set_page_config(
    page_title="JSON Field Analyzer",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded"
)
77
+
78
# Custom CSS injected once at startup. unsafe_allow_html is required for raw
# <style> tags; the .highlight and .result-box classes are referenced by the
# HTML emitted in main() for the reasoning panel.
st.markdown("""
<style>
    .main > div {
        padding-top: 1rem;
    }
    .stButton>button {
        width: 100%;
    }
    h1 {
        font-size: 2rem;
    }
    h2 {
        font-size: 1.3rem;
        border-bottom: 2px solid #0e1117;
        padding-bottom: 0.3rem;
    }
    .highlight {
        background-color: #f0f2f6;
        color: #262730;
        padding: 1rem;
        border-radius: 5px;
        border-left: 4px solid #1f77b4;
        margin: 1rem 0;
    }
    .highlight p {
        color: #262730;
        margin: 0;
    }
    .result-box {
        background-color: #f0f2f6;
        padding: 1.5rem;
        border-radius: 10px;
        margin: 1rem 0;
    }
</style>
""", unsafe_allow_html=True)
115
+
116
+
117
class FileAnalyzer:
    """Analyzer for uploaded JSON files.

    Extracts metadata about a target boolean field, builds a hierarchy-aware
    prompt, sends it to the configured LLM provider (Ollama, OpenAI,
    Anthropic, or the Hugging Face Inference API), and parses the provider's
    JSON response.
    """

    # Local Ollama endpoint and model (used only when llm_provider == "ollama").
    OLLAMA_API_URL = "http://localhost:11434/api/generate"
    MODEL_NAME = "llama3.2:3b"
    # Default free model for the Hugging Face Inference API.
    HF_DEFAULT_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"

    def __init__(self, data: Dict[str, Any], llm_provider="ollama", api_key=None):
        """Store the parsed JSON payload and provider configuration.

        Args:
            data: Parsed JSON document (dict or list) to analyze.
            llm_provider: One of "ollama", "openai", "anthropic", "huggingface".
            api_key: Provider API key/token; optional for Hugging Face.
        """
        self.data = data
        self.metadata = None  # populated lazily by extract_metadata()
        self.llm_provider = llm_provider
        self.api_key = api_key

    def extract_metadata(self, target_field: str) -> Dict[str, Any]:
        """Extract key metadata from the JSON data for LLM analysis.

        Counts objects containing ``target_field``, how many have it set to
        True, and records structure-analysis results. Caches the result on
        ``self.metadata`` and returns it.
        """
        # Enhanced: detect summary fields and classify structure.
        summary_fields = detect_summary_fields(self.data)
        classification = classify_data_structure(self.data)
        hierarchy_summary = get_hierarchy_summary(self.data)

        # Try to find objects in the data structure.
        objects_with_target = self._find_objects_with_target(target_field)
        total = len(objects_with_target)
        # Strict identity check: only literal JSON `true` counts, not truthy values.
        target_true = sum(1 for obj in objects_with_target if obj.get(target_field) is True)
        percentage = (target_true / total * 100) if total > 0 else 0

        metadata = {
            "total_objects": total,
            "target_count": target_true,
            "percentage": round(percentage, 2),
            "summary_fields_detected": summary_fields[:10],  # cap prompt size
            "classification": classification,
            "hierarchy_summary": hierarchy_summary,
            "has_summary_level": hierarchy_summary['has_summary'],
            "has_config_level": hierarchy_summary['has_config']
        }

        self.metadata = metadata
        return metadata

    def _find_objects_with_target(self, target_field: str) -> list:
        """Return every dict (at any nesting depth) that contains target_field."""
        found = []

        def find_fields(obj):
            if isinstance(obj, dict):
                if target_field in obj:
                    found.append(obj)
                # Keep descending: nested dicts may also carry the field.
                for value in obj.values():
                    find_fields(value)
            elif isinstance(obj, list):
                for item in obj:
                    find_fields(item)

        find_fields(self.data)
        return found

    def generate_prompt(self, target_field: str) -> str:
        """Generate a hierarchy-aware prompt for the LLM."""
        if not self.metadata:
            self.extract_metadata(target_field)

        hierarchy = self.metadata.get('hierarchy_summary', {})
        summary_fields = self.metadata.get('summary_fields_detected', [])
        classification = self.metadata.get('classification', {})

        # Find the first dict containing the target field, to show as a sample.
        def find_sample(obj):
            if isinstance(obj, dict):
                if target_field in obj:
                    return obj
                for v in obj.values():
                    result = find_sample(v)
                    if result:
                        return result
            elif isinstance(obj, list) and len(obj) > 0:
                return find_sample(obj[0])
            return {}

        sample = find_sample(self.data)

        # Get summary sample. Guarded with isinstance checks: the uploaded JSON
        # may be a list, or data['results'] may not be a dict — the previous
        # unconditional .get() chain raised AttributeError in those cases.
        summary_sample = {}
        if isinstance(self.data, dict):
            results = self.data.get('results', {})
            if isinstance(results, dict):
                summary_sample = results.get('summary', {})
            if not summary_sample:
                summary_sample = self.data.get('summary', {})

        # Create samples (at most 5 keys of the sample object to bound size).
        sample_object = json.dumps({k: sample[k] for k in list(sample.keys())[:5]}, indent=2) if sample else "{}"
        sample_summary = json.dumps(summary_sample, indent=2) if summary_sample else "{}"

        # Build hierarchy instruction.
        hierarchy_text = """
DATA HIERARCHY (analyze in this priority order):

LEVEL 1 - Summary/Aggregate Fields (HIGHEST PRIORITY):
"""
        if summary_fields:
            for field in summary_fields[:5]:
                hierarchy_text += f"  ✓ {field}\n"
            if len(summary_fields) > 5:
                hierarchy_text += f"  ... and {len(summary_fields) - 5} more\n"
        else:
            hierarchy_text += "  No summary fields detected\n"

        hierarchy_text += """
LEVEL 2 - Configuration/Compliance Fields:
"""
        config_fields = classification.get('config_fields', [])
        if config_fields:
            for field in config_fields[:3]:
                hierarchy_text += f"  ✓ {field}\n"
        else:
            hierarchy_text += "  No config fields detected\n"

        hierarchy_text += """
LEVEL 3 - Individual Objects:
  ✓ Sample object fields shown below

CRITICAL INSTRUCTION: Check summary fields FIRST! They are the most important for validation.
"""

        prompt = f"""You are analyzing JSON data to identify important fields related to "{target_field}".

{hierarchy_text}

CONTEXT:
- Total objects: {self.metadata.get('total_objects', 0)}
- Objects with "{target_field}" = true: {self.metadata.get('target_count', 0)}
- Percentage: {self.metadata.get('percentage', 0)}%
- Has summary level data: {self.metadata.get('has_summary_level', False)}

SAMPLE SUMMARY DATA (check this first):
{sample_summary}

SAMPLE OBJECT DATA:
{sample_object}

TASK:
Identify 3-4 important fields related to "{target_field}" in this priority order:
1. FIRST: Summary/aggregate fields (totals, percentages, counts)
2. SECOND: Configuration/compliance fields
3. THIRD: Individual object fields (if needed)

Generate regex patterns that match JSON format (with quotes).

VALIDATION PATTERN EXAMPLES:
- Compare two aggregate values: "field1"\\s*:\\s*(\\d+)[\\s\\S]*?"field2"\\s*:\\s*(\\d+)
- Extract percentage: "field_percentage"\\s*:\\s*(\\d+)
- Extract boolean: "field_name"\\s*:\\s*(true|false)
- Extract status: "compliance"\\s*:\\s*"([^"]*)"

Output ONLY valid JSON:
{{
  "test_name": "Field Analysis: {target_field}",
  "important_fields": ["field1", "field2", "field3"],
  "reasoning": "Explain prioritization and why these fields matter",
  "generated_regex": ["regex1", "regex2", "regex3"]
}}
"""

        return prompt

    def call_llm(self, prompt: str) -> str:
        """Dispatch the prompt to the configured provider and return raw text.

        Raises:
            ValueError: If ``self.llm_provider`` is not a known provider.
        """
        if self.llm_provider == "ollama":
            return self._call_ollama(prompt)
        elif self.llm_provider == "openai":
            return self._call_openai(prompt)
        elif self.llm_provider == "anthropic":
            return self._call_anthropic(prompt)
        elif self.llm_provider == "huggingface":
            return self._call_huggingface(prompt)
        else:
            raise ValueError(f"Unknown LLM provider: {self.llm_provider}")

    def _call_ollama(self, prompt: str) -> str:
        """Call the local Ollama API to generate a response.

        Raises:
            ConnectionError: If the Ollama server is unreachable.
            TimeoutError: If the request exceeds 120 seconds.
        """
        try:
            payload = {
                "model": self.MODEL_NAME,
                "prompt": prompt,
                "stream": False,
                "format": "json"  # ask Ollama to constrain output to JSON
            }

            response = requests.post(self.OLLAMA_API_URL, json=payload, timeout=120)
            response.raise_for_status()

            result = response.json()
            return result.get('response', '')

        except requests.exceptions.ConnectionError:
            raise ConnectionError("Cannot connect to Ollama. Make sure Ollama is running.")
        except requests.exceptions.Timeout:
            raise TimeoutError("Ollama request timed out.")
        except requests.exceptions.RequestException as e:
            raise Exception(f"Failed to call Ollama API - {e}")

    def _call_openai(self, prompt: str) -> str:
        """Call the OpenAI API (gpt-4o-mini) to generate a response."""
        try:
            from openai import OpenAI

            client = OpenAI(api_key=self.api_key)

            response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "You are a JSON data analysis assistant. Always respond with valid JSON."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                max_tokens=2000
            )

            return response.choices[0].message.content

        except ImportError:
            raise ImportError("OpenAI library not installed. Install with: pip install openai")
        except Exception as e:
            raise Exception(f"Failed to call OpenAI API - {e}")

    def _call_anthropic(self, prompt: str) -> str:
        """Call the Anthropic API (Claude 3.5 Sonnet) to generate a response."""
        try:
            from anthropic import Anthropic

            client = Anthropic(api_key=self.api_key)

            response = client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=2000,
                temperature=0.3,
                system="You are a JSON data analysis assistant. Always respond with valid JSON.",
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )

            return response.content[0].text

        except ImportError:
            raise ImportError("Anthropic library not installed. Install with: pip install anthropic")
        except Exception as e:
            raise Exception(f"Failed to call Anthropic API - {e}")

    def _call_huggingface(self, prompt: str) -> str:
        """Call the Hugging Face Inference API (FREE) to generate a response.

        The optional ``self.api_key`` is sent only as a Bearer token; the
        model id is always ``HF_DEFAULT_MODEL``.
        """
        try:
            # BUG FIX: previously `model_name = self.api_key or <default>`,
            # which used the user's HF token as the model id whenever a token
            # was supplied — producing a bogus URL. The token belongs only in
            # the Authorization header.
            model_name = self.HF_DEFAULT_MODEL

            headers = {"Content-Type": "application/json"}
            if self.api_key:
                headers["Authorization"] = f"Bearer {self.api_key}"

            # Mistral-instruct chat template: wrap the task in [INST] ... [/INST].
            full_prompt = f"""<s>[INST]You are a JSON data analysis assistant. Always respond with valid JSON only, no explanations.

{prompt}[/INST]"""

            payload = {
                "inputs": full_prompt,
                "parameters": {
                    "max_new_tokens": 1000,
                    "temperature": 0.3,
                    "return_full_text": False  # only the completion, not the prompt
                }
            }

            api_url = f"https://api-inference.huggingface.co/models/{model_name}"
            response = requests.post(api_url, json=payload, headers=headers, timeout=60)

            # 503 means the model is cold-starting on HF's side.
            if response.status_code == 503:
                raise Exception("Model is loading. Please wait a moment and try again.")

            response.raise_for_status()
            result = response.json()

            # Handle different response formats (list of generations vs dict).
            if isinstance(result, list) and len(result) > 0:
                return result[0].get('generated_text', '')
            elif isinstance(result, dict):
                return result.get('generated_text', '')
            else:
                return str(result)

        except Exception as e:
            raise Exception(f"Failed to call Hugging Face API - {e}")

    def parse_llm_output(self, output: str) -> Dict[str, Any]:
        """Parse and validate the LLM JSON output.

        Strips optional Markdown code fences (```json ... ```), then parses.

        Raises:
            ValueError: If the stripped output is not valid JSON.
        """
        try:
            output = output.strip()
            if output.startswith("```json"):
                output = output[7:]
            if output.startswith("```"):
                output = output[3:]
            if output.endswith("```"):
                output = output[:-3]
            output = output.strip()

            result = json.loads(output)
            return result

        except json.JSONDecodeError as e:
            raise ValueError(f"LLM output is not valid JSON - {e}")

    def analyze(self, target_field: str = "rotation_enabled") -> Dict[str, Any]:
        """Run the full pipeline: metadata -> prompt -> LLM -> parsed JSON."""
        self.extract_metadata(target_field)
        prompt = self.generate_prompt(target_field)
        llm_output = self.call_llm(prompt)
        result = self.parse_llm_output(llm_output)
        return result
433
+
434
+
435
def main():
    """Main Streamlit application.

    Renders the sidebar (provider selection, API key entry, target field),
    the JSON file uploader, and — on demand — runs FileAnalyzer and shows
    the results. Streamlit re-executes this top to bottom on every widget
    interaction, so all state lives in widget return values.
    """
    st.title("📊 JSON Field Analyzer")

    if IS_HUGGINGFACE:
        st.info("🆓 Running on Hugging Face - FREE Hugging Face AI model available! No API key needed.")

    st.markdown("**Upload a JSON file and analyze important fields using LLM**")

    # Sidebar for configuration. NOTE: provider_name, api_key, llm_provider
    # and target_field are bound here and read by the analysis section below.
    with st.sidebar:
        st.header("⚙️ Configuration")

        # Show environment info
        if IS_ONLINE and not IS_HUGGINGFACE:
            st.info("🌐 Running online - Cloud LLM required")

        # LLM Provider Selection.
        # Default to Hugging Face (free) if online, Ollama on local —
        # indices refer to positions in the selectbox options list below.
        if IS_ONLINE:
            default_index = 3  # Hugging Face (Free)
        else:
            default_index = 0  # Ollama

        llm_provider = st.selectbox(
            "🤖 LLM Provider",
            ["Ollama (Local)", "OpenAI (Cloud)", "Anthropic Claude (Cloud)", "Hugging Face (Free 🌟)"],
            index=default_index,
            help="Choose your LLM provider - Hugging Face is FREE and no API key needed!"
        )

        # Map the display label to the internal provider name and collect an
        # API key. Env vars take precedence over the text input widgets.
        if llm_provider == "Ollama (Local)":
            provider_name = "ollama"
            api_key = None
            if IS_ONLINE:
                # Ollama needs a local server, which hosted platforms lack.
                st.error("❌ Ollama not available on this platform")
                st.markdown("**Please select a cloud LLM provider:**")
                st.markdown("- OpenAI (Cloud) - GPT-4o Mini")
                st.markdown("- Anthropic Claude (Cloud) - Recommended")
            else:
                st.info("📍 Using local Ollama")
        elif llm_provider == "OpenAI (Cloud)":
            provider_name = "openai"
            api_key = os.getenv("OPENAI_API_KEY") or st.text_input(
                "OpenAI API Key",
                type="password",
                help="Enter your OpenAI API key (or set OPENAI_API_KEY env var)"
            )
            if not api_key:
                st.warning("⚠️ Please enter your OpenAI API key")
                st.info("💡 Get key: https://platform.openai.com/api-keys")
        elif llm_provider == "Anthropic Claude (Cloud)":
            provider_name = "anthropic"
            api_key = os.getenv("ANTHROPIC_API_KEY") or st.text_input(
                "Anthropic API Key",
                type="password",
                help="Enter your Anthropic API key (or set ANTHROPIC_API_KEY env var)"
            )
            if not api_key:
                st.warning("⚠️ Please enter your Anthropic API key")
                st.info("💡 Get key: https://console.anthropic.com")
        else:  # Hugging Face (Free) — the key is optional for this provider.
            provider_name = "huggingface"
            api_key = os.getenv("HUGGINGFACE_API_KEY") or st.text_input(
                "Hugging Face API Key (Optional)",
                type="password",
                help="Optional: Enter your HF token for faster inference (or set HUGGINGFACE_API_KEY env var)"
            )
            if not api_key:
                st.info("✨ Using free Hugging Face Inference API - no key needed!")
                st.info("💡 Optional: Add your token in Settings > Secrets for better performance")

        st.markdown("---")

        # Name of the boolean field the analysis will count and prompt about.
        target_field = st.text_input(
            "Target Field",
            value="rotation_enabled",
            help="The field you want to analyze (e.g., rotation_enabled, ssl_enforced)"
        )

        st.markdown("---")
        st.markdown("### 📋 Setup Guides")

        with st.expander("🔧 Local Ollama Setup"):
            st.code("""
brew install ollama
ollama serve
ollama pull llama3.2:3b
""", language="bash")

        with st.expander("☁️ Cloud API Setup"):
            st.markdown("""
**OpenAI:**
- Get key: https://platform.openai.com/api-keys
- Model: GPT-4o Mini

**Anthropic:**
- Get key: https://console.anthropic.com
- Model: Claude 3.5 Sonnet
""")

    # File upload section
    st.markdown("---")
    st.header("📤 Upload JSON File")

    uploaded_file = st.file_uploader(
        "Choose a JSON file",
        type=['json'],
        help="Upload a JSON file to analyze"
    )

    # Display file info if uploaded
    if uploaded_file is not None:
        try:
            # Read file contents and parse eagerly so invalid JSON fails
            # before any analysis UI is shown.
            content = uploaded_file.read()
            data = json.loads(content)

            st.success("✅ File uploaded successfully!")

            # Show file info
            col1, col2 = st.columns(2)
            with col1:
                st.metric("File Size", f"{len(content) / 1024:.2f} KB")
            with col2:
                st.metric("JSON Structure", "Valid" if isinstance(data, (dict, list)) else "Invalid")

            # Analyze button (centered via a 1:2:1 column layout)
            st.markdown("---")

            col1, col2, col3 = st.columns([1, 2, 1])
            with col2:
                analyze_button = st.button("🔍 Analyze with LLM", type="primary", use_container_width=True)

            # Run analysis
            if analyze_button:
                # Prevent Ollama usage on online platforms
                if provider_name == "ollama" and IS_ONLINE:
                    st.error("❌ Ollama is not available on this platform")
                    st.info("💡 Please select 'Anthropic Claude (Cloud)' or 'OpenAI (Cloud)' from the sidebar")

                # Validate API key for cloud providers (except Hugging Face which is optional)
                elif provider_name in ["openai", "anthropic"] and not api_key:
                    st.error("❌ Please enter an API key for the selected cloud provider")
                else:
                    try:
                        with st.spinner(f"Analyzing with {llm_provider}... This may take a moment."):
                            analyzer = FileAnalyzer(data, llm_provider=provider_name, api_key=api_key)
                            result = analyzer.analyze(target_field=target_field)

                        # Display results
                        st.markdown("---")
                        st.header("📊 Analysis Results")

                        # Main results in columns
                        col1, col2 = st.columns(2)

                        with col1:
                            st.subheader("🤖 Important Fields")
                            for i, field in enumerate(result.get('important_fields', []), 1):
                                st.markdown(f"**{i}. {field}**")

                        with col2:
                            st.subheader("💡 Reasoning")
                            # Uses the .highlight CSS class injected at startup.
                            st.markdown(f'<div class="highlight">{result.get("reasoning", "N/A")}</div>',
                                        unsafe_allow_html=True)

                        # Regex patterns
                        st.markdown("---")
                        st.subheader("🔧 Generated Regex Patterns")

                        regex_patterns = result.get('generated_regex', [])
                        for i, pattern in enumerate(regex_patterns, 1):
                            st.markdown(f"**Pattern {i}:**")
                            st.code(pattern, language="regex")

                        # Raw JSON output
                        with st.expander("📄 View Raw JSON Output"):
                            st.json(result)

                        # Download results
                        st.markdown("---")
                        result_json = json.dumps(result, indent=2)
                        st.download_button(
                            label="⬇️ Download Results",
                            data=result_json,
                            file_name=f"analysis_{target_field}.json",
                            mime="application/json"
                        )

                    # FileAnalyzer raises ConnectionError/TimeoutError for
                    # transport failures; anything else is surfaced generically.
                    except ConnectionError as e:
                        st.error(f"❌ {e}")
                        if provider_name == "ollama":
                            st.info("💡 Start Ollama with: `ollama serve`")
                        else:
                            st.info("💡 Check your internet connection and API key")

                    except TimeoutError as e:
                        st.error(f"❌ {e}")
                        st.info("💡 The analysis took too long. Try again or use a larger timeout.")

                    except Exception as e:
                        st.error(f"❌ Error during analysis: {e}")
                        st.exception(e)

        except json.JSONDecodeError:
            st.error("❌ Invalid JSON file. Please upload a valid JSON file.")

        except Exception as e:
            st.error(f"❌ Error reading file: {e}")
            st.exception(e)

    else:
        # Show example when no file is uploaded
        st.info("👆 Please upload a JSON file to get started")

        with st.expander("📖 How it works"):
            st.markdown("""
            ### Workflow:

            1. **Upload**: Upload your JSON file using the file uploader above
            2. **Configure**: Set the target field name in the sidebar (default: `rotation_enabled`)
            3. **Analyze**: Click the "Analyze with LLM" button
            4. **Review**: View the important fields, reasoning, and regex patterns
            5. **Download**: Save the results as JSON

            ### What it does:

            - Analyzes your JSON structure to detect summary fields, configurations, and objects
            - Uses LLM to identify important fields related to your target
            - Generates regex patterns for data extraction and validation
            - Provides reasoning for why each field is important

            ### Use cases:

            - AWS compliance validation (KMS rotation, SSL enforcement, etc.)
            - Data quality checks
            - Automated validation pattern generation
            - Field correlation analysis
            """)
676
+
677
+
678
# Run the app. Under `streamlit run app.py` this module executes with
# __name__ == "__main__", so the guard does not change deployed behavior;
# it only keeps `import app` (tests, tooling) free of UI side effects.
if __name__ == "__main__":
    main()
680
+
requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
1
+ requests>=2.31.0
2
+ streamlit>=1.28.0
3
+ pandas>=2.0.0
4
+ openai>=1.0.0
5
+ anthropic>=0.7.0
6
+