bluestpanda commited on
Commit
80e30b4
Β·
1 Parent(s): 98662cd

Add application file

Browse files
Files changed (3) hide show
  1. Dockerfile +21 -0
  2. app.py +577 -0
  3. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM huggingface/space-ollama:streamlit
2
+
3
+ # Copy your app files
4
+ COPY app.py /app/app.py
5
+ COPY requirements.txt /app/requirements.txt
6
+
7
+ # Install Python dependencies
8
+ RUN pip install -r requirements.txt
9
+
10
+ # Copy structure_analysis if it exists
11
+ COPY structure_analysis.py /app/structure_analysis.py 2>/dev/null || true
12
+
13
+ # Download Ollama model (this takes a few minutes)
14
+ RUN ollama pull llama3.2:3b
15
+
16
+ # Expose Streamlit port
17
+ EXPOSE 7860
18
+
19
+ # Run Streamlit
20
+ CMD ["streamlit", "run", "/app/app.py", "--server.address", "0.0.0.0", "--server.port", "7860"]
21
+
app.py ADDED
@@ -0,0 +1,577 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ File Upload Analyzer - Streamlit Frontend
4
+ This is a copy of file_upload_app.py for Hugging Face Spaces deployment.
5
+ """
6
+
7
+ import streamlit as st
8
+ import json
9
+ import sys
10
+ import os
11
+ from pathlib import Path
12
+ from typing import Dict, Any
13
+ import io
14
+
15
+ try:
16
+ import requests
17
+ except ImportError:
18
+ st.error("Error: requests module not found. Please install it with: pip install requests")
19
+ st.stop()
20
+
21
+ # Try to import structure_analysis, fallback to inline if not available
22
+ try:
23
+ from structure_analysis import (
24
+ detect_summary_fields,
25
+ classify_data_structure,
26
+ get_hierarchy_summary
27
+ )
28
+ except ImportError:
29
+ # Inline fallback implementations
30
+ def detect_summary_fields(data: Any, path: str = "") -> list:
31
+ """Detect summary fields."""
32
+ fields = []
33
+ summary_indicators = ['total', 'count', 'percentage', 'summary', 'aggregate', 'statistics', 'percent']
34
+
35
+ def traverse(obj, current_path=""):
36
+ if isinstance(obj, dict):
37
+ for key, value in obj.items():
38
+ field_path = f"{current_path}.{key}" if current_path else key
39
+ if any(ind in key.lower() for ind in summary_indicators):
40
+ fields.append(field_path)
41
+ if isinstance(value, (dict, list)):
42
+ traverse(value, field_path)
43
+ elif isinstance(obj, list) and len(obj) > 0:
44
+ traverse(obj[0], current_path)
45
+
46
+ traverse(data, path)
47
+ return fields
48
+
49
+ def classify_data_structure(data: Any) -> dict:
50
+ """Classify data structure."""
51
+ return {
52
+ 'summary_fields': [],
53
+ 'config_fields': [],
54
+ 'object_arrays': [],
55
+ 'object_fields': []
56
+ }
57
+
58
+ def get_hierarchy_summary(data: Any) -> dict:
59
+ """Get hierarchy summary."""
60
+ return {
61
+ 'has_summary': False,
62
+ 'has_config': False,
63
+ 'summary_fields': [],
64
+ 'config_fields': [],
65
+ 'levels_present': []
66
+ }
67
+
68
+ # Detect if running on Streamlit Cloud or Hugging Face
69
+ IS_STREAMLIT_CLOUD = os.getenv("STREAMLIT_SHARING_BASE_URL") is not None
70
+ IS_HUGGINGFACE = os.getenv("SPACE_ID") is not None
71
+ IS_ONLINE = IS_STREAMLIT_CLOUD or IS_HUGGINGFACE
72
+
73
+
74
+ # Page config
75
+ st.set_page_config(
76
+ page_title="JSON Field Analyzer",
77
+ page_icon="πŸ“Š",
78
+ layout="wide",
79
+ initial_sidebar_state="expanded"
80
+ )
81
+
82
+ # Custom CSS
83
+ st.markdown("""
84
+ <style>
85
+ .main > div {
86
+ padding-top: 1rem;
87
+ }
88
+ .stButton>button {
89
+ width: 100%;
90
+ }
91
+ h1 {
92
+ font-size: 2rem;
93
+ }
94
+ h2 {
95
+ font-size: 1.3rem;
96
+ border-bottom: 2px solid #0e1117;
97
+ padding-bottom: 0.3rem;
98
+ }
99
+ .highlight {
100
+ background-color: #f0f2f6;
101
+ color: #262730;
102
+ padding: 1rem;
103
+ border-radius: 5px;
104
+ border-left: 4px solid #1f77b4;
105
+ margin: 1rem 0;
106
+ }
107
+ .highlight p {
108
+ color: #262730;
109
+ margin: 0;
110
+ }
111
+ .result-box {
112
+ background-color: #f0f2f6;
113
+ padding: 1.5rem;
114
+ border-radius: 10px;
115
+ margin: 1rem 0;
116
+ }
117
+ </style>
118
+ """, unsafe_allow_html=True)
119
+
120
+
121
+ class FileAnalyzer:
122
+ """Analyzer for uploaded JSON files."""
123
+
124
+ OLLAMA_API_URL = "http://localhost:11434/api/generate"
125
+ MODEL_NAME = "llama3.2:3b"
126
+
127
+ def __init__(self, data: Dict[str, Any], llm_provider="ollama", api_key=None):
128
+ self.data = data
129
+ self.metadata = None
130
+ self.llm_provider = llm_provider
131
+ self.api_key = api_key
132
+
133
+ def extract_metadata(self, target_field: str) -> Dict[str, Any]:
134
+ """Extract key metadata from the JSON data for LLM analysis."""
135
+ # Enhanced: Detect summary fields and classify structure
136
+ summary_fields = detect_summary_fields(self.data)
137
+ classification = classify_data_structure(self.data)
138
+ hierarchy_summary = get_hierarchy_summary(self.data)
139
+
140
+ # Try to find objects in the data structure
141
+ objects_with_target = self._find_objects_with_target(target_field)
142
+ total = len(objects_with_target)
143
+ target_true = sum(1 for obj in objects_with_target if obj.get(target_field) is True)
144
+ percentage = (target_true / total * 100) if total > 0 else 0
145
+
146
+ metadata = {
147
+ "total_objects": total,
148
+ "target_count": target_true,
149
+ "percentage": round(percentage, 2),
150
+ "summary_fields_detected": summary_fields[:10],
151
+ "classification": classification,
152
+ "hierarchy_summary": hierarchy_summary,
153
+ "has_summary_level": hierarchy_summary['has_summary'],
154
+ "has_config_level": hierarchy_summary['has_config']
155
+ }
156
+
157
+ self.metadata = metadata
158
+ return metadata
159
+
160
+ def _find_objects_with_target(self, target_field: str) -> list:
161
+ """Find all objects in the data structure that contain the target field."""
162
+ found = []
163
+
164
+ def find_fields(obj):
165
+ if isinstance(obj, dict):
166
+ if target_field in obj:
167
+ found.append(obj)
168
+ for value in obj.values():
169
+ find_fields(value)
170
+ elif isinstance(obj, list):
171
+ for item in obj:
172
+ find_fields(item)
173
+
174
+ find_fields(self.data)
175
+ return found
176
+
177
+ def generate_prompt(self, target_field: str) -> str:
178
+ """Generate a hierarchy-aware prompt for the LLM."""
179
+ if not self.metadata:
180
+ self.extract_metadata(target_field)
181
+
182
+ hierarchy = self.metadata.get('hierarchy_summary', {})
183
+ summary_fields = self.metadata.get('summary_fields_detected', [])
184
+ classification = self.metadata.get('classification', {})
185
+
186
+ # Get sample object
187
+ sample = {}
188
+ def find_sample(obj):
189
+ if isinstance(obj, dict):
190
+ if target_field in obj:
191
+ return obj
192
+ for v in obj.values():
193
+ result = find_sample(v)
194
+ if result:
195
+ return result
196
+ elif isinstance(obj, list) and len(obj) > 0:
197
+ return find_sample(obj[0])
198
+ return {}
199
+
200
+ sample = find_sample(self.data)
201
+
202
+ # Get summary sample
203
+ summary_sample = self.data.get('results', {}).get('summary', {}) or self.data.get('summary', {})
204
+
205
+ # Create samples
206
+ sample_object = json.dumps({k: sample[k] for k in list(sample.keys())[:5]}, indent=2) if sample else "{}"
207
+ sample_summary = json.dumps(summary_sample, indent=2) if summary_sample else "{}"
208
+
209
+ # Build hierarchy instruction
210
+ hierarchy_text = f"""
211
+ DATA HIERARCHY (analyze in this priority order):
212
+
213
+ LEVEL 1 - Summary/Aggregate Fields (HIGHEST PRIORITY):
214
+ """
215
+ if summary_fields:
216
+ for field in summary_fields[:5]:
217
+ hierarchy_text += f" βœ“ {field}\n"
218
+ if len(summary_fields) > 5:
219
+ hierarchy_text += f" ... and {len(summary_fields) - 5} more\n"
220
+ else:
221
+ hierarchy_text += " No summary fields detected\n"
222
+
223
+ hierarchy_text += f"""
224
+ LEVEL 2 - Configuration/Compliance Fields:
225
+ """
226
+ config_fields = classification.get('config_fields', [])
227
+ if config_fields:
228
+ for field in config_fields[:3]:
229
+ hierarchy_text += f" βœ“ {field}\n"
230
+ else:
231
+ hierarchy_text += " No config fields detected\n"
232
+
233
+ hierarchy_text += f"""
234
+ LEVEL 3 - Individual Objects:
235
+ βœ“ Sample object fields shown below
236
+
237
+ CRITICAL INSTRUCTION: Check summary fields FIRST! They are the most important for validation.
238
+ """
239
+
240
+ prompt = f"""You are analyzing JSON data to identify important fields related to "{target_field}".
241
+
242
+ {hierarchy_text}
243
+
244
+ CONTEXT:
245
+ - Total objects: {self.metadata.get('total_objects', 0)}
246
+ - Objects with "{target_field}" = true: {self.metadata.get('target_count', 0)}
247
+ - Percentage: {self.metadata.get('percentage', 0)}%
248
+ - Has summary level data: {self.metadata.get('has_summary_level', False)}
249
+
250
+ SAMPLE SUMMARY DATA (check this first):
251
+ {sample_summary}
252
+
253
+ SAMPLE OBJECT DATA:
254
+ {sample_object}
255
+
256
+ TASK:
257
+ Identify 3-4 important fields related to "{target_field}" in this priority order:
258
+ 1. FIRST: Summary/aggregate fields (totals, percentages, counts)
259
+ 2. SECOND: Configuration/compliance fields
260
+ 3. THIRD: Individual object fields (if needed)
261
+
262
+ Generate regex patterns that match JSON format (with quotes).
263
+
264
+ VALIDATION PATTERN EXAMPLES:
265
+ - Compare two aggregate values: "field1"\\s*:\\s*(\\d+)[\\s\\S]*?"field2"\\s*:\\s*(\\d+)
266
+ - Extract percentage: "field_percentage"\\s*:\\s*(\\d+)
267
+ - Extract boolean: "field_name"\\s*:\\s*(true|false)
268
+ - Extract status: "compliance"\\s*:\\s*"([^"]*)"
269
+
270
+ Output ONLY valid JSON:
271
+ {{
272
+ "test_name": "Field Analysis: {target_field}",
273
+ "important_fields": ["field1", "field2", "field3"],
274
+ "reasoning": "Explain prioritization and why these fields matter",
275
+ "generated_regex": ["regex1", "regex2", "regex3"]
276
+ }}
277
+ """
278
+
279
+ return prompt
280
+
281
+ def call_llm(self, prompt: str) -> str:
282
+ """Call the appropriate LLM based on provider."""
283
+ if self.llm_provider == "ollama":
284
+ return self._call_ollama(prompt)
285
+ elif self.llm_provider == "openai":
286
+ return self._call_openai(prompt)
287
+ elif self.llm_provider == "anthropic":
288
+ return self._call_anthropic(prompt)
289
+ else:
290
+ raise ValueError(f"Unknown LLM provider: {self.llm_provider}")
291
+
292
+ def _call_ollama(self, prompt: str) -> str:
293
+ """Call the Ollama API to generate a response."""
294
+ try:
295
+ payload = {
296
+ "model": self.MODEL_NAME,
297
+ "prompt": prompt,
298
+ "stream": False,
299
+ "format": "json"
300
+ }
301
+
302
+ response = requests.post(self.OLLAMA_API_URL, json=payload, timeout=120)
303
+ response.raise_for_status()
304
+
305
+ result = response.json()
306
+ return result.get('response', '')
307
+
308
+ except requests.exceptions.ConnectionError:
309
+ raise ConnectionError("Cannot connect to Ollama. Make sure Ollama is running.")
310
+ except requests.exceptions.Timeout:
311
+ raise TimeoutError("Ollama request timed out.")
312
+ except requests.exceptions.RequestException as e:
313
+ raise Exception(f"Failed to call Ollama API - {e}")
314
+
315
+ def parse_llm_output(self, output: str) -> Dict[str, Any]:
316
+ """Parse and validate the LLM JSON output."""
317
+ try:
318
+ output = output.strip()
319
+ if output.startswith("```json"):
320
+ output = output[7:]
321
+ if output.startswith("```"):
322
+ output = output[3:]
323
+ if output.endswith("```"):
324
+ output = output[:-3]
325
+ output = output.strip()
326
+
327
+ result = json.loads(output)
328
+ return result
329
+
330
+ except json.JSONDecodeError as e:
331
+ raise ValueError(f"LLM output is not valid JSON - {e}")
332
+
333
+ def analyze(self, target_field: str = "rotation_enabled") -> Dict[str, Any]:
334
+ """Main analysis function."""
335
+ self.extract_metadata(target_field)
336
+ prompt = self.generate_prompt(target_field)
337
+ llm_output = self.call_llm(prompt)
338
+ result = self.parse_llm_output(llm_output)
339
+ return result
340
+
341
+
342
+ def main():
343
+ """Main Streamlit application."""
344
+ st.title("πŸ“Š JSON Field Analyzer")
345
+
346
+ if IS_HUGGINGFACE:
347
+ st.info("πŸ†“ Running on Hugging Face - Ollama available!")
348
+
349
+ st.markdown("**Upload a JSON file and analyze important fields using LLM**")
350
+
351
+ # Sidebar for configuration
352
+ with st.sidebar:
353
+ st.header("βš™οΈ Configuration")
354
+
355
+ # Show environment info
356
+ if IS_ONLINE and not IS_HUGGINGFACE:
357
+ st.info("🌐 Running online - Cloud LLM required")
358
+
359
+ # LLM Provider Selection
360
+ # Default to Anthropic if on Streamlit Cloud, Ollama on HF/local
361
+ if IS_STREAMLIT_CLOUD:
362
+ default_index = 2 # Anthropic Claude
363
+ else:
364
+ default_index = 0 # Ollama
365
+
366
+ llm_provider = st.selectbox(
367
+ "πŸ€– LLM Provider",
368
+ ["Ollama (Local)", "OpenAI (Cloud)", "Anthropic Claude (Cloud)"],
369
+ index=default_index,
370
+ help="Choose your LLM provider"
371
+ )
372
+
373
+ # Extract provider name and model
374
+ if llm_provider == "Ollama (Local)":
375
+ provider_name = "ollama"
376
+ api_key = None
377
+ if IS_STREAMLIT_CLOUD:
378
+ st.error("❌ Ollama not available on Streamlit Cloud")
379
+ st.markdown("**Please select a cloud LLM provider:**")
380
+ st.markdown("- OpenAI (Cloud) - GPT-4o Mini")
381
+ st.markdown("- Anthropic Claude (Cloud) - Recommended")
382
+ else:
383
+ st.info("πŸ“ Using local Ollama")
384
+ elif llm_provider == "OpenAI (Cloud)":
385
+ provider_name = "openai"
386
+ api_key = os.getenv("OPENAI_API_KEY") or st.text_input(
387
+ "OpenAI API Key",
388
+ type="password",
389
+ help="Enter your OpenAI API key (or set OPENAI_API_KEY env var)"
390
+ )
391
+ if not api_key:
392
+ st.warning("⚠️ Please enter your OpenAI API key")
393
+ st.info("πŸ’‘ Get key: https://platform.openai.com/api-keys")
394
+ else: # Anthropic
395
+ provider_name = "anthropic"
396
+ api_key = os.getenv("ANTHROPIC_API_KEY") or st.text_input(
397
+ "Anthropic API Key",
398
+ type="password",
399
+ help="Enter your Anthropic API key (or set ANTHROPIC_API_KEY env var)"
400
+ )
401
+ if not api_key:
402
+ st.warning("⚠️ Please enter your Anthropic API key")
403
+ st.info("πŸ’‘ Get key: https://console.anthropic.com")
404
+
405
+ st.markdown("---")
406
+
407
+ target_field = st.text_input(
408
+ "Target Field",
409
+ value="rotation_enabled",
410
+ help="The field you want to analyze (e.g., rotation_enabled, ssl_enforced)"
411
+ )
412
+
413
+ st.markdown("---")
414
+ st.markdown("### πŸ“‹ Setup Guides")
415
+
416
+ with st.expander("πŸ”§ Local Ollama Setup"):
417
+ st.code("""
418
+ brew install ollama
419
+ ollama serve
420
+ ollama pull llama3.2:3b
421
+ """, language="bash")
422
+
423
+ with st.expander("☁️ Cloud API Setup"):
424
+ st.markdown("""
425
+ **OpenAI:**
426
+ - Get key: https://platform.openai.com/api-keys
427
+ - Model: GPT-4o Mini
428
+
429
+ **Anthropic:**
430
+ - Get key: https://console.anthropic.com
431
+ - Model: Claude 3.5 Sonnet
432
+ """)
433
+
434
+ # File upload section
435
+ st.markdown("---")
436
+ st.header("πŸ“€ Upload JSON File")
437
+
438
+ uploaded_file = st.file_uploader(
439
+ "Choose a JSON file",
440
+ type=['json'],
441
+ help="Upload a JSON file to analyze"
442
+ )
443
+
444
+ # Display file info if uploaded
445
+ if uploaded_file is not None:
446
+ try:
447
+ # Read file contents
448
+ content = uploaded_file.read()
449
+ data = json.loads(content)
450
+
451
+ st.success("βœ… File uploaded successfully!")
452
+
453
+ # Show file info
454
+ col1, col2 = st.columns(2)
455
+ with col1:
456
+ st.metric("File Size", f"{len(content) / 1024:.2f} KB")
457
+ with col2:
458
+ st.metric("JSON Structure", "Valid" if isinstance(data, (dict, list)) else "Invalid")
459
+
460
+ # Analyze button
461
+ st.markdown("---")
462
+
463
+ col1, col2, col3 = st.columns([1, 2, 1])
464
+ with col2:
465
+ analyze_button = st.button("πŸ” Analyze with LLM", type="primary", use_container_width=True)
466
+
467
+ # Run analysis
468
+ if analyze_button:
469
+ # Prevent Ollama usage on Streamlit Cloud
470
+ if provider_name == "ollama" and IS_STREAMLIT_CLOUD:
471
+ st.error("❌ Ollama is not available on Streamlit Cloud")
472
+ st.info("πŸ’‘ Please select 'Anthropic Claude (Cloud)' or 'OpenAI (Cloud)' from the sidebar")
473
+
474
+ # Validate API key for cloud providers
475
+ elif provider_name in ["openai", "anthropic"] and not api_key:
476
+ st.error("❌ Please enter an API key for the selected cloud provider")
477
+ else:
478
+ try:
479
+ with st.spinner(f"Analyzing with {llm_provider}... This may take a moment."):
480
+ analyzer = FileAnalyzer(data, llm_provider=provider_name, api_key=api_key)
481
+ result = analyzer.analyze(target_field=target_field)
482
+
483
+ # Display results
484
+ st.markdown("---")
485
+ st.header("πŸ“Š Analysis Results")
486
+
487
+ # Main results in columns
488
+ col1, col2 = st.columns(2)
489
+
490
+ with col1:
491
+ st.subheader("πŸ€– Important Fields")
492
+ for i, field in enumerate(result.get('important_fields', []), 1):
493
+ st.markdown(f"**{i}. {field}**")
494
+
495
+ with col2:
496
+ st.subheader("πŸ’‘ Reasoning")
497
+ st.markdown(f'<div class="highlight">{result.get("reasoning", "N/A")}</div>',
498
+ unsafe_allow_html=True)
499
+
500
+ # Regex patterns
501
+ st.markdown("---")
502
+ st.subheader("πŸ”§ Generated Regex Patterns")
503
+
504
+ regex_patterns = result.get('generated_regex', [])
505
+ for i, pattern in enumerate(regex_patterns, 1):
506
+ st.markdown(f"**Pattern {i}:**")
507
+ st.code(pattern, language="regex")
508
+
509
+ # Raw JSON output
510
+ with st.expander("πŸ“„ View Raw JSON Output"):
511
+ st.json(result)
512
+
513
+ # Download results
514
+ st.markdown("---")
515
+ result_json = json.dumps(result, indent=2)
516
+ st.download_button(
517
+ label="⬇️ Download Results",
518
+ data=result_json,
519
+ file_name=f"analysis_{target_field}.json",
520
+ mime="application/json"
521
+ )
522
+
523
+ except ConnectionError as e:
524
+ st.error(f"❌ {e}")
525
+ if provider_name == "ollama":
526
+ st.info("πŸ’‘ Start Ollama with: `ollama serve`")
527
+ else:
528
+ st.info("πŸ’‘ Check your internet connection and API key")
529
+
530
+ except TimeoutError as e:
531
+ st.error(f"❌ {e}")
532
+ st.info("πŸ’‘ The analysis took too long. Try again or use a larger timeout.")
533
+
534
+ except Exception as e:
535
+ st.error(f"❌ Error during analysis: {e}")
536
+ st.exception(e)
537
+
538
+ except json.JSONDecodeError:
539
+ st.error("❌ Invalid JSON file. Please upload a valid JSON file.")
540
+
541
+ except Exception as e:
542
+ st.error(f"❌ Error reading file: {e}")
543
+ st.exception(e)
544
+
545
+ else:
546
+ # Show example when no file is uploaded
547
+ st.info("πŸ‘† Please upload a JSON file to get started")
548
+
549
+ with st.expander("πŸ“– How it works"):
550
+ st.markdown("""
551
+ ### Workflow:
552
+
553
+ 1. **Upload**: Upload your JSON file using the file uploader above
554
+ 2. **Configure**: Set the target field name in the sidebar (default: `rotation_enabled`)
555
+ 3. **Analyze**: Click the "Analyze with LLM" button
556
+ 4. **Review**: View the important fields, reasoning, and regex patterns
557
+ 5. **Download**: Save the results as JSON
558
+
559
+ ### What it does:
560
+
561
+ - Analyzes your JSON structure to detect summary fields, configurations, and objects
562
+ - Uses LLM to identify important fields related to your target
563
+ - Generates regex patterns for data extraction and validation
564
+ - Provides reasoning for why each field is important
565
+
566
+ ### Use cases:
567
+
568
+ - AWS compliance validation (KMS rotation, SSL enforcement, etc.)
569
+ - Data quality checks
570
+ - Automated validation pattern generation
571
+ - Field correlation analysis
572
+ """)
573
+
574
+
575
+ if __name__ == "__main__":
576
+ main()
577
+
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ requests>=2.31.0
2
+ streamlit>=1.28.0
3
+ pandas>=2.0.0
4
+ openai>=1.0.0
5
+ anthropic>=0.7.0
6
+