Spaces:

Khamad
/

Paramify-test

Sleeping

App Files Files Community

bluestpanda committed on Oct 29, 2025

Commit

9714df8

1 Parent(s): 4b7b107

2nd

Browse files

Files changed (1) hide show

app.py +289 -613

app.py CHANGED Viewed

@@ -1,20 +1,23 @@
 #!/usr/bin/env python3
 """
-File Upload Analyzer - Streamlit Frontend
-This is a copy of file_upload_app.py for Hugging Face Spaces deployment.
 """
 import streamlit as st
 import json
-import sys
-import os
 from pathlib import Path
 from typing import Dict, Any
 import io
-import requests
-# Try to import structure_analysis, fallback to inline if not available
 try:
     from structure_analysis import (
         detect_summary_fields,
@@ -22,659 +25,332 @@ try:
         get_hierarchy_summary
     )
 except ImportError:
-    # Inline fallback implementations
-    def detect_summary_fields(data: Any, path: str = "") -> list:
-        """Detect summary fields."""
-        fields = []
-        summary_indicators = ['total', 'count', 'percentage', 'summary', 'aggregate', 'statistics', 'percent']
-        def traverse(obj, current_path=""):
-            if isinstance(obj, dict):
-                for key, value in obj.items():
-                    field_path = f"{current_path}.{key}" if current_path else key
-                    if any(ind in key.lower() for ind in summary_indicators):
-                        fields.append(field_path)
-                    if isinstance(value, (dict, list)):
-                        traverse(value, field_path)
-            elif isinstance(obj, list) and len(obj) > 0:
-                traverse(obj[0], current_path)
-        traverse(data, path)
-        return fields
-    def classify_data_structure(data: Any) -> dict:
-        """Classify data structure."""
-        return {
-            'summary_fields': [],
-            'config_fields': [],
-            'object_arrays': [],
-            'object_fields': []
-        }
-    def get_hierarchy_summary(data: Any) -> dict:
-        """Get hierarchy summary."""
-        return {
-            'has_summary': False,
-            'has_config': False,
-            'summary_fields': [],
-            'config_fields': [],
-            'levels_present': []
-        }
# Hosting detection: a flag is set when the matching environment variable
# is present (Streamlit Cloud or Hugging Face).
IS_STREAMLIT_CLOUD = "STREAMLIT_SHARING_BASE_URL" in os.environ
IS_HUGGINGFACE = "SPACE_ID" in os.environ
IS_ONLINE = IS_HUGGINGFACE or IS_STREAMLIT_CLOUD

# Page config - must be the first Streamlit call
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="JSON Field Analyzer",
    page_icon="📊",
)

# Custom CSS, injected as raw HTML
st.markdown(
    """
<style>
    .main > div {
        padding-top: 1rem;
    }
    .stButton>button {
        width: 100%;
    }
    h1 {
        font-size: 2rem;
    }
    h2 {
        font-size: 1.3rem;
        border-bottom: 2px solid #0e1117;
        padding-bottom: 0.3rem;
    }
    .highlight {
        background-color: #f0f2f6;
        color: #262730;
        padding: 1rem;
        border-radius: 5px;
        border-left: 4px solid #1f77b4;
        margin: 1rem 0;
    }
    .highlight p {
        color: #262730;
        margin: 0;
    }
    .result-box {
        background-color: #f0f2f6;
        padding: 1.5rem;
        border-radius: 10px;
        margin: 1rem 0;
    }
</style>
""",
    unsafe_allow_html=True,
)
class FileAnalyzer:
    """Analyzer for uploaded JSON files.

    Builds a hierarchy-aware prompt from the parsed JSON, sends it to the
    selected LLM provider ("ollama", "openai", "anthropic" or
    "huggingface") and parses the JSON reply.
    """

    OLLAMA_API_URL = "http://localhost:11434/api/generate"
    MODEL_NAME = "llama3.2:3b"
    # Model queried through the Hugging Face Inference API. This is kept
    # separate from the API token: the token must never be used as a model id.
    HUGGINGFACE_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
    HUGGINGFACE_API_BASE = "https://api-inference.huggingface.co/models"

    def __init__(self, data: Dict[str, Any], llm_provider="ollama", api_key=None):
        """Store the parsed JSON plus provider settings; no I/O happens here."""
        self.data = data
        self.metadata = None
        self.llm_provider = llm_provider
        self.api_key = api_key

    def extract_metadata(self, target_field: str) -> Dict[str, Any]:
        """Extract key metadata from the JSON data for LLM analysis.

        Counts the objects that contain ``target_field`` and how many of
        them hold the value ``True``, and records the structure analysis
        results. The result is cached on ``self.metadata`` and returned.
        """
        summary_fields = detect_summary_fields(self.data)
        classification = classify_data_structure(self.data)
        hierarchy_summary = get_hierarchy_summary(self.data)

        objects_with_target = self._find_objects_with_target(target_field)
        total = len(objects_with_target)
        # Only the literal boolean True counts; truthy strings/ints do not.
        target_true = sum(1 for obj in objects_with_target if obj.get(target_field) is True)
        percentage = (target_true / total * 100) if total > 0 else 0

        metadata = {
            "total_objects": total,
            "target_count": target_true,
            "percentage": round(percentage, 2),
            "summary_fields_detected": summary_fields[:10],
            "classification": classification,
            "hierarchy_summary": hierarchy_summary,
            "has_summary_level": hierarchy_summary['has_summary'],
            "has_config_level": hierarchy_summary['has_config']
        }
        self.metadata = metadata
        return metadata

    def _find_objects_with_target(self, target_field: str) -> list:
        """Find all dicts in the data structure that contain the target field."""
        found = []

        def find_fields(obj):
            if isinstance(obj, dict):
                if target_field in obj:
                    found.append(obj)
                for value in obj.values():
                    find_fields(value)
            elif isinstance(obj, list):
                for item in obj:
                    find_fields(item)

        find_fields(self.data)
        return found

    def _huggingface_url(self) -> str:
        """Return the Inference API endpoint for the configured model."""
        return f"{self.HUGGINGFACE_API_BASE}/{self.HUGGINGFACE_MODEL}"

    def generate_prompt(self, target_field: str) -> str:
        """Generate a hierarchy-aware prompt for the LLM.

        Uses cached metadata when present, otherwise extracts it first.
        Works for both dict-rooted and list-rooted JSON documents.
        """
        if not self.metadata:
            self.extract_metadata(target_field)
        hierarchy = self.metadata.get('hierarchy_summary', {})
        summary_fields = self.metadata.get('summary_fields_detected', [])
        classification = self.metadata.get('classification', {})

        # Pick one sample object containing the target field (lists are
        # sampled through their first element).
        def find_sample(obj):
            if isinstance(obj, dict):
                if target_field in obj:
                    return obj
                for v in obj.values():
                    result = find_sample(v)
                    if result:
                        return result
            elif isinstance(obj, list) and len(obj) > 0:
                return find_sample(obj[0])
            return {}

        sample = find_sample(self.data)

        # Summary sample: prefer results.summary, then top-level summary.
        # Guard against a list root or non-dict "results", where .get()
        # would raise AttributeError.
        root = self.data if isinstance(self.data, dict) else {}
        results = root.get('results', {})
        nested_summary = results.get('summary', {}) if isinstance(results, dict) else {}
        summary_sample = nested_summary or root.get('summary', {})

        # Keep the prompt small: at most five keys of the sample object.
        sample_object = json.dumps({k: sample[k] for k in list(sample.keys())[:5]}, indent=2) if sample else "{}"
        sample_summary = json.dumps(summary_sample, indent=2) if summary_sample else "{}"

        hierarchy_text = """
DATA HIERARCHY (analyze in this priority order):
LEVEL 1 - Summary/Aggregate Fields (HIGHEST PRIORITY):
"""
        if summary_fields:
            for field in summary_fields[:5]:
                hierarchy_text += f"  ✓ {field}\n"
            if len(summary_fields) > 5:
                hierarchy_text += f"  ... and {len(summary_fields) - 5} more\n"
        else:
            hierarchy_text += "  No summary fields detected\n"
        hierarchy_text += """
LEVEL 2 - Configuration/Compliance Fields:
"""
        config_fields = classification.get('config_fields', [])
        if config_fields:
            for field in config_fields[:3]:
                hierarchy_text += f"  ✓ {field}\n"
        else:
            hierarchy_text += "  No config fields detected\n"
        hierarchy_text += """
LEVEL 3 - Individual Objects:
  ✓ Sample object fields shown below
CRITICAL INSTRUCTION: Check summary fields FIRST! They are the most important for validation.
"""
        prompt = f"""You are analyzing JSON data to identify important fields related to "{target_field}".
{hierarchy_text}
CONTEXT:
- Total objects: {self.metadata.get('total_objects', 0)}
- Objects with "{target_field}" = true: {self.metadata.get('target_count', 0)}
- Percentage: {self.metadata.get('percentage', 0)}%
- Has summary level data: {self.metadata.get('has_summary_level', False)}
SAMPLE SUMMARY DATA (check this first):
{sample_summary}
SAMPLE OBJECT DATA:
{sample_object}
TASK:
Identify 3-4 important fields related to "{target_field}" in this priority order:
1. FIRST: Summary/aggregate fields (totals, percentages, counts)
2. SECOND: Configuration/compliance fields
3. THIRD: Individual object fields (if needed)
Generate regex patterns that match JSON format (with quotes).
VALIDATION PATTERN EXAMPLES:
- Compare two aggregate values: "field1"\\s*:\\s*(\\d+)[\\s\\S]*?"field2"\\s*:\\s*(\\d+)
- Extract percentage: "field_percentage"\\s*:\\s*(\\d+)
- Extract boolean: "field_name"\\s*:\\s*(true|false)
- Extract status: "compliance"\\s*:\\s*"([^"]*)"
Output ONLY valid JSON:
{{
  "test_name": "Field Analysis: {target_field}",
  "important_fields": ["field1", "field2", "field3"],
  "reasoning": "Explain prioritization and why these fields matter",
  "generated_regex": ["regex1", "regex2", "regex3"]
}}
"""
        return prompt

    def call_llm(self, prompt: str) -> str:
        """Call the appropriate LLM based on provider.

        Raises:
            ValueError: if ``self.llm_provider`` is not a known provider.
        """
        if self.llm_provider == "ollama":
            return self._call_ollama(prompt)
        elif self.llm_provider == "openai":
            return self._call_openai(prompt)
        elif self.llm_provider == "anthropic":
            return self._call_anthropic(prompt)
        elif self.llm_provider == "huggingface":
            return self._call_huggingface(prompt)
        else:
            raise ValueError(f"Unknown LLM provider: {self.llm_provider}")

    def _call_ollama(self, prompt: str) -> str:
        """Call the Ollama API to generate a response.

        Raises:
            ConnectionError: the Ollama server cannot be reached.
            TimeoutError: no answer within the 120 second timeout.
            Exception: any other request failure.
        """
        try:
            payload = {
                "model": self.MODEL_NAME,
                "prompt": prompt,
                "stream": False,
                "format": "json"
            }
            response = requests.post(self.OLLAMA_API_URL, json=payload, timeout=120)
            response.raise_for_status()
            result = response.json()
            return result.get('response', '')
        except requests.exceptions.ConnectionError as e:
            raise ConnectionError("Cannot connect to Ollama. Make sure Ollama is running.") from e
        except requests.exceptions.Timeout as e:
            raise TimeoutError("Ollama request timed out.") from e
        except requests.exceptions.RequestException as e:
            raise Exception(f"Failed to call Ollama API - {e}") from e

    def _call_openai(self, prompt: str) -> str:
        """Call the OpenAI API to generate a response.

        The ``openai`` package is imported lazily so it is only required
        when this provider is actually selected.
        """
        try:
            from openai import OpenAI
            client = OpenAI(api_key=self.api_key)
            response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "You are a JSON data analysis assistant. Always respond with valid JSON."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                max_tokens=2000
            )
            return response.choices[0].message.content
        except ImportError as e:
            raise ImportError("OpenAI library not installed. Install with: pip install openai") from e
        except Exception as e:
            raise Exception(f"Failed to call OpenAI API - {e}") from e

    def _call_anthropic(self, prompt: str) -> str:
        """Call the Anthropic API to generate a response.

        The ``anthropic`` package is imported lazily so it is only required
        when this provider is actually selected.
        """
        try:
            from anthropic import Anthropic
            client = Anthropic(api_key=self.api_key)
            response = client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=2000,
                temperature=0.3,
                system="You are a JSON data analysis assistant. Always respond with valid JSON.",
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )
            return response.content[0].text
        except ImportError as e:
            raise ImportError("Anthropic library not installed. Install with: pip install anthropic") from e
        except Exception as e:
            raise Exception(f"Failed to call Anthropic API - {e}") from e

    def _call_huggingface(self, prompt: str) -> str:
        """Call the Hugging Face Inference API to generate a response.

        ``self.api_key`` is optional; when set it is sent only as a bearer
        token in the Authorization header.
        """
        try:
            headers = {"Content-Type": "application/json"}
            if self.api_key:
                headers["Authorization"] = f"Bearer {self.api_key}"
            # Instruction wrapper in the [INST] format used by the model.
            full_prompt = f"""<s>[INST]You are a JSON data analysis assistant. Always respond with valid JSON only, no explanations.
{prompt}[/INST]"""
            payload = {
                "inputs": full_prompt,
                "parameters": {
                    "max_new_tokens": 1000,
                    "temperature": 0.3,
                    "return_full_text": False
                }
            }
            response = requests.post(self._huggingface_url(), json=payload, headers=headers, timeout=60)
            if response.status_code == 503:
                raise Exception("Model is loading. Please wait a moment and try again.")
            response.raise_for_status()
            result = response.json()
            # The reply is either a list of generations or a single dict.
            if isinstance(result, list) and len(result) > 0:
                return result[0].get('generated_text', '')
            elif isinstance(result, dict):
                return result.get('generated_text', '')
            else:
                return str(result)
        except Exception as e:
            raise Exception(f"Failed to call Hugging Face API - {e}") from e

    def parse_llm_output(self, output: str) -> Dict[str, Any]:
        """Parse and validate the LLM JSON output.

        Strips an optional Markdown code fence before parsing.

        Raises:
            ValueError: the text is not valid JSON, or is valid JSON but
                not an object (callers read the result with ``.get``).
        """
        try:
            output = output.strip()
            if output.startswith("```json"):
                output = output[7:]
            if output.startswith("```"):
                output = output[3:]
            if output.endswith("```"):
                output = output[:-3]
            output = output.strip()
            result = json.loads(output)
        except json.JSONDecodeError as e:
            raise ValueError(f"LLM output is not valid JSON - {e}") from e
        if not isinstance(result, dict):
            raise ValueError("LLM output is not valid JSON - expected a JSON object")
        return result

    def analyze(self, target_field: str = "rotation_enabled") -> Dict[str, Any]:
        """Main analysis function: metadata -> prompt -> LLM call -> parsed reply."""
        self.extract_metadata(target_field)
        prompt = self.generate_prompt(target_field)
        llm_output = self.call_llm(prompt)
        result = self.parse_llm_output(llm_output)
        return result
def main():
    """Main Streamlit application.

    Renders the sidebar (LLM provider, API key, target field), accepts a
    JSON upload, runs ``FileAnalyzer.analyze`` on demand and shows the
    important fields, reasoning, regex patterns and a download button.
    """
    # Local import: used to escape LLM-produced text before it is rendered
    # as raw HTML below.
    import html

    st.title("📊 JSON Field Analyzer")
    if IS_HUGGINGFACE:
        st.info("🆓 Running on Hugging Face - FREE Hugging Face AI model available! No API key needed.")
    st.markdown("**Upload a JSON file and analyze important fields using LLM**")
    # Sidebar for configuration
    with st.sidebar:
        st.header("⚙️ Configuration")
        # Show environment info
        if IS_ONLINE and not IS_HUGGINGFACE:
            st.info("🌐 Running online - Cloud LLM required")
        # LLM Provider Selection
        # Default to Hugging Face (free) if online, Ollama on local
        if IS_ONLINE:
            default_index = 3  # Hugging Face (Free)
        else:
            default_index = 0  # Ollama
        llm_provider = st.selectbox(
            "🤖 LLM Provider",
            ["Ollama (Local)", "OpenAI (Cloud)", "Anthropic Claude (Cloud)", "Hugging Face (Free 🌟)"],
            index=default_index,
            help="Choose your LLM provider - Hugging Face is FREE and no API key needed!"
        )
        # Map the display label to the provider id FileAnalyzer expects and
        # collect the API key (environment variable first, then text input).
        if llm_provider == "Ollama (Local)":
            provider_name = "ollama"
            api_key = None
            if IS_ONLINE:
                st.error("❌ Ollama not available on this platform")
                st.markdown("**Please select a cloud LLM provider:**")
                st.markdown("- OpenAI (Cloud) - GPT-4o Mini")
                st.markdown("- Anthropic Claude (Cloud) - Recommended")
            else:
                st.info("📝 Using local Ollama")
        elif llm_provider == "OpenAI (Cloud)":
            provider_name = "openai"
            api_key = os.getenv("OPENAI_API_KEY") or st.text_input(
                "OpenAI API Key",
                type="password",
                help="Enter your OpenAI API key (or set OPENAI_API_KEY env var)"
            )
            if not api_key:
                st.warning("⚠️ Please enter your OpenAI API key")
                st.info("💡 Get key: https://platform.openai.com/api-keys")
        elif llm_provider == "Anthropic Claude (Cloud)":
            provider_name = "anthropic"
            api_key = os.getenv("ANTHROPIC_API_KEY") or st.text_input(
                "Anthropic API Key",
                type="password",
                help="Enter your Anthropic API key (or set ANTHROPIC_API_KEY env var)"
            )
            if not api_key:
                st.warning("⚠️ Please enter your Anthropic API key")
                st.info("💡 Get key: https://console.anthropic.com")
        else:  # Hugging Face (Free)
            provider_name = "huggingface"
            api_key = os.getenv("HUGGINGFACE_API_KEY") or st.text_input(
                "Hugging Face API Key (Optional)",
                type="password",
                help="Optional: Enter your HF token for faster inference (or set HUGGINGFACE_API_KEY env var)"
            )
            if not api_key:
                st.info("✨ Using free Hugging Face Inference API - no key needed!")
                st.info("💡 Optional: Add your token in Settings > Secrets for better performance")
        st.markdown("---")
        target_field = st.text_input(
            "Target Field",
            value="rotation_enabled",
            help="The field you want to analyze (e.g., rotation_enabled, ssl_enforced)"
        )
        st.markdown("---")
        st.markdown("### 📋 Setup Guides")
        with st.expander("🔧 Local Ollama Setup"):
            st.code("""
brew install ollama
ollama serve
ollama pull llama3.2:3b
            """, language="bash")
        with st.expander("☁️ Cloud API Setup"):
            st.markdown("""
            **OpenAI:**
            - Get key: https://platform.openai.com/api-keys
            - Model: GPT-4o Mini
            **Anthropic:**
            - Get key: https://console.anthropic.com
            - Model: Claude 3.5 Sonnet
            """)
    # File upload section
    st.markdown("---")
    st.header("📤 Upload JSON File")
    uploaded_file = st.file_uploader(
        "Choose a JSON file",
        type=['json'],
        help="Upload a JSON file to analyze"
    )
    # Display file info if uploaded
    if uploaded_file is not None:
        try:
            # Read file contents
            content = uploaded_file.read()
            data = json.loads(content)
            st.success("✅ File uploaded successfully!")
            # Show file info
            col1, col2 = st.columns(2)
            with col1:
                st.metric("File Size", f"{len(content) / 1024:.2f} KB")
            with col2:
                st.metric("JSON Structure", "Valid" if isinstance(data, (dict, list)) else "Invalid")
            # Analyze button
            st.markdown("---")
            col1, col2, col3 = st.columns([1, 2, 1])
            with col2:
                analyze_button = st.button("🔍 Analyze with LLM", type="primary", use_container_width=True)
            # Run analysis
            if analyze_button:
                # Prevent Ollama usage on online platforms
                if provider_name == "ollama" and IS_ONLINE:
                    st.error("❌ Ollama is not available on this platform")
                    st.info("💡 Please select 'Anthropic Claude (Cloud)' or 'OpenAI (Cloud)' from the sidebar")
                # Validate API key for cloud providers (except Hugging Face which is optional)
                elif provider_name in ["openai", "anthropic"] and not api_key:
                    st.error("❌ Please enter an API key for the selected cloud provider")
                else:
                    try:
                        with st.spinner(f"Analyzing with {llm_provider}... This may take a moment."):
                            analyzer = FileAnalyzer(data, llm_provider=provider_name, api_key=api_key)
                            result = analyzer.analyze(target_field=target_field)
                        # Display results
                        st.markdown("---")
                        st.header("📊 Analysis Results")
                        # Main results in columns
                        col1, col2 = st.columns(2)
                        with col1:
                            st.subheader("🤖 Important Fields")
                            for i, field in enumerate(result.get('important_fields', []), 1):
                                st.markdown(f"**{i}. {field}**")
                        with col2:
                            st.subheader("💡 Reasoning")
                            # The reasoning text comes from the LLM and is
                            # rendered as raw HTML, so escape it first to
                            # prevent markup/script injection.
                            reasoning_html = html.escape(str(result.get("reasoning", "N/A")))
                            st.markdown(f'<div class="highlight">{reasoning_html}</div>',
                                      unsafe_allow_html=True)
                        # Regex patterns
                        st.markdown("---")
                        st.subheader("🔧 Generated Regex Patterns")
                        regex_patterns = result.get('generated_regex', [])
                        for i, pattern in enumerate(regex_patterns, 1):
                            st.markdown(f"**Pattern {i}:**")
                            st.code(pattern, language="regex")
                        # Raw JSON output
                        with st.expander("📄 View Raw JSON Output"):
                            st.json(result)
                        # Download results
                        st.markdown("---")
                        result_json = json.dumps(result, indent=2)
                        st.download_button(
                            label="⬇️ Download Results",
                            data=result_json,
                            file_name=f"analysis_{target_field}.json",
                            mime="application/json"
                        )
                    except ConnectionError as e:
                        st.error(f"❌ {e}")
                        if provider_name == "ollama":
                            st.info("💡 Start Ollama with: `ollama serve`")
                        else:
                            st.info("💡 Check your internet connection and API key")
                    except TimeoutError as e:
                        st.error(f"❌ {e}")
                        st.info("💡 The analysis took too long. Try again or use a larger timeout.")
                    except Exception as e:
                        # UI boundary: report any other failure instead of crashing the app.
                        st.error(f"❌ Error during analysis: {e}")
                        st.exception(e)
        except json.JSONDecodeError:
            st.error("❌ Invalid JSON file. Please upload a valid JSON file.")
        except Exception as e:
            st.error(f"❌ Error reading file: {e}")
            st.exception(e)
    else:
        # Show example when no file is uploaded
        st.info("👆 Please upload a JSON file to get started")
        with st.expander("📖 How it works"):
            st.markdown("""
            ### Workflow:
            1. **Upload**: Upload your JSON file using the file uploader above
            2. **Configure**: Set the target field name in the sidebar (default: `rotation_enabled`)
            3. **Analyze**: Click the "Analyze with LLM" button
            4. **Review**: View the important fields, reasoning, and regex patterns
            5. **Download**: Save the results as JSON
            ### What it does:
            - Analyzes your JSON structure to detect summary fields, configurations, and objects
            - Uses LLM to identify important fields related to your target
            - Generates regex patterns for data extraction and validation
            - Provides reasoning for why each field is important
            ### Use cases:
            - AWS compliance validation (KMS rotation, SSL enforcement, etc.)
            - Data quality checks
            - Automated validation pattern generation
            - Field correlation analysis
            """)
-# Call main function - Streamlit will handle errors
-main()

 #!/usr/bin/env python3
 """
+Hugging Face Streamlit App for LLM Field Analyzer
+Upload a JSON file and analyze important fields with pattern generation.
 """
 import streamlit as st
 import json
 from pathlib import Path
 from typing import Dict, Any
 import io
# Page configuration (placed before every other Streamlit call in this script)
st.set_page_config(
    page_title="Field Correlation Analyzer",
    page_icon="🤖",
    layout="wide"
)
# Import our modules. analyze_with_llm() calls all three helpers, so
# classify_data_structure must be imported alongside the other two.
try:
    from structure_analysis import (
        detect_summary_fields,
        classify_data_structure,
        get_hierarchy_summary
    )
except ImportError:
    # Without the helper module nothing below can work: tell the user and halt.
    st.error("⚠️ structure_analysis.py not found. Make sure all files are uploaded.")
    st.stop()
# Session state: holds the latest analysis result (None until one is stored)
if 'analysis_result' not in st.session_state:
    st.session_state.analysis_result = None
def _first_sample_object(results: Any) -> Dict[str, Any]:
    """Return the first dict that heads a list found under ``results``."""
    if not isinstance(results, dict):
        return {}
    for section in results.values():
        if isinstance(section, list):
            groups = [section]
        elif isinstance(section, dict):
            groups = [value for value in section.values() if isinstance(value, list)]
        else:
            continue
        for group in groups:
            if group and isinstance(group[0], dict):
                # Stop at the first usable sample instead of letting later
                # sections overwrite it.
                return group[0]
    return {}


def _count_objects_with_field(obj: Any, field_name: str) -> int:
    """Count the dicts, at any depth, that contain ``field_name`` as a key."""
    if isinstance(obj, dict):
        own = 1 if field_name in obj else 0
        return own + sum(_count_objects_with_field(v, field_name) for v in obj.values())
    if isinstance(obj, list):
        return sum(_count_objects_with_field(item, field_name) for item in obj)
    return 0


def analyze_with_llm(data: Dict[str, Any], target_field: str = "rotation_enabled") -> Dict[str, Any]:
    """
    Analyze the structure of ``data`` and recommend fields for ``target_field``.

    Despite the name, this function makes no LLM call: it combines the
    structure-analysis helpers with a sample object, a summary sample and
    a count of objects containing ``target_field``.

    Returns a dict with the keys ``summary_fields_detected``,
    ``classification``, ``hierarchy_summary``, ``total_objects``,
    ``sample_object``, ``summary_sample`` and ``recommended_fields``.
    """
    summary_fields = detect_summary_fields(data)
    classification = classify_data_structure(data)
    hierarchy_summary = get_hierarchy_summary(data)

    # A JSON document may have a list at its root, or a non-dict "results";
    # in both cases there is simply nothing to sample.
    root = data if isinstance(data, dict) else {}
    results = root.get('results', {})
    sample_object = _first_sample_object(results)
    nested_summary = results.get('summary', {}) if isinstance(results, dict) else {}
    summary_sample = nested_summary or root.get('summary', {})

    total_objects = _count_objects_with_field(data, target_field)

    analysis = {
        "summary_fields_detected": summary_fields[:10],
        "classification": classification,
        "hierarchy_summary": hierarchy_summary,
        "total_objects": total_objects,
        "sample_object": sample_object,
        "summary_sample": summary_sample,
        "recommended_fields": []
    }

    # Recommend by priority: summary fields, then config fields, then
    # object keys whose name contains the target (case-insensitive).
    needle = target_field.lower()
    recommended = list(summary_fields[:3])
    recommended.extend((classification.get('config_fields') or [])[:2])
    recommended.extend(k for k in sample_object if needle in k.lower())
    # dict.fromkeys removes duplicates while keeping first-seen order.
    analysis["recommended_fields"] = list(dict.fromkeys(recommended))
    return analysis
def generate_regex_patterns(field_names: list, data_sample: dict, summary_sample: dict) -> list:
    """Generate regex patterns for given fields.

    Each pattern has the form ``"<key>"\\s*:\\s*<value>`` with exactly one
    capture group for the value. The value part is chosen from the sample
    value's type (bool, number, string) or, when the field is in neither
    sample, from hints in the field name.

    Args:
        field_names: field names or dotted paths to build patterns for.
        data_sample: one sample object; may be empty or None.
        summary_sample: the summary section; may be empty or None.

    Returns:
        The patterns in input order. Fields whose sample value is not a
        bool, number or string produce no pattern.
    """
    import re  # local import: only needed for re.escape

    bool_value = '(true|false)'
    # Numbers in JSON may be negative and/or decimal, not just digits.
    number_value = '(-?\\d+(?:\\.\\d+)?)'
    string_value = '"([^"]*)"'

    data_sample = data_sample or {}
    summary_sample = summary_sample or {}

    def value_pattern(value):
        # bool must be tested before int/float: bool is a subclass of int.
        if isinstance(value, bool):
            return bool_value
        if isinstance(value, (int, float)):
            return number_value
        if isinstance(value, str):
            return string_value
        return None

    patterns = []
    for field in field_names:
        field_lower = field.lower()
        if 'summary' in str(field):
            # Summary paths are looked up by their last segment.
            field_name = field.split('.')[-1]
            # NOTE(review): the literal "summary." key prefix is kept from
            # the original; confirm the matched text really uses such keys.
            key = f'summary.{re.escape(field_name)}'
            value_part = value_pattern(summary_sample.get(field_name))
        elif field in data_sample:
            key = re.escape(field)
            value_part = value_pattern(data_sample[field])
        else:
            # Field absent from both samples: guess from its name.
            key = re.escape(field)
            if 'percentage' in field_lower or 'count' in field_lower or 'total' in field_lower:
                value_part = number_value
            elif 'enabled' in field_lower or 'enforced' in field_lower:
                value_part = bool_value
            else:
                value_part = string_value
        if value_part is not None:
            patterns.append(f'"{key}"\\s*:\\s*{value_part}')
    return patterns
def _render_sidebar(data: Any) -> None:
    """Draw the settings sidebar and run the analysis when the user asks for it.

    On "Analyze", the result of ``analyze_with_llm(data, target_field)`` and
    the parsed ``data`` are stored in ``st.session_state`` so they survive
    Streamlit reruns.
    """
    with st.sidebar:
        st.header("⚙️ Settings")
        target_field = st.text_input(
            "Target Field",
            value="rotation_enabled",
            help="The field you want to analyze"
        )
        if st.button("🔍 Analyze", type="primary"):
            with st.spinner("Analyzing data structure..."):
                analysis_result = analyze_with_llm(data, target_field)
                st.session_state.analysis_result = analysis_result
                st.session_state.data = data
                # Patterns generated from an earlier analysis no longer match
                # the fresh result, so drop them instead of showing stale output.
                if 'generated_patterns' in st.session_state:
                    del st.session_state['generated_patterns']


def _render_summary_metrics(analysis: Dict[str, Any]) -> None:
    """Show the four headline metrics of an analysis result in one row."""
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Summary Fields", len(analysis['summary_fields_detected']))
    with col2:
        st.metric("Total Objects", analysis['total_objects'])
    with col3:
        st.metric("Has Summary", "Yes" if analysis['hierarchy_summary']['has_summary'] else "No")
    with col4:
        st.metric("Config Fields", len(analysis['classification'].get('config_fields', [])))


def _render_structure_tab(analysis: Dict[str, Any]) -> None:
    """List the detected hierarchy levels and expose the sample payloads."""
    st.subheader("Data Hierarchy")
    classification = analysis['classification']

    # Each level is capped so that very wide documents stay readable.
    if analysis['summary_fields_detected']:
        st.markdown("#### Level 1: Summary/Aggregate Fields (Highest Priority)")
        for field in analysis['summary_fields_detected'][:10]:
            st.write(f"✓ `{field}`")

    config_fields = classification.get('config_fields', [])
    if config_fields:
        st.markdown("#### Level 2: Configuration/Compliance Fields")
        for field in config_fields[:10]:
            st.write(f"✓ `{field}`")

    object_arrays = classification.get('object_arrays', [])
    if object_arrays:
        st.markdown("#### Level 3: Object Arrays")
        for field in object_arrays[:5]:
            st.write(f"✓ `{field}`")

    with st.expander("📋 View Summary Data Sample"):
        st.json(analysis['summary_sample'])
    with st.expander("📋 View Object Data Sample"):
        st.json(analysis['sample_object'])


def _render_recommendations_tab(analysis: Dict[str, Any]) -> None:
    """Let the user pick recommended fields and generate regex patterns for them."""
    st.subheader("Recommended Fields for Analysis")
    recommended = analysis['recommended_fields']
    if not recommended:
        st.warning("No recommended fields found.")
        return

    st.info("These fields are recommended based on the data hierarchy and target field.")
    selected_fields = st.multiselect(
        "Select fields to generate patterns for:",
        recommended,
        default=recommended[:3]
    )
    if selected_fields and st.button("Generate Patterns"):
        patterns = generate_regex_patterns(
            selected_fields,
            analysis['sample_object'],
            analysis['summary_sample']
        )
        # Stored in session state so the patterns tab can show them on reruns.
        st.session_state.generated_patterns = {
            'fields': selected_fields,
            'patterns': patterns
        }


def _render_patterns_tab() -> None:
    """Display previously generated patterns with copy and JSON export options."""
    if 'generated_patterns' not in st.session_state:
        st.info("👆 Go to 'Field Recommendations' tab to select fields and generate patterns.")
        return

    patterns_data = st.session_state.generated_patterns
    st.subheader("Generated Regex Patterns")
    for i, (field, pattern) in enumerate(zip(patterns_data['fields'], patterns_data['patterns']), 1):
        st.markdown(f"**Pattern {i}: {field}**")
        st.code(pattern, language="regex", line_numbers=False)
        st.markdown("---")

    # Plain text area so all patterns can be copied in one go.
    all_patterns = "\n".join(patterns_data['patterns'])
    st.text_area(
        "All Patterns (copy this):",
        all_patterns,
        height=100
    )

    export_data = {
        "test_name": "Field Analysis",
        "important_fields": patterns_data['fields'],
        "reasoning": "Fields identified using hierarchical analysis prioritizing summary/aggregate fields",
        "generated_regex": patterns_data['patterns']
    }
    st.download_button(
        label="📥 Download as JSON",
        data=json.dumps(export_data, indent=2),
        file_name="analysis_result.json",
        mime="application/json"
    )


def _render_raw_tab(data: Any) -> None:
    """Show the uploaded document as-is and offer it for download."""
    st.subheader("Raw Data Structure")
    st.json(data)
    st.download_button(
        label="📥 Download Raw Data",
        data=json.dumps(data, indent=2),
        file_name="raw_data.json",
        mime="application/json"
    )


def _render_analysis(analysis: Dict[str, Any], data: Any) -> None:
    """Render the metrics row and the four result tabs for a stored analysis."""
    _render_summary_metrics(analysis)
    st.markdown("---")
    tab1, tab2, tab3, tab4 = st.tabs([
        "📊 Structure Analysis",
        "🎯 Field Recommendations",
        "📝 Generated Patterns",
        "📄 Raw Data"
    ])
    with tab1:
        _render_structure_tab(analysis)
    with tab2:
        _render_recommendations_tab(analysis)
    with tab3:
        _render_patterns_tab()
    with tab4:
        _render_raw_tab(data)


def _render_getting_started() -> None:
    """Show usage instructions and an example document when no file is uploaded."""
    st.info("👆 Please upload a JSON file to begin analysis")
    with st.expander("📖 How to use"):
        # Blank lines separate the headings from the lists; without them the
        # second heading is rendered as a continuation of list item 6.
        st.markdown("""
            **Steps:**

            1. Upload a JSON file with structured data
            2. Set the target field you want to analyze (e.g., `rotation_enabled`)
            3. Click "Analyze" to process the data
            4. Review the structure analysis and field recommendations
            5. Select fields and generate regex patterns
            6. Download the results as JSON

            **What this tool does:**

            - Detects summary/aggregate fields automatically
            - Classifies data structure by hierarchy levels
            - Recommends important fields for validation
            - Generates regex patterns for field extraction
            """)
    with st.expander("📋 Example JSON Structure"):
        example = {
            "results": {
                "summary": {
                    "total_keys": 13,
                    "rotated_keys": 6,
                    "rotation_percentage": 46
                },
                "kms_keys": {
                    "object": [
                        {
                            "key_id": "12345",
                            "rotation_enabled": True,
                            "key_state": "Enabled"
                        }
                    ]
                }
            }
        }
        st.json(example)


def main():
    """Run the Streamlit page: upload a JSON file, analyze it, show the results.

    Without an upload, usage help and an example document are shown. With an
    upload, the file is parsed, the sidebar offers the "Analyze" action, and
    any analysis result held in ``st.session_state`` is rendered. Parsing and
    processing errors are reported in the page instead of being raised.
    """
    st.title("🤖 Field Correlation Analyzer")
    st.markdown("Upload a JSON file to analyze important fields and generate regex patterns")

    uploaded_file = st.file_uploader(
        "Choose a JSON file",
        type=['json'],
        help="Upload a JSON file with structured data"
    )
    if uploaded_file is None:
        _render_getting_started()
        return

    try:
        # getvalue() returns the whole buffer regardless of the current stream
        # position, so a rerun cannot end up parsing an empty read.
        content = uploaded_file.getvalue()
        data = json.loads(content)
        st.success("✅ File loaded successfully!")

        _render_sidebar(data)

        # .get() avoids an AttributeError when no analysis has been stored yet.
        analysis = st.session_state.get('analysis_result')
        if analysis:
            _render_analysis(analysis, data)
    except json.JSONDecodeError as e:
        st.error(f"❌ Invalid JSON file: {e}")
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user in the page.
        st.error(f"❌ Error processing file: {e}")
+if __name__ == "__main__":  # entry guard: true only when this file is run as the main script, not when imported
+    main()