[INST]You are a JSON data analysis assistant. Always respond with valid JSON only, no explanations.
{prompt}[/INST]"""
payload = {
"inputs": full_prompt,
"parameters": {
"max_new_tokens": 1000,
"temperature": 0.3,
"return_full_text": False
}
}
api_url = f"https://api-inference.huggingface.co/models/{model_name}"
response = requests.post(api_url, json=payload, headers=headers, timeout=60)
if response.status_code == 503:
raise Exception("Model is loading. Please wait a moment and try again.")
response.raise_for_status()
result = response.json()
# Handle different response formats
if isinstance(result, list) and len(result) > 0:
return result[0].get('generated_text', '')
elif isinstance(result, dict):
return result.get('generated_text', '')
else:
return str(result)
except Exception as e:
raise Exception(f"Failed to call Hugging Face API - {e}")
def parse_llm_output(self, output: str) -> Dict[str, Any]:
    """Parse and validate the LLM JSON output.

    Strips an optional Markdown code fence (```json ... ``` or ``` ... ```)
    wrapping the response before parsing it as JSON.

    Args:
        output: Raw text returned by the LLM.

    Returns:
        The parsed JSON value.

    Raises:
        ValueError: If the cleaned output is not valid JSON.
    """
    cleaned = output.strip()
    # Drop a leading fence, with or without the "json" language tag.
    if cleaned.startswith("```json"):
        cleaned = cleaned[len("```json"):]
    if cleaned.startswith("```"):
        cleaned = cleaned[len("```"):]
    # Drop a trailing fence, if present.
    if cleaned.endswith("```"):
        cleaned = cleaned[:-len("```")]
    try:
        return json.loads(cleaned.strip())
    except json.JSONDecodeError as e:
        raise ValueError(f"LLM output is not valid JSON - {e}")
def analyze(self, target_field: str = "rotation_enabled") -> Dict[str, Any]:
    """Run the full analysis pipeline for ``target_field``.

    Extracts metadata for the field, builds the LLM prompt, queries the
    LLM, and parses its JSON response.

    Args:
        target_field: Name of the field to analyze.

    Returns:
        The parsed analysis result from the LLM.
    """
    self.extract_metadata(target_field)
    prompt_text = self.generate_prompt(target_field)
    return self.parse_llm_output(self.call_llm(prompt_text))
def main():
    """Main Streamlit application.

    Renders the sidebar configuration (LLM provider, API key, target field),
    the JSON file uploader, and — once a valid file is uploaded and the
    analyze button pressed — the analysis results, regex patterns, and a
    download button for the result JSON.
    """
    st.title("📊 JSON Field Analyzer")
    if IS_HUGGINGFACE:
        st.info("🆓 Running on Hugging Face - FREE Hugging Face AI model available! No API key needed.")
    st.markdown("**Upload a JSON file and analyze important fields using LLM**")

    # Sidebar for configuration
    with st.sidebar:
        st.header("⚙️ Configuration")
        # Show environment info
        if IS_ONLINE and not IS_HUGGINGFACE:
            st.info("🌐 Running online - Cloud LLM required")

        # LLM Provider Selection
        # Default to Hugging Face (free) if online, Ollama on local
        if IS_ONLINE:
            default_index = 3  # Hugging Face (Free)
        else:
            default_index = 0  # Ollama
        llm_provider = st.selectbox(
            "🤖 LLM Provider",
            ["Ollama (Local)", "OpenAI (Cloud)", "Anthropic Claude (Cloud)", "Hugging Face (Free 🌟)"],
            index=default_index,
            help="Choose your LLM provider - Hugging Face is FREE and no API key needed!"
        )

        # Extract provider name and model
        if llm_provider == "Ollama (Local)":
            provider_name = "ollama"
            api_key = None
            if IS_ONLINE:
                st.error("❌ Ollama not available on this platform")
                st.markdown("**Please select a cloud LLM provider:**")
                st.markdown("- OpenAI (Cloud) - GPT-4o Mini")
                st.markdown("- Anthropic Claude (Cloud) - Recommended")
            else:
                st.info("📝 Using local Ollama")
        elif llm_provider == "OpenAI (Cloud)":
            provider_name = "openai"
            api_key = os.getenv("OPENAI_API_KEY") or st.text_input(
                "OpenAI API Key",
                type="password",
                help="Enter your OpenAI API key (or set OPENAI_API_KEY env var)"
            )
            if not api_key:
                st.warning("⚠️ Please enter your OpenAI API key")
                st.info("💡 Get key: https://platform.openai.com/api-keys")
        elif llm_provider == "Anthropic Claude (Cloud)":
            provider_name = "anthropic"
            api_key = os.getenv("ANTHROPIC_API_KEY") or st.text_input(
                "Anthropic API Key",
                type="password",
                help="Enter your Anthropic API key (or set ANTHROPIC_API_KEY env var)"
            )
            if not api_key:
                st.warning("⚠️ Please enter your Anthropic API key")
                st.info("💡 Get key: https://console.anthropic.com")
        else:  # Hugging Face (Free)
            provider_name = "huggingface"
            api_key = os.getenv("HUGGINGFACE_API_KEY") or st.text_input(
                "Hugging Face API Key (Optional)",
                type="password",
                help="Optional: Enter your HF token for faster inference (or set HUGGINGFACE_API_KEY env var)"
            )
            if not api_key:
                st.info("✨ Using free Hugging Face Inference API - no key needed!")
                st.info("💡 Optional: Add your token in Settings > Secrets for better performance")

        st.markdown("---")
        target_field = st.text_input(
            "Target Field",
            value="rotation_enabled",
            help="The field you want to analyze (e.g., rotation_enabled, ssl_enforced)"
        )
        st.markdown("---")
        st.markdown("### 📋 Setup Guides")
        with st.expander("🔧 Local Ollama Setup"):
            st.code("""
brew install ollama
ollama serve
ollama pull llama3.2:3b
""", language="bash")
        with st.expander("☁️ Cloud API Setup"):
            st.markdown("""
**OpenAI:**
- Get key: https://platform.openai.com/api-keys
- Model: GPT-4o Mini
**Anthropic:**
- Get key: https://console.anthropic.com
- Model: Claude 3.5 Sonnet
""")

    # File upload section
    st.markdown("---")
    st.header("📤 Upload JSON File")
    uploaded_file = st.file_uploader(
        "Choose a JSON file",
        type=['json'],
        help="Upload a JSON file to analyze"
    )

    # Display file info if uploaded
    if uploaded_file is not None:
        try:
            # Read file contents
            content = uploaded_file.read()
            data = json.loads(content)
            st.success("✅ File uploaded successfully!")

            # Show file info
            col1, col2 = st.columns(2)
            with col1:
                st.metric("File Size", f"{len(content) / 1024:.2f} KB")
            with col2:
                st.metric("JSON Structure", "Valid" if isinstance(data, (dict, list)) else "Invalid")

            # Analyze button
            st.markdown("---")
            col1, col2, col3 = st.columns([1, 2, 1])
            with col2:
                analyze_button = st.button("🔍 Analyze with LLM", type="primary", use_container_width=True)

            # Run analysis
            if analyze_button:
                # Prevent Ollama usage on online platforms
                if provider_name == "ollama" and IS_ONLINE:
                    st.error("❌ Ollama is not available on this platform")
                    st.info("💡 Please select 'Anthropic Claude (Cloud)' or 'OpenAI (Cloud)' from the sidebar")
                # Validate API key for cloud providers (except Hugging Face which is optional)
                elif provider_name in ["openai", "anthropic"] and not api_key:
                    st.error("❌ Please enter an API key for the selected cloud provider")
                else:
                    try:
                        with st.spinner(f"Analyzing with {llm_provider}... This may take a moment."):
                            analyzer = FileAnalyzer(data, llm_provider=provider_name, api_key=api_key)
                            result = analyzer.analyze(target_field=target_field)

                        # Display results
                        st.markdown("---")
                        st.header("📊 Analysis Results")

                        # Main results in columns
                        col1, col2 = st.columns(2)
                        with col1:
                            st.subheader("🤖 Important Fields")
                            for i, field in enumerate(result.get('important_fields', []), 1):
                                st.markdown(f"**{i}. {field}**")
                        with col2:
                            st.subheader("💡 Reasoning")
                            # Fixed: the original f-string was split across physical
                            # lines inside single quotes (a syntax error).
                            st.markdown(f'{result.get("reasoning", "N/A")}', unsafe_allow_html=True)

                        # Regex patterns
                        st.markdown("---")
                        st.subheader("🔧 Generated Regex Patterns")
                        regex_patterns = result.get('generated_regex', [])
                        for i, pattern in enumerate(regex_patterns, 1):
                            st.markdown(f"**Pattern {i}:**")
                            st.code(pattern, language="regex")

                        # Raw JSON output
                        with st.expander("📄 View Raw JSON Output"):
                            st.json(result)

                        # Download results
                        st.markdown("---")
                        result_json = json.dumps(result, indent=2)
                        st.download_button(
                            label="⬇️ Download Results",
                            data=result_json,
                            file_name=f"analysis_{target_field}.json",
                            mime="application/json"
                        )
                    except ConnectionError as e:
                        st.error(f"❌ {e}")
                        if provider_name == "ollama":
                            st.info("💡 Start Ollama with: `ollama serve`")
                        else:
                            st.info("💡 Check your internet connection and API key")
                    except TimeoutError as e:
                        st.error(f"❌ {e}")
                        st.info("💡 The analysis took too long. Try again or use a larger timeout.")
                    except Exception as e:
                        st.error(f"❌ Error during analysis: {e}")
                        st.exception(e)
        except json.JSONDecodeError:
            st.error("❌ Invalid JSON file. Please upload a valid JSON file.")
        except Exception as e:
            st.error(f"❌ Error reading file: {e}")
            st.exception(e)
    else:
        # Show example when no file is uploaded
        st.info("👆 Please upload a JSON file to get started")
        with st.expander("📖 How it works"):
            st.markdown("""
### Workflow:
1. **Upload**: Upload your JSON file using the file uploader above
2. **Configure**: Set the target field name in the sidebar (default: `rotation_enabled`)
3. **Analyze**: Click the "Analyze with LLM" button
4. **Review**: View the important fields, reasoning, and regex patterns
5. **Download**: Save the results as JSON
### What it does:
- Analyzes your JSON structure to detect summary fields, configurations, and objects
- Uses LLM to identify important fields related to your target
- Generates regex patterns for data extraction and validation
- Provides reasoning for why each field is important
### Use cases:
- AWS compliance validation (KMS rotation, SSL enforcement, etc.)
- Data quality checks
- Automated validation pattern generation
- Field correlation analysis
""")
# Call main function - Streamlit will handle errors.
# Streamlit executes this script as __main__, so the guard does not change
# behavior under `streamlit run`, but it makes the module safely importable.
if __name__ == "__main__":
    main()