# Validjson / app.py
# Author: Oranblock
# Last commit: "Update app.py" (29e3630, verified)
import os
import gradio as gr
import torch
import json
from huggingface_hub import InferenceClient
import spaces
# Point CUDA tooling at the toolkit directory and append the toolkit's bin
# directory to the executable search path.
os.environ['CUDA_HOME'] = '/usr/local/cuda'
os.environ['PATH'] += ':/usr/local/cuda/bin'
def _extract_generated_text(response):
    """Return the generated text carried by an inference response, or None.

    ``InferenceClient.text_generation`` returns a plain ``str`` when it is
    called without ``details``/``stream`` (as it is in this file). An object
    exposing a ``generated_text`` attribute and a raw
    ``[{"generated_text": ...}]`` payload are accepted too, so the caller does
    not depend on one exact response shape.
    """
    if isinstance(response, str):
        return response
    text = getattr(response, "generated_text", None)
    if isinstance(text, str):
        return text
    if isinstance(response, (list, tuple)) and response:
        first = response[0]
        if isinstance(first, dict) and isinstance(first.get("generated_text"), str):
            return first["generated_text"]
    return None


def _run_fix(client, prompt, model_id, device):
    """Run one generation request and build the ``(fixed_json, message)`` pair.

    ``device`` is only used to label the success message ("GPU" or "CPU").
    Exceptions raised by the client propagate to the caller.
    """
    response = client.text_generation(prompt, max_new_tokens=1024)
    generated = _extract_generated_text(response)
    # Keep only what follows the last "Fixed JSON:" marker in case the model
    # echoed the prompt back; an empty remainder counts as an invalid response.
    fixed_json = generated.split("Fixed JSON:")[-1].strip() if generated else ""
    if not fixed_json:
        return None, f"Failed to process JSON with model {model_id}. Response was invalid."
    return fixed_json, f"JSON fixed using AI on {device} with model: {model_id}"


# Function to perform JSON correction using a chosen model
@spaces.GPU(duration=120)  # Use GPU for this function, if available
def ai_fix_json(model_id, json_data):
    """Ask the selected model to repair ``json_data``.

    Args:
        model_id: Model identifier handed to ``InferenceClient``.
        json_data: The (possibly invalid) JSON text to repair.

    Returns:
        A ``(fixed_json, message)`` tuple. ``fixed_json`` is the model's
        candidate JSON text, or ``None`` when the response was unusable or the
        request failed; ``message`` is a human-readable status line.

    The "GPU"/"CPU" wording in the status message reflects only whether
    ``torch.cuda.is_available()`` is true locally and whether the first
    attempt raised; the generation request itself is the same in both cases.
    """
    client = InferenceClient(model=model_id)
    prompt = f"Fix the following JSON data and make it valid:\n\n{json_data}\n\nFixed JSON:"

    if torch.cuda.is_available():
        try:
            return _run_fix(client, prompt, model_id, "GPU")
        except Exception as gpu_error:
            # Best-effort: log the failure and retry once below.
            print(f"Falling back to CPU due to: {gpu_error}")
    else:
        print("Falling back to CPU due to: GPU not available, falling back to CPU.")

    try:
        return _run_fix(client, prompt, model_id, "CPU")
    except Exception as e:
        return None, f"Failed to process with model {model_id}. Error: {str(e)}"
def _read_uploaded_text(uploaded_file):
    """Return the text content of an uploaded file in any supported form.

    Handles file-like objects (``read()`` returning bytes or str), raw
    ``bytes``, and strings/paths that name an existing file (either the value
    itself or its ``name`` attribute). Anything else is returned unchanged and
    treated as the JSON text itself.
    """
    if hasattr(uploaded_file, 'read'):
        raw = uploaded_file.read()
        return raw.decode("utf-8") if isinstance(raw, (bytes, bytearray)) else raw
    if isinstance(uploaded_file, (bytes, bytearray)):
        return bytes(uploaded_file).decode("utf-8")
    # The upload may arrive as a path rather than as content; read the file
    # instead of sending its path to the model.
    for candidate in (uploaded_file, getattr(uploaded_file, 'name', None)):
        if isinstance(candidate, (str, os.PathLike)) and os.path.isfile(candidate):
            with open(candidate, encoding="utf-8") as handle:
                return handle.read()
    return uploaded_file


def process_file(model_id, uploaded_file):
    """Repair an uploaded JSON document with the chosen model.

    Args:
        model_id: Model identifier forwarded to ``ai_fix_json``.
        uploaded_file: The upload: a file-like object, bytes, a path to the
            uploaded file, or the JSON text itself. ``None`` means nothing
            was uploaded.

    Returns:
        A ``(pretty_json, message, download_path)`` tuple. On success
        ``pretty_json`` is the re-serialized JSON text and ``download_path``
        is the path of a temporary ``.json`` file holding the same text.
        On failure the first and last items are ``None`` and ``message``
        explains why.
    """
    import tempfile  # local import: not among this module's top-level imports

    if uploaded_file is None:
        return None, "No file uploaded.", None

    json_data = _read_uploaded_text(uploaded_file)
    cleaned_json, message = ai_fix_json(model_id, json_data)
    if not cleaned_json:
        return None, message, None

    try:
        # json.loads has no ensure_ascii option (that belongs to json.dumps);
        # passing it raised a TypeError that the handler below did not catch.
        parsed_data = json.loads(cleaned_json)
    except json.JSONDecodeError as e:
        return None, f"Failed to fix JSON: {str(e)}", None

    pretty_json = json.dumps(parsed_data, indent=4, ensure_ascii=False)  # Keep Unicode characters readable

    # The download output expects a file path, so persist the result first.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".json", delete=False, encoding="utf-8"
    ) as out_file:
        out_file.write(pretty_json)
    return pretty_json, message, out_file.name
# Model identifiers offered in the "Select Model" dropdown.
model_options = [
    "EleutherAI/gpt-neo-2.7B",
    "gpt2",
    "facebook/opt-1.3b",
    "EleutherAI/gpt-j-6B",
    "google/flan-t5-base",
]
# Gradio front end: a model picker and a file upload feed process_file, whose
# three return values fill the JSON viewer, the status text and the download
# slot, in that order.
_interface_inputs = [
    gr.Dropdown(
        label="Select Model",
        choices=model_options,
        value="EleutherAI/gpt-neo-2.7B",
    ),
    gr.File(label="Upload your JSON file"),
]
_interface_outputs = [
    gr.JSON(label="Fixed JSON"),
    "text",
    gr.File(label="Download cleaned JSON file"),
]
iface = gr.Interface(
    title="AI-Powered JSON Cleaner with Model Selection",
    description="Upload a JSON file to automatically fix, remove duplicates, and download the cleaned version using AI with GPU/CPU fallback. Select any model from the dropdown list.",
    fn=process_file,
    inputs=_interface_inputs,
    outputs=_interface_outputs,
)
if __name__ == "__main__":
    # No actual start-up work is performed in the guarded section: it only
    # logs a status line, and the handler reports a failure of that step.
    try:
        print("ZeroGPU initialized.")
    except Exception as init_error:
        print(f"ZeroGPU initialization failed: {init_error}. Falling back to CPU.")

    # Start the Gradio server for the interface.
    iface.launch()