File size: 3,810 Bytes
4a5d194
f3f88db
f5d6e87
4a5d194
29b1b2b
4a5d194
e1a6874
fd9a508
 
 
 
 
 
3c90643
 
212ac8d
 
f3f88db
fd9a508
f5d6e87
4a5d194
1574b63
 
 
4a5d194
fd9a508
f5d6e87
 
 
 
 
0a2338b
 
1574b63
 
 
0a2338b
 
 
1574b63
44497dc
3c90643
29e3630
 
 
 
 
 
 
3c90643
212ac8d
1574b63
 
29e3630
 
1574b63
 
 
 
 
f3f88db
0a2338b
 
 
 
 
 
 
 
 
f3f88db
44497dc
aa85114
0a2338b
aa85114
 
024065a
3c90643
0a2338b
f3f88db
 
4a5d194
fd9a508
0a2338b
fd9a508
 
 
 
4a5d194
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import gradio as gr
import torch
import json
from huggingface_hub import InferenceClient
import spaces

# Point the toolchain at the CUDA installation before any GPU work happens.
os.environ["CUDA_HOME"] = "/usr/local/cuda"
os.environ["PATH"] = f"{os.environ['PATH']}:/usr/local/cuda/bin"

# Function to perform JSON correction using a chosen model
@spaces.GPU(duration=120)  # Use GPU for this function, if available
def ai_fix_json(model_id, json_data):
    """Ask a hosted model to repair malformed JSON text.

    Parameters:
        model_id: Hugging Face model identifier to query via InferenceClient.
        json_data: The (possibly invalid) JSON text to fix.

    Returns:
        (fixed_json, message) on success, or (None, error_message) on failure.
    """
    client = InferenceClient(model=model_id)
    prompt = f"Fix the following JSON data and make it valid:\n\n{json_data}\n\nFixed JSON:"

    def _generate(device_label):
        # text_generation returns the generated text as a plain string
        # (the original code indexed it like a list of dicts, which made
        # every call report an invalid response).
        response = client.text_generation(prompt, max_new_tokens=1024)
        if not response:
            return None, f"Failed to process JSON with model {model_id}. Response was invalid."
        # Keep only the model's answer after the prompt's "Fixed JSON:" marker.
        fixed_json = response.split("Fixed JSON:")[-1].strip()
        return fixed_json, f"JSON fixed using AI on {device_label} with model: {model_id}"

    try:
        if torch.cuda.is_available():
            return _generate("GPU")
        # No CUDA device: route through the same fallback path as a GPU error.
        raise RuntimeError("GPU not available, falling back to CPU.")
    except Exception as gpu_error:
        print(f"Falling back to CPU due to: {gpu_error}")
        try:
            return _generate("CPU")
        except Exception as e:
            return None, f"Failed to process with model {model_id}. Error: {str(e)}"

def process_file(model_id, uploaded_file):
    """Read an uploaded JSON file, fix it with the selected model, and pretty-print it.

    Parameters:
        model_id: Model identifier forwarded to ai_fix_json.
        uploaded_file: Either a file-like object or a raw string (Gradio may
            hand over a NamedString instead of a file handle).

    Returns:
        (pretty_json, message, pretty_json) on success, or (None, message, None)
        on failure — matching the Interface's three outputs.
    """
    # If uploaded_file is a file-like object, read its content.
    if hasattr(uploaded_file, 'read'):
        json_data = uploaded_file.read().decode("utf-8")
    else:
        # Already a string (e.g. Gradio's NamedString) — use it directly.
        json_data = uploaded_file

    cleaned_json, message = ai_fix_json(model_id, json_data)

    if not cleaned_json:
        return None, message, None

    try:
        # NOTE: json.loads() takes no ensure_ascii argument (that's dumps-only);
        # passing it raised TypeError on every successful fix.
        parsed_data = json.loads(cleaned_json)
        # Keep Unicode characters readable in the pretty-printed output.
        pretty_json = json.dumps(parsed_data, indent=4, ensure_ascii=False)
        return pretty_json, message, pretty_json
    except json.JSONDecodeError as e:
        return None, f"Failed to fix JSON: {str(e)}", None

# Models selectable in the UI dropdown; the first entry is used as the default.
model_options = [
    "EleutherAI/gpt-neo-2.7B",
    "gpt2",
    "facebook/opt-1.3b",
    "EleutherAI/gpt-j-6B",
    "google/flan-t5-base",
]

# Assemble the Gradio UI: model picker + JSON upload in; fixed JSON,
# status text, and a downloadable file out.
_ui_inputs = [
    gr.Dropdown(label="Select Model", choices=model_options, value="EleutherAI/gpt-neo-2.7B"),
    gr.File(label="Upload your JSON file"),
]
_ui_outputs = [
    gr.JSON(label="Fixed JSON"),
    "text",
    gr.File(label="Download cleaned JSON file"),
]
iface = gr.Interface(
    fn=process_file,
    inputs=_ui_inputs,
    outputs=_ui_outputs,
    title="AI-Powered JSON Cleaner with Model Selection",
    description="Upload a JSON file to automatically fix, remove duplicates, and download the cleaned version using AI with GPU/CPU fallback. Select any model from the dropdown list."
)

if __name__ == "__main__":
    try:
        # NOTE(review): nothing is actually initialized here — the try wraps
        # only a print, so the except branch is effectively unreachable.
        # Wire up real ZeroGPU initialization or drop the try/except.
        print("ZeroGPU initialized.")
    except Exception as e:
        print(f"ZeroGPU initialization failed: {e}. Falling back to CPU.")
    
    # Start the Gradio app (blocks until the server is stopped).
    iface.launch()