"""Gradio app that asks a hosted language model to repair malformed JSON.

The user picks a model and uploads a JSON file; the file's text is sent to the
Hugging Face Inference API with a "fix this JSON" prompt, and the model output
is parsed, pretty-printed, shown in the UI and offered as a download.
"""

import json
import os
import tempfile

import gradio as gr
import spaces
import torch
from huggingface_hub import InferenceClient

# Ensure CUDA is configured correctly
_CUDA_HOME = '/usr/local/cuda'
os.environ['CUDA_HOME'] = _CUDA_HOME
# Append the CUDA bin directory; tolerate a missing/empty PATH instead of
# raising KeyError or creating an empty (current-directory) PATH entry.
os.environ['PATH'] = os.pathsep.join(
    part for part in (os.environ.get('PATH', ''), f'{_CUDA_HOME}/bin') if part
)

# Upper bound on the number of tokens the model may generate per request.
_MAX_NEW_TOKENS = 1024
# Marker that ends the prompt; anything before its last occurrence in the
# model output is discarded.
_ANSWER_MARKER = "Fixed JSON:"


def _extract_generated_text(response):
    """Return the generated text contained in *response*, or ``None``.

    ``InferenceClient.text_generation`` returns a plain ``str`` by default
    (see the huggingface_hub documentation). An object exposing
    ``generated_text`` and the legacy ``[{"generated_text": ...}]`` list shape
    are accepted as well so that other response formats keep working.
    """
    if isinstance(response, str):
        return response or None

    text = getattr(response, "generated_text", None)
    if isinstance(text, str):
        return text or None

    if isinstance(response, (list, tuple)) and response:
        first = response[0]
        if isinstance(first, dict):
            text = first.get("generated_text")
            if isinstance(text, str):
                return text or None
    return None


def _request_fixed_json(client, prompt):
    """Send *prompt* to the model and return the cleaned-up answer text.

    Returns ``None`` when the response carries no usable text. Exceptions
    raised by the client propagate to the caller.
    """
    response = client.text_generation(prompt, max_new_tokens=_MAX_NEW_TOKENS)
    text = _extract_generated_text(response)
    if text is None:
        return None
    # Keep only what follows the marker in case the model echoes the prompt.
    return text.split(_ANSWER_MARKER)[-1].strip() or None


# Function to perform JSON correction using a chosen model
@spaces.GPU(duration=120)  # Use GPU for this function, if available
def ai_fix_json(model_id, json_data):
    """Ask the model *model_id* to repair *json_data*.

    Args:
        model_id: Hugging Face model identifier passed to ``InferenceClient``.
        json_data: The (possibly invalid) JSON text to repair.

    Returns:
        A ``(fixed_json, message)`` tuple. ``fixed_json`` is the model's
        answer as a string, or ``None`` on failure; ``message`` is a
        human-readable status line.
    """
    client = InferenceClient(model=model_id)
    prompt = f"Fix the following JSON data and make it valid:\n\n{json_data}\n\nFixed JSON:"

    try:
        # Check if GPU is available
        if not torch.cuda.is_available():
            raise RuntimeError("GPU not available, falling back to CPU.")
        device = "GPU"
        fixed_json = _request_fixed_json(client, prompt)
    except Exception as gpu_error:
        # Any failure on the first attempt triggers exactly one retry.
        print(f"Falling back to CPU due to: {gpu_error}")
        device = "CPU"
        try:
            fixed_json = _request_fixed_json(client, prompt)
        except Exception as e:
            return None, f"Failed to process with model {model_id}. Error: {e}"

    # Handle cases where the response might be malformed or empty
    if fixed_json is None:
        return None, f"Failed to process JSON with model {model_id}. Response was invalid."
    return fixed_json, f"JSON fixed using AI on {device} with model: {model_id}"


def _read_upload(uploaded_file):
    """Return the text content of *uploaded_file*.

    Accepts a file-like object (anything with ``read``), a path string or an
    object whose ``name`` attribute is a path, or raw JSON text.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    if hasattr(uploaded_file, 'read'):
        data = uploaded_file.read()
        return data.decode("utf-8") if isinstance(data, bytes) else data

    # A path-like upload: read the file it points to rather than sending the
    # path itself to the model.
    path = getattr(uploaded_file, "name", uploaded_file)
    if isinstance(path, (str, os.PathLike)) and os.path.isfile(path):
        with open(path, encoding="utf-8") as handle:
            return handle.read()

    # Not an existing file: treat the value as the JSON text itself.
    return str(uploaded_file)


def _write_download(pretty_json):
    """Write *pretty_json* to a new temporary ``.json`` file and return its path.

    The file is created with ``delete=False`` so it still exists after this
    function returns and can be served as a download.
    """
    with tempfile.NamedTemporaryFile(
        "w", suffix=".json", prefix="cleaned_", delete=False, encoding="utf-8"
    ) as handle:
        handle.write(pretty_json)
        return handle.name


def process_file(model_id, uploaded_file):
    """Repair the uploaded JSON file with the selected model.

    Args:
        model_id: Hugging Face model identifier chosen in the dropdown.
        uploaded_file: The value supplied by the file input (file-like
            object, path string, or raw JSON text); may be ``None``.

    Returns:
        A ``(pretty_json, message, download_path)`` tuple matching the three
        interface outputs. On failure the first and last items are ``None``
        and ``message`` explains what went wrong.
    """
    if uploaded_file is None:
        return None, "No file uploaded.", None

    try:
        json_data = _read_upload(uploaded_file)
    except (OSError, UnicodeDecodeError) as e:
        return None, f"Failed to read uploaded file: {e}", None

    cleaned_json, message = ai_fix_json(model_id, json_data)
    if not cleaned_json:
        return None, message, None

    try:
        # json.loads takes no ensure_ascii argument; decoded strings are
        # already Unicode.
        parsed_data = json.loads(cleaned_json)
    except json.JSONDecodeError as e:
        return None, f"Failed to fix JSON: {str(e)}", None

    # Keep Unicode characters readable
    pretty_json = json.dumps(parsed_data, indent=4, ensure_ascii=False)
    # The file output needs a path on disk, not the JSON text.
    return pretty_json, message, _write_download(pretty_json)


# List of available models
model_options = [
    "EleutherAI/gpt-neo-2.7B",
    "gpt2",
    "facebook/opt-1.3b",
    "EleutherAI/gpt-j-6B",
    "google/flan-t5-base",
]

iface = gr.Interface(
    fn=process_file,
    inputs=[
        gr.Dropdown(label="Select Model", choices=model_options, value="EleutherAI/gpt-neo-2.7B"),
        gr.File(label="Upload your JSON file"),
    ],
    outputs=[gr.JSON(label="Fixed JSON"), "text", gr.File(label="Download cleaned JSON file")],
    title="AI-Powered JSON Cleaner with Model Selection",
    description="Upload a JSON file to automatically fix, remove duplicates, and download the cleaned version using AI with GPU/CPU fallback. Select any model from the dropdown list.",
)

if __name__ == "__main__":
    try:
        # Initialize any necessary services, e.g., ZeroGPU
        # (placeholder: nothing is initialized here yet)
        print("ZeroGPU initialized.")
    except Exception as e:
        print(f"ZeroGPU initialization failed: {e}. Falling back to CPU.")
    iface.launch()