"""UCode Agent: a Gradio Space that proxies base64-encoded chat requests to a
Llama-3-70B Instruct endpoint via the Hugging Face InferenceClient."""

import base64
import json
import os
import subprocess

import gradio as gr
import spaces
from huggingface_hub import InferenceClient, login

# --- Environment bootstrap -------------------------------------------------
# Upgrade pip, then run the bundled setup script (presumably installs nvm
# into ~/.bashrc — TODO confirm against data/setup.sh).
subprocess.run(["pip", "install", "--upgrade", "pip"])
subprocess.run(
    "curl -o- file:///home/user/app/data/setup.sh | bash",
    shell=True,
    executable="/bin/bash",
)
# `source` is a shell builtin and `nvm` is a shell function, so neither can
# be exec'd directly: the original exec-style subprocess.run calls would raise
# FileNotFoundError. They must all run inside a single bash process. Note that
# environment changes made by `source` never persist into this Python process.
subprocess.run(
    [
        "bash",
        "-c",
        "source /home/user/.bashrc && nvm install lts/iron && npm update -g npm",
    ]
)

# Authenticate with the Hugging Face Hub using the token stored in the
# UCODE_SECRET environment variable; fail fast with a clear message if absent.
key = "UCODE_SECRET"
hf_token = os.getenv(key)
if not hf_token:
    raise RuntimeError(f"Missing {key} environment variable (Hugging Face token).")
login(hf_token)

# Initialize the InferenceClient with the specified model.
client = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")


def decode_base64_to_json(base64_str):
    """Decode a base64 string whose payload is a JSON document.

    Args:
        base64_str: Base64 text; the decoded bytes are UTF-8 JSON, possibly
            with backslash-escaped quotes inserted by the client.

    Returns:
        The parsed JSON value (callers expect a list of chat messages).

    Raises:
        ValueError: If decoding or JSON parsing fails.
    """
    try:
        decoded_str = base64.b64decode(base64_str).decode("utf-8")
        # NOTE(review): this ad-hoc unescaping is order-dependent and assumes
        # the sender escaped quotes/backslashes before base64-encoding —
        # confirm against the client that produces these payloads.
        decoded_str = (
            decoded_str.replace("\\'", "'").replace('\\"', '"').replace("\\\\", "\\")
        )
        print(f"===================================================\nDecoded string: {decoded_str}\n===================================================")  # Log the decoded string
        return json.loads(decoded_str)
    except Exception as e:
        raise ValueError(f"Error decoding base64 to JSON: {str(e)}")


@spaces.GPU(enable_queue=True)
def chat_completion(user_input, max_tokens, temperature, top_p):
    """Generate a streamed chat completion and return it as a JSON string.

    Args:
        user_input: Base64-encoded JSON list of chat messages.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature (0.0-1.0).
        top_p: Nucleus-sampling probability mass (0.0-1.0).

    Returns:
        A JSON string: {"status": "success", "output": ...} on success, or
        {"status": "error", "message": ...} on any failure.
    """
    try:
        # Decode the base64-encoded JSON input.
        input_data = decode_base64_to_json(user_input)

        # Ensure the input is a list of messages.
        if not isinstance(input_data, list):
            raise ValueError("Input must be a list of messages.")

        response = ""
        # Accumulate the streamed completion token by token.
        for message in client.chat_completion(
            input_data,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # `or ""` guards the final chunk, whose delta content may be None
            # even when the key is present.
            token = message.choices[0].delta.get("content", "") or ""
            response += token
        return json.dumps({"status": "success", "output": response})
    except Exception as e:
        # Fixed: the original separated `return` from its value, so the error
        # path returned None instead of the JSON error payload.
        return json.dumps({"status": "error", "message": str(e)})


# --- Gradio UI --------------------------------------------------------------
user_input = gr.Textbox(label="User Input as Base64-encoded JSON String", lines=10)
# NOTE(review): 8092 looks like a typo for 8192 — left unchanged; confirm.
max_tokens = gr.Slider(minimum=1, maximum=8092, value=150, label="Max Tokens")
temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="Temperature")
top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, label="Top P")

iface = gr.Interface(
    fn=chat_completion,
    inputs=[user_input, max_tokens, temperature, top_p],
    outputs="text",
    title="UCode Agent",
    description="Provide Base64-encoded JSON input with a list of messages and set the max tokens, temperature, and top_p to generate a chat completion.",
)

# Launch the Gradio interface.
iface.launch()