import subprocess
import spaces
import os
import gradio as gr
import json
import base64
from huggingface_hub import InferenceClient, login

# First-run setup: make the bundled scripts executable, run them, install
# Node via nvm, and install n8n and pm2 into the Node bin directory.
if not os.path.exists("/home/user/.flag"):
    subprocess.Popen("chmod +x /home/user/app/data/config_nginx.sh && chmod +x /home/user/app/data/setup.sh", shell=True, executable='/bin/bash').wait()
    subprocess.Popen("curl -o- file:///home/user/app/data/config_nginx.sh | bash", shell=True, executable='/bin/bash').wait()
    subprocess.Popen("curl -o- file:///home/user/app/data/setup.sh | bash", shell=True, executable='/bin/bash').wait()
    subprocess.Popen("source /home/user/.bashrc && source /home/user/.nvm/nvm.sh && nvm install --lts && npm update -g npm", shell=True, executable='/bin/bash').wait()
    # Resolve the installed Node version directory so tools can be invoked
    # by absolute path (nvm's bin dir is not on this process's PATH).
    node_version_dir = subprocess.check_output("ls ~/.nvm/versions/node", shell=True, executable='/bin/bash').strip().decode('utf-8')
    node_path = f"/home/user/.nvm/versions/node/{node_version_dir}/bin/"
    subprocess.Popen(f"{node_path}node {node_path}npm install n8n@latest -g", shell=True, executable='/bin/bash').wait()
    subprocess.Popen(f"{node_path}node {node_path}npm install pm2@latest -g", shell=True, executable='/bin/bash').wait()
    # pm2 lives in the nvm bin directory, which is not on PATH for this
    # process, so invoke it by its full path.
    subprocess.Popen([f"{node_path}pm2", "start", f"{node_path}n8n"]).wait()
    subprocess.Popen([f"{node_path}pm2", "start", "/home/user/app/data/models/llama3-1-8b.py", "--interpreter=python3"]).wait()
    subprocess.Popen([f"{node_path}pm2", "start", "/home/user/app/data/models/llama3-1-70b.py", "--interpreter=python3"]).wait()

# Read the Hugging Face token from the Space secret named "ai".
api_key = os.getenv("ai")

client = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct", token=api_key)

def decode_base64_to_json(base64_str):
    """Decode a base64-encoded string and parse it as a JSON value."""
    try:
        decoded_bytes = base64.b64decode(base64_str)
        decoded_str = decoded_bytes.decode('utf-8')
        # Undo shell-style escaping of quotes and backslashes before parsing.
        decoded_str = decoded_str.replace("\\'", "'").replace('\\"', '"').replace('\\\\', '\\')
        print(f"===================================================\nDecoded string: {decoded_str}\n===================================================")
        return json.loads(decoded_str)
    except Exception as e:
        raise ValueError(f"Error decoding base64 to JSON: {str(e)}") from e
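
# Illustrative sketch (not part of the original app): how a caller would build
# the base64 payload this function expects. The payload shape is an assumption
# based on chat_completion below, which requires a list of messages.
#
#     payload = [{"role": "user", "content": "Hello!"}]
#     encoded = base64.b64encode(json.dumps(payload).encode("utf-8")).decode("ascii")
#     decode_base64_to_json(encoded)  # -> [{"role": "user", "content": "Hello!"}]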

@spaces.GPU()
def chat_completion(user_input, max_tokens, temperature, top_p):
    """Decode the base64 payload and stream a chat completion from the client."""
    try:
        input_data = decode_base64_to_json(user_input)
        if not isinstance(input_data, list):
            raise ValueError("Input must be a list of messages.")

        response = ""
        for message in client.chat_completion(
            input_data,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Each streamed chunk carries an incremental delta object; its
            # content can be None on the final chunk, so fall back to "".
            token = message.choices[0].delta.content or ""
            response += token

        return json.dumps({"status": "success", "output": response})
    except Exception as e:
        return json.dumps({"status": "error", "message": str(e)})
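
# Illustrative usage (not executed here): calling chat_completion directly with
# an encoded two-message conversation. The message contents are made up.
#
#     msgs = [{"role": "system", "content": "You are helpful."},
#             {"role": "user", "content": "Say hi."}]
#     encoded = base64.b64encode(json.dumps(msgs).encode("utf-8")).decode("ascii")
#     print(chat_completion(encoded, max_tokens=64, temperature=0.7, top_p=0.9))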

# Gradio input components.
user_input = gr.Textbox(label="User Input as Base64-encoded JSON String", lines=10)
max_tokens = gr.Slider(minimum=1, maximum=8192, value=150, label="Max Tokens")
temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="Temperature")
top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, label="Top P")

iface = gr.Interface(
    fn=chat_completion,
    inputs=[user_input, max_tokens, temperature, top_p],
    outputs="text",
    title="UCode Agent",
    description="Provide Base64-encoded JSON input with a list of messages and set the max tokens, temperature, and top_p to generate a chat completion.",
)

# On first run, write the flag file so setup is skipped on restarts, then
# start nginx under pm2 (node_path is defined above whenever this branch runs,
# since both blocks are guarded by the same flag check).
if not os.path.exists("/home/user/.flag"):
    subprocess.Popen("echo 'initialized' > /home/user/.flag", shell=True, executable='/bin/bash').wait()
    subprocess.Popen("echo 'starting up NginX'", shell=True, executable='/bin/bash').wait()
    subprocess.Popen([f"{node_path}pm2", "start", "/usr/sbin/nginx"]).wait()

iface.launch(share=False)
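
# Illustrative remote call (not part of the app): querying the launched
# interface with gradio_client. The URL and api_name are assumptions for a
# default local Gradio launch.
#
#     from gradio_client import Client
#     c = Client("http://127.0.0.1:7860")
#     print(c.predict(encoded, 150, 0.7, 0.9, api_name="/predict"))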