File size: 4,105 Bytes
9b55ca7
a0b4b8a
6ac675b
 
 
 
a0b4b8a
6ac675b
a66dcdf
f33a2b4
 
 
 
 
 
ef67dd5
 
42a32ee
f4d019d
dc1f6c8
372ab6a
05beeaf
6ac675b
61f3e17
6ac675b
 
61f3e17
6ac675b
 
 
 
 
 
 
 
 
 
 
cb14670
6ac675b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc6e9c2
6ac675b
 
a66dcdf
 
fc5a955
 
f33a2b4
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import subprocess
import spaces
import os
import gradio as gr
import json
import base64
from huggingface_hub import InferenceClient, login

# One-time provisioning: everything below runs only while the marker file
# /home/user/.flag does not exist. Each step is a blocking shell command
# (Popen(...).wait()); the exit codes returned by wait() are discarded, so a
# failing step does not stop the sequence.
# NOTE(review): the order of these steps matters (scripts -> node -> npm
# packages -> pm2 launches); confirm before reordering anything.
if not os.path.exists("/home/user/.flag"):
    # Make the two bundled shell scripts executable.
    subprocess.Popen("chmod +x /home/user/app/data/config_nginx.sh && chmod +x /home/user/app/data/setup.sh", shell=True, executable='/bin/bash').wait()
    # Run each script by streaming the local file through curl into bash.
    subprocess.Popen("curl -o- file:///home/user/app/data/config_nginx.sh | bash", shell=True, executable='/bin/bash').wait()
    subprocess.Popen("curl -o- file:///home/user/app/data/setup.sh | bash", shell=True, executable='/bin/bash').wait()
    # Load nvm into this shell, install the LTS Node release and update npm.
    # NOTE(review): presumably setup.sh is what installs nvm - confirm.
    subprocess.Popen("source /home/user/.bashrc && source /home/user/.nvm/nvm.sh && nvm install --lts && npm update -g npm", shell=True, executable='/bin/bash').wait()
    # The raw `ls` output is used verbatim as the version directory name.
    # NOTE(review): if more than one Node version is installed the output
    # contains several lines and node_path below would be malformed.
    node_version_dir    = subprocess.check_output("ls ~/.nvm/versions/node", shell=True, executable='/bin/bash').strip().decode('utf-8')
    node_path           = f"/home/user/.nvm/versions/node/{node_version_dir}/bin/"
    # Install n8n and pm2 globally, invoking npm through the node binary found above.
    subprocess.Popen(f"{node_path}node {node_path}npm install n8n@latest -g", shell=True, executable='/bin/bash').wait()
    subprocess.Popen(f"{node_path}node {node_path}npm install pm2@latest -g", shell=True, executable='/bin/bash').wait()
    # Start n8n and the two model scripts under pm2 (the scripts use python3
    # as interpreter).
    # NOTE(review): `pm2` is looked up by bare name without a shell, so it
    # must already be on this process's PATH - verify.
    subprocess.Popen(["pm2", "start", f"{node_path}n8n"]).wait()
    subprocess.Popen(["pm2", "start", "/home/user/app/data/models/llama3-1-8b.py","--interpreter=python3"]).wait()
    subprocess.Popen(["pm2", "start", "/home/user/app/data/models/llama3-1-70b.py","--interpreter=python3"]).wait()
    # Disabled: launch of the llama3-70b model script.
    #subprocess.Popen(["pm2", "start", "/home/user/app/data/models/llama3-70b.py","--interpreter=python3"]).wait()

# Access token for the inference API, taken from the "ai" environment
# variable (None when the variable is not set).
api_key = os.environ.get("ai")

# Module-level inference client bound to the Llama 3 70B Instruct model;
# chat_completion() below sends its requests through it.
client = InferenceClient(
    model="meta-llama/Meta-Llama-3-70B-Instruct",
    token=api_key,
)

def decode_base64_to_json(base64_str):
    """Decode a Base64 string into the JSON value it encodes.

    The decoded UTF-8 text is parsed as JSON exactly as received. Only when
    that fails is the legacy clean-up applied (turning ``\\'`` into ``'``,
    ``\\"`` into ``"`` and a doubled backslash into a single one) and the
    parse retried. Trying the untouched text first matters: applying the
    clean-up unconditionally breaks valid JSON whose strings contain escaped
    quotes or backslashes.

    Args:
        base64_str: Base64-encoded text (str or bytes) holding a JSON document.

    Returns:
        The parsed JSON value (list, dict, str, number, bool or None).

    Raises:
        ValueError: If Base64 decoding, UTF-8 decoding or JSON parsing fails.
            The underlying exception is attached as ``__cause__``.
    """
    try:
        decoded_str = base64.b64decode(base64_str).decode('utf-8')
        print(f"===================================================\nDecoded string: {decoded_str}\n===================================================")  # Log the decoded string
        try:
            # Well-formed JSON must not be altered before parsing.
            return json.loads(decoded_str)
        except json.JSONDecodeError:
            # Fallback for over-escaped payloads: strip one level of escaping
            # and try again.
            unescaped = decoded_str.replace("\\'", "'").replace('\\"', '"').replace('\\\\', '\\')
            return json.loads(unescaped)
    except Exception as e:
        raise ValueError(f"Error decoding base64 to JSON: {str(e)}") from e

@spaces.GPU()
def chat_completion(user_input, max_tokens, temperature, top_p):
    """Run a streamed chat completion and return the result as a JSON string.

    Args:
        user_input: Base64 text that decodes to a JSON list of chat messages.
        max_tokens: Upper bound on generated tokens; coerced to ``int``
            before being sent (presumably the UI slider can deliver a float
            - confirm).
        temperature: Sampling temperature, forwarded unchanged.
        top_p: Top-p value, forwarded unchanged.

    Returns:
        ``{"status": "success", "output": <text>}`` on success, or
        ``{"status": "error", "message": <text>}`` when any exception is
        raised while decoding the input or streaming the reply; both are
        serialised with ``json.dumps``.
    """
    try:
        input_data = decode_base64_to_json(user_input)
        if not isinstance(input_data, list):
            raise ValueError("Input must be a list of messages.")

        pieces = []
        for chunk in client.chat_completion(
            input_data,
            max_tokens=int(max_tokens),
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Skip chunks that carry no choices instead of failing on [0].
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            # Read the text whether the delta is dict-like or a plain object.
            if isinstance(delta, dict):
                token = delta.get("content")
            else:
                token = getattr(delta, "content", None)
            # "content" can be present but None; appending only real text
            # avoids the TypeError that `str += None` would raise.
            if token:
                pieces.append(token)

        return json.dumps({"status": "success", "output": "".join(pieces)})
    except Exception as e:
        # UI boundary: report the failure to the caller as JSON.
        return json.dumps({"status": "error", "message": str(e)})

# Input widgets, listed in the same order as chat_completion's parameters.
user_input = gr.Textbox(
    lines=10,
    label="User Input as Base64-encoded JSON String",
)
# NOTE(review): 8092 may have been meant as 8192 - confirm before changing.
max_tokens = gr.Slider(
    label="Max Tokens",
    minimum=1,
    maximum=8092,
    value=150,
)
temperature = gr.Slider(
    label="Temperature",
    minimum=0.0,
    maximum=1.0,
    value=0.7,
)
top_p = gr.Slider(
    label="Top P",
    minimum=0.0,
    maximum=1.0,
    value=0.9,
)

# Interface wiring the four widgets to chat_completion; its JSON string
# result is shown as plain text.
iface = gr.Interface(
    title="UCode Agent",
    description="Provide Base64-encoded JSON input with a list of messages and set the max tokens, temperature, and top_p to generate a chat completion.",
    fn=chat_completion,
    inputs=[user_input, max_tokens, temperature, top_p],
    outputs="text",
)

# Second half of the first-run logic: the marker file checked here is the
# same one that gates the provisioning block near the top of the file. It is
# only written at this point, so both guarded sections run on a first start
# and are skipped once the marker exists.
if not os.path.exists("/home/user/.flag"):
    # Create the marker file (its content is the single line "initialized").
    subprocess.Popen("echo 'initialized' > /home/user/.flag", shell=True, executable='/bin/bash').wait()
    # Progress message echoed to the process output.
    subprocess.Popen("echo 'starting up NginX'", shell=True, executable='/bin/bash').wait()
    # Start nginx under pm2.
    # NOTE(review): `pm2` is resolved by bare name, so it must be on PATH - verify.
    subprocess.Popen(["pm2", "start", "/usr/sbin/nginx"]).wait()

# Start the Gradio app; share=False requests no public share link.
iface.launch(share=False)