File size: 2,579 Bytes
c0b7e11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
"""
Colab Node Installer
One-liner: !wget -q https://huggingface.co/spaces/ndwdgda/agent-dashboard/resolve/main/install_node.py && python install_node.py
"""
import os
import subprocess
import sys
import time

print("="*60)
print("🚀 FREE SUPERCOMPUTER NODE SETUP")
print("="*60)

# 1. Install Dependencies
print("\n📦 Installing dependencies...")
packages = [
    "fastapi", "nest-asyncio", "pyngrok", "uvicorn",
    "transformers", "accelerate", "bitsandbytes", "torch"
]
# check=True: fail fast with a clear CalledProcessError if pip fails, instead
# of silently continuing with missing packages (the original ignored the
# subprocess exit status and would crash later at import time).
subprocess.run([sys.executable, "-m", "pip", "install", "-q"] + packages, check=True)
print("✓ Dependencies installed")

# 2. Setup ngrok
# Prefer a user-supplied token file so people can use their own credentials
# without editing this script.
token_file = "/content/ngrok_token.txt"
if os.path.exists(token_file):
    # Context manager closes the file handle deterministically (the original
    # used a bare open().read() and leaked the handle until GC).
    with open(token_file) as f:
        token = f.read().strip()
    print("✓ Found ngrok token")
else:
    # SECURITY: this is a hard-coded ngrok auth token committed to a public
    # script — anyone can reuse or abuse it until it is revoked. Supply your
    # own token via /content/ngrok_token.txt instead of relying on this.
    token = "37qBoBViEaGHxxpXeAWfljhM2XA_4xv22ydkx6SN3jdPw7RwL"
    print("✓ Using default ngrok token")

from pyngrok import ngrok
ngrok.set_auth_token(token)

# 3. Create Server Code
# The FastAPI inference server is emitted as a standalone script (server.py)
# and launched in a fresh interpreter at the bottom of this file. NOTE: the
# entire server lives inside this string literal — none of it executes here.
# Flow inside the generated script: load Llama-3.2-1B (fp16, device_map=auto),
# define /generate and / endpoints, free port 8000 via fuser, open an ngrok
# tunnel, record the public URL to /content/node_url.txt, then run uvicorn.
server_code = '''
from fastapi import FastAPI
from pydantic import BaseModel
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import uvicorn
import nest_asyncio
from pyngrok import ngrok
import os

app = FastAPI()

# Model
MODEL_NAME = "meta-llama/Llama-3.2-1B"
print(f"Loading {MODEL_NAME}...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, 
    torch_dtype=torch.float16, 
    device_map="auto"
)
print("Model loaded")

class Req(BaseModel):
    prompt: str
    max_tokens: int = 200

@app.post("/generate")
def generate(req: Req):
    inputs = tokenizer(req.prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=req.max_tokens)
    return {"text": tokenizer.decode(out[0], skip_special_tokens=True)}

@app.get("/")
def root():
    return {"status": "ok", "gpu": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"}

# Check if port is taken, kill if needed
import os
os.system("fuser -k 8000/tcp")

# Start tunnel
nest_asyncio.apply()
public_url = ngrok.connect(8000).public_url
print(f"\\n🔗 PUBLIC URL: {public_url}\\n")
with open("/content/node_url.txt", "w") as f: f.write(public_url)

uvicorn.run(app, host="0.0.0.0", port=8000)
'''

# Write the generated server source to the working directory so the launcher
# below can start it with the same interpreter.
with open("server.py", "w") as f:
    f.write(server_code)
# 4. Run Server
print("\n🔥 Starting Node Server...")
print("Check the output for the PUBLIC URL")
# subprocess.run with an argument list avoids passing sys.executable through
# a shell (os.system would shell-interpolate the path, breaking on spaces
# and inviting injection). This call blocks until the server process exits,
# matching the original os.system behavior.
subprocess.run([sys.executable, "server.py"])