# NOTE(review): the lines below are Hugging Face Spaces page chrome captured
# by the scraper (space status "Sleeping", file size 2,579 bytes, commit
# c0b7e11, line-number gutter 1..96) — commented out so the file parses.
"""
Colab Node Installer
One-liner: !wget -q https://huggingface.co/spaces/ndwdgda/agent-dashboard/resolve/main/install_node.py && python install_node.py
"""
import os
import subprocess
import sys
import time
# ---------------------------------------------------------------------------
# Interactive setup: install dependencies and authenticate the ngrok tunnel.
# Runs at import time on purpose — this file is a one-shot Colab installer.
# ---------------------------------------------------------------------------
print("=" * 60)
print("🚀 FREE SUPERCOMPUTER NODE SETUP")
print("=" * 60)

# 1. Install everything both this installer and the generated server need
#    (FastAPI stack + HF transformers + torch).
print("\n📦 Installing dependencies...")
packages = [
    "fastapi", "nest-asyncio", "pyngrok", "uvicorn",
    "transformers", "accelerate", "bitsandbytes", "torch",
]
# check=True: if pip fails there is no point continuing — the generated
# server would crash on import anyway (the original ignored the exit code).
subprocess.run(
    [sys.executable, "-m", "pip", "install", "-q"] + packages,
    check=True,
)
print("✓ Dependencies installed")

# 2. Resolve the ngrok auth token.
#    Lookup order: NGROK_TOKEN env var > /content token file > baked-in default.
token_file = "/content/ngrok_token.txt"
token = os.environ.get("NGROK_TOKEN")
if token:
    print("✓ Using ngrok token from NGROK_TOKEN env var")
elif os.path.exists(token_file):
    # Context manager closes the handle promptly (the original leaked the
    # file object returned by open().read()).
    with open(token_file) as fh:
        token = fh.read().strip()
    print("✓ Found ngrok token")
else:
    # SECURITY: a shared auth token is hard-coded in a publicly downloadable
    # file — anyone can hijack or exhaust this ngrok account.  Rotate it and
    # prefer the env var or token file above; kept only as a fallback so
    # existing one-liner usage keeps working.
    token = "37qBoBViEaGHxxpXeAWfljhM2XA_4xv22ydkx6SN3jdPw7RwL"
    print("✓ Using default ngrok token")

from pyngrok import ngrok

ngrok.set_auth_token(token)
# ---------------------------------------------------------------------------
# 3. Inline source for the node server that is written to server.py and
#    launched below.  It is a plain string (not executed here), so its
#    imports only need to resolve in the Colab runtime after the pip install
#    above.  Indentation was restored (the scraped original had it stripped,
#    which made the written server.py syntactically invalid) and a duplicate
#    `import os` was removed.
# ---------------------------------------------------------------------------
server_code = '''
from fastapi import FastAPI
from pydantic import BaseModel
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import uvicorn
import nest_asyncio
from pyngrok import ngrok
import os

app = FastAPI()

# Model
MODEL_NAME = "meta-llama/Llama-3.2-1B"
print(f"Loading {MODEL_NAME}...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto"
)
print("Model loaded")


class Req(BaseModel):
    prompt: str
    max_tokens: int = 200


@app.post("/generate")
def generate(req: Req):
    inputs = tokenizer(req.prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=req.max_tokens)
    return {"text": tokenizer.decode(out[0], skip_special_tokens=True)}


@app.get("/")
def root():
    return {"status": "ok", "gpu": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"}


# Check if port is taken, kill if needed (best-effort)
os.system("fuser -k 8000/tcp")

# Start tunnel
nest_asyncio.apply()
public_url = ngrok.connect(8000).public_url
print(f"\\n🔗 PUBLIC URL: {public_url}\\n")
with open("/content/node_url.txt", "w") as f:
    f.write(public_url)

uvicorn.run(app, host="0.0.0.0", port=8000)
'''
# ---------------------------------------------------------------------------
# 4. Persist the server source and hand control to it.  The child process
#    prints the public ngrok URL and writes it to /content/node_url.txt.
# ---------------------------------------------------------------------------
with open("server.py", "w") as f:
    f.write(server_code)

print("\n🔥 Starting Node Server...")
print("Check the output for the PUBLIC URL")
# Launch via an argv list instead of a shell-interpolated string so a
# sys.executable path containing spaces or shell metacharacters cannot break
# (or be injected into) the command line — the original used
# os.system(f"{sys.executable} server.py").  Blocks until the server exits.
subprocess.run([sys.executable, "server.py"])
# (trailing file-viewer gutter artifact removed)