# app.py
import os
import sys
import torch
import secrets
import time
from fastapi import FastAPI, HTTPException, Depends
from fastapi.security.api_key import APIKeyHeader
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoConfig, GPT2TokenizerFast
from starlette.status import HTTP_403_FORBIDDEN, HTTP_503_SERVICE_UNAVAILABLE
# --- 1. GLOBAL VARIABLES ---
# Populated by the loading section below; both stay None if loading fails,
# and /v1/generate then answers 503 until they are set.
tokenizer = None
model = None
# In-memory registry of keys minted by /api/generate (key -> metadata dict).
# NOTE(review): not persisted and grows without bound — all keys vanish on restart.
generated_keys = {}
# --- 2. CONFIGURATION ---
MODEL_PATH = "/app/model" # folder with vocab.json, merges.txt, model weights
API_KEY_NAME = "X-API-Key"
# auto_error=True makes FastAPI reject requests missing the header with a 403.
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=True)
app = FastAPI(title="Overflow-111.7B API")
# --- 3. MODEL & TOKENIZER LOADING ---
# Runs at import time. On any failure the exception is printed and the app
# still starts; endpoints report 503 because tokenizer/model remain None.
print("Starting Engine: Initializing Self-Registration...")
try:
    # Make the model folder importable so its custom modules can be found.
    if MODEL_PATH not in sys.path:
        sys.path.insert(0, MODEL_PATH)
    # Import configuration and model (shipped alongside the weights).
    import configuration_overflow
    import modeling_overflow
    # Register config and model with the transformers Auto* factories so
    # from_pretrained() can resolve the custom "overflow" model_type.
    conf_class = configuration_overflow.OverflowConfig
    model_class = modeling_overflow.OverflowForCausalLM
    AutoConfig.register("overflow", conf_class)
    AutoModelForCausalLM.register(conf_class, model_class)
    print(f"Successfully registered model class {model_class.__name__}.")
    # Load GPT2-style tokenizer from vocab.json + merges.txt.
    # NOTE(review): unk/bos/eos are all set to the empty string — confirm this
    # matches the tokens used when the model was trained.
    tokenizer = GPT2TokenizerFast(
        vocab_file=f"{MODEL_PATH}/vocab.json",
        merges_file=f"{MODEL_PATH}/merges.txt",
        unk_token="",
        bos_token="",
        eos_token=""
    )
    print("Tokenizer loaded successfully.")
    # Load model weights on CPU in bfloat16; low_cpu_mem_usage streams
    # weights in to keep peak RAM down during load.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        trust_remote_code=True,
        device_map={"": "cpu"},
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=True
    )
    print("Model loaded successfully. Engine Status: ONLINE")
except Exception as e:
    # Deliberately broad: keep the server up so the health endpoint can report
    # "loading" instead of crashing the whole process on a load failure.
    print(f"CRITICAL LOADING ERROR: {e}")
# --- 4. API SCHEMAS ---
class Query(BaseModel):
    """Request body for POST /v1/generate."""
    prompt: str  # text to continue
    max_tokens: int = 50  # upper bound on newly generated tokens
    temperature: float = 0.7  # <= 0 disables sampling (greedy decoding below)
# --- 5. API KEY AUTHENTICATION ---
@app.get("/api/generate")
async def create_new_key():
    """Mint a fresh API key and register it for later verification.

    The key is recorded in the module-level ``generated_keys`` registry,
    which ``verify_auth`` consults on authenticated endpoints.
    """
    # NOTE(review): this route is itself unauthenticated — anyone can mint keys.
    fresh_key = "of_sk-" + secrets.token_hex(12)
    generated_keys[fresh_key] = {"created_at": time.time()}
    return {"status": "success", "api_key": fresh_key}
async def verify_auth(api_key: str = Depends(api_key_header)):
    """Validate the X-API-Key header value.

    Accepts either a key previously minted by ``create_new_key`` or the
    operator's MASTER_API_KEY environment variable.

    Returns:
        The validated key, for use by dependent endpoints.
    Raises:
        HTTPException: 403 when the key matches neither source.
    """
    if api_key in generated_keys:
        return api_key
    master_key = os.environ.get("MASTER_API_KEY")
    # compare_digest avoids leaking information about the master key via
    # comparison timing; guard against the env var being unset (None).
    if master_key is not None and secrets.compare_digest(api_key, master_key):
        return api_key
    raise HTTPException(status_code=HTTP_403_FORBIDDEN, detail="Invalid API Key")
# --- 6. CORE ENDPOINTS ---
@app.post("/v1/generate")
async def generate(query: Query, auth: str = Depends(verify_auth)):
    """Generate a completion for ``query.prompt`` with the loaded model.

    Returns an OpenAI-style payload: ``{"model": ..., "choices": [{"text": ...}]}``.
    Raises 503 while the model is still loading and 500 on generation errors.
    """
    if tokenizer is None or model is None:
        raise HTTPException(
            status_code=HTTP_503_SERVICE_UNAVAILABLE,
            detail="Engine still loading. Please wait."
        )
    try:
        inputs = tokenizer(query.prompt, return_tensors="pt")
        do_sample = query.temperature > 0
        gen_kwargs = {
            "max_new_tokens": query.max_tokens,
            "do_sample": do_sample,
        }
        # Only pass temperature when sampling: with do_sample=False the
        # parameter has no effect and transformers emits a warning.
        if do_sample:
            gen_kwargs["temperature"] = query.temperature
        with torch.no_grad():  # inference only — skip autograd bookkeeping
            output_tokens = model.generate(**inputs, **gen_kwargs)
        response_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
        return {
            "model": "Overflow-111.7B",
            "choices": [{"text": response_text}]
        }
    except Exception as e:
        # Surface the failure reason to the client as a 500.
        # NOTE(review): str(e) may expose internal details — consider logging
        # the full error and returning a generic message instead.
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/")
def health():
    """Liveness probe: reports whether the model has finished loading."""
    # Explicit None check instead of truthiness: arbitrary model objects may
    # define __bool__/__len__ and misreport (or raise) under `if model`.
    state = "active" if model is not None else "loading"
    return {"status": state, "engine": "Overflow-111.7B"}
# --- 7. RUN SERVER ---
if __name__ == "__main__":
    # Direct-run entry point; port 7860 presumably targets a Hugging Face
    # Space deployment — confirm against the hosting config.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)