Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
import torch
|
| 3 |
import secrets
|
| 4 |
import time
|
| 5 |
-
from fastapi import FastAPI, HTTPException,
|
| 6 |
from fastapi.security.api_key import APIKeyHeader
|
| 7 |
from pydantic import BaseModel
|
| 8 |
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
|
|
@@ -13,7 +14,7 @@ MODEL_PATH = "/app/model"
|
|
| 13 |
API_KEY_NAME = "X-API-Key"
|
| 14 |
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
|
| 15 |
|
| 16 |
-
# In-memory storage for keys
|
| 17 |
generated_keys = {}
|
| 18 |
|
| 19 |
app = FastAPI(title="Overflow-111.7B API")
|
|
@@ -22,19 +23,22 @@ app = FastAPI(title="Overflow-111.7B API")
|
|
| 22 |
print("Starting Engine: Loading Overflow-111.7B (1-Bit Logic)...")
|
| 23 |
|
| 24 |
try:
|
| 25 |
-
# 1.
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
from configuration_overflow import OverflowConfig
|
| 28 |
AutoConfig.register("overflow", OverflowConfig)
|
| 29 |
|
| 30 |
-
#
|
| 31 |
-
# We use trust_remote_code and the registered config to load smoothly
|
| 32 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 33 |
MODEL_PATH,
|
| 34 |
trust_remote_code=True
|
| 35 |
)
|
| 36 |
|
| 37 |
-
#
|
| 38 |
model = AutoModelForCausalLM.from_pretrained(
|
| 39 |
MODEL_PATH,
|
| 40 |
trust_remote_code=True,
|
|
@@ -52,7 +56,7 @@ class Query(BaseModel):
|
|
| 52 |
max_tokens: int = 50
|
| 53 |
temperature: float = 0.7
|
| 54 |
|
| 55 |
-
# ---
|
| 56 |
@app.get("/api/generate")
|
| 57 |
async def create_new_key():
|
| 58 |
"""Generates a unique of_sk- key."""
|
|
@@ -64,16 +68,14 @@ async def create_new_key():
|
|
| 64 |
"instructions": f"Add this to your headers as '{API_KEY_NAME}'"
|
| 65 |
}
|
| 66 |
|
| 67 |
-
async def get_api_key(api_key_header: str = Depends(api_key_header)):
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
return api_key_header
|
| 71 |
|
| 72 |
-
# Check for a
|
| 73 |
-
# Use this if you don't want to keep generating new keys
|
| 74 |
master_key = os.environ.get("MASTER_API_KEY")
|
| 75 |
-
if master_key and
|
| 76 |
-
return
|
| 77 |
|
| 78 |
raise HTTPException(
|
| 79 |
status_code=HTTP_403_FORBIDDEN,
|
|
@@ -82,7 +84,7 @@ async def get_api_key(api_key_header: str = Depends(api_key_header)):
|
|
| 82 |
|
| 83 |
# --- ENDPOINTS ---
|
| 84 |
@app.post("/v1/generate")
|
| 85 |
-
async def generate(query: Query,
|
| 86 |
try:
|
| 87 |
inputs = tokenizer(query.prompt, return_tensors="pt")
|
| 88 |
|
|
@@ -108,8 +110,7 @@ def health_check():
|
|
| 108 |
return {
|
| 109 |
"status": "active",
|
| 110 |
"model": "Overflow-111.7B",
|
| 111 |
-
"
|
| 112 |
-
"info": "Visit /api/generate to get an API key."
|
| 113 |
}
|
| 114 |
|
| 115 |
if __name__ == "__main__":
|
|
|
|
| 1 |
import os
|
| 2 |
+
import sys
|
| 3 |
import torch
|
| 4 |
import secrets
|
| 5 |
import time
|
| 6 |
+
from fastapi import FastAPI, HTTPException, Depends
|
| 7 |
from fastapi.security.api_key import APIKeyHeader
|
| 8 |
from pydantic import BaseModel
|
| 9 |
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
|
|
|
|
| 14 |
API_KEY_NAME = "X-API-Key"
|
| 15 |
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
|
| 16 |
|
| 17 |
+
# In-memory storage for keys
|
| 18 |
generated_keys = {}
|
| 19 |
|
| 20 |
app = FastAPI(title="Overflow-111.7B API")
|
|
|
|
| 23 |
print("Starting Engine: Loading Overflow-111.7B (1-Bit Logic)...")
|
| 24 |
|
| 25 |
try:
|
| 26 |
+
# 1. Inject model path into system path so Python can find custom modules
|
| 27 |
+
if MODEL_PATH not in sys.path:
|
| 28 |
+
sys.path.append(MODEL_PATH)
|
| 29 |
+
|
| 30 |
+
# 2. Register the custom config class
|
| 31 |
+
# This assumes the file in /app/model is named 'configuration_overflow.py'
|
| 32 |
from configuration_overflow import OverflowConfig
|
| 33 |
AutoConfig.register("overflow", OverflowConfig)
|
| 34 |
|
| 35 |
+
# 3. Load Tokenizer
|
|
|
|
| 36 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 37 |
MODEL_PATH,
|
| 38 |
trust_remote_code=True
|
| 39 |
)
|
| 40 |
|
| 41 |
+
# 4. Load Model
|
| 42 |
model = AutoModelForCausalLM.from_pretrained(
|
| 43 |
MODEL_PATH,
|
| 44 |
trust_remote_code=True,
|
|
|
|
| 56 |
max_tokens: int = 50
|
| 57 |
temperature: float = 0.7
|
| 58 |
|
| 59 |
+
# --- AUTH LOGIC ---
|
| 60 |
@app.get("/api/generate")
|
| 61 |
async def create_new_key():
|
| 62 |
"""Generates a unique of_sk- key."""
|
|
|
|
| 68 |
"instructions": f"Add this to your headers as '{API_KEY_NAME}'"
|
| 69 |
}
|
| 70 |
|
| 71 |
+
async def verify_auth(api_key: str = Depends(api_key_header)):
|
| 72 |
+
if api_key in generated_keys:
|
| 73 |
+
return api_key
|
|
|
|
| 74 |
|
| 75 |
+
# Check for a MASTER_API_KEY set in Space Secrets/Variables
|
|
|
|
| 76 |
master_key = os.environ.get("MASTER_API_KEY")
|
| 77 |
+
if master_key and api_key == master_key:
|
| 78 |
+
return api_key
|
| 79 |
|
| 80 |
raise HTTPException(
|
| 81 |
status_code=HTTP_403_FORBIDDEN,
|
|
|
|
| 84 |
|
| 85 |
# --- ENDPOINTS ---
|
| 86 |
@app.post("/v1/generate")
|
| 87 |
+
async def generate(query: Query, auth: str = Depends(verify_auth)):
|
| 88 |
try:
|
| 89 |
inputs = tokenizer(query.prompt, return_tensors="pt")
|
| 90 |
|
|
|
|
| 110 |
return {
|
| 111 |
"status": "active",
|
| 112 |
"model": "Overflow-111.7B",
|
| 113 |
+
"auth_method": "X-API-Key"
|
|
|
|
| 114 |
}
|
| 115 |
|
| 116 |
if __name__ == "__main__":
|