CooLLaMACEO committed on
Commit
b4391b2
·
verified ·
1 Parent(s): 0196236

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -31
app.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  import torch
3
  import secrets
4
  import time
 
5
  from fastapi import FastAPI, HTTPException, Security, Depends
6
  from fastapi.security.api_key import APIKeyHeader
7
  from pydantic import BaseModel
@@ -13,62 +14,98 @@ MODEL_PATH = "/app/model"
13
  API_KEY_NAME = "X-API-Key"
14
  api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
15
 
16
- # In-memory storage for keys.
17
- # Note: These will reset if the Space restarts unless you use Persistent Storage.
18
  generated_keys = {}
19
 
20
- app = FastAPI(title="Overflow-111.7B API Manager")
21
 
22
  # --- MODEL LOADING ---
23
- print("Loading Overflow-111.7B Engine...")
24
- tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
25
- model = AutoModelForCausalLM.from_pretrained(
26
- MODEL_PATH,
27
- trust_remote_code=True,
28
- device_map={"": "cpu"},
29
- torch_dtype=torch.bfloat16,
30
- low_cpu_mem_usage=True
31
- )
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  class Query(BaseModel):
34
  prompt: str
35
  max_tokens: int = 50
 
36
 
37
- # --- API KEY GENERATION ---
38
  @app.get("/api/generate")
39
  async def create_new_key():
40
- """Generates a new of_sk key for the user."""
41
- # Generate a random string of 24 characters
42
- random_hex = secrets.token_hex(12)
43
- new_key = f"of_sk-{random_hex}"
44
-
45
- # Store with a timestamp
46
  generated_keys[new_key] = {"created_at": time.time()}
47
-
48
  return {
49
  "status": "success",
50
  "api_key": new_key,
51
- "instructions": f"Include this key in your request header as '{API_KEY_NAME}'"
52
  }
53
 
54
- # --- SECURITY CHECK ---
55
  async def get_api_key(api_key_header: str = Depends(api_key_header)):
56
  if api_key_header in generated_keys:
57
  return api_key_header
 
 
 
 
 
58
  raise HTTPException(
59
  status_code=HTTP_403_FORBIDDEN,
60
- detail="Invalid or expired API Key. Generate one at /api/generate"
61
  )
62
 
 
63
  @app.post("/v1/generate")
64
  async def generate(query: Query, api_key: str = Depends(get_api_key)):
65
- inputs = tokenizer(query.prompt, return_tensors="pt")
66
- with torch.no_grad():
67
- output_tokens = model.generate(**inputs, max_new_tokens=query.max_tokens)
68
-
69
- response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
70
- return {"text": response}
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  @app.get("/")
73
- def home():
74
- return {"message": "Welcome to Overflow-111.7B. Go to /api/generate to get a key."}
 
 
 
 
 
 
 
 
 
 
2
  import torch
3
  import secrets
4
  import time
5
+ import json
6
  from fastapi import FastAPI, HTTPException, Security, Depends
7
  from fastapi.security.api_key import APIKeyHeader
8
  from pydantic import BaseModel
 
# --- API KEY CONFIGURATION ---
API_KEY_NAME = "X-API-Key"
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)

# In-memory key store: api_key -> metadata dict.
# NOTE(review): on a free Space this resets on every restart — keys do not
# survive a redeploy unless persistent storage is added.
generated_keys = {}

app = FastAPI(title="Overflow-111.7B API")

# --- MODEL LOADING ---
# Announced before the (slow) global-scope model load below, so the Space
# logs show progress; globals keep the weights resident for the process.
print("Starting Engine: Loading Overflow-111.7B (1-Bit Logic)...")
# Load tokenizer and model once at import time (CPU, bf16, low-RAM path).
try:
    # 1. Load Tokenizer - Explicitly trust remote code to avoid build hangs
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_PATH,
        trust_remote_code=True
    )

    # 2. Load Model - Optimized for 16GB CPU RAM: pinned to CPU, bfloat16
    # weights, and lazy materialization to keep peak memory low.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        trust_remote_code=True,
        device_map={"": "cpu"},
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=True
    )
    print("Engine Status: ONLINE")
except Exception as e:
    # Bind the globals to None so later references fail with a clear,
    # checkable condition instead of a NameError at request time.
    tokenizer = None
    model = None
    print(f"CRITICAL LOADING ERROR: {e}")
# --- SCHEMAS ---
class Query(BaseModel):
    """Request body for the /v1/generate endpoint."""
    prompt: str               # text to complete
    max_tokens: int = 50      # maximum number of NEW tokens to generate
    temperature: float = 0.7  # sampling temperature; 0 selects greedy decoding
# --- API KEY LOGIC ---
@app.get("/api/generate")
async def create_new_key():
    """Generates a unique of_sk- key for the user."""
    # 12 random bytes -> 24 hex characters, prefixed for easy identification.
    token = secrets.token_hex(12)
    new_key = "of_sk-" + token

    # Record issuance time alongside the key.
    generated_keys[new_key] = {"created_at": time.time()}

    return {
        "status": "success",
        "api_key": new_key,
        "instructions": f"Add this to your headers as '{API_KEY_NAME}'"
    }
 
async def get_api_key(api_key_header: str = Depends(api_key_header)):
    """FastAPI dependency: validate the X-API-Key request header.

    Accepts any key previously issued by /api/generate, or the optional
    MASTER_API_KEY environment variable. Raises HTTP 403 otherwise.
    Note: with auto_error=False the header value may be None.
    """
    if api_key_header in generated_keys:
        return api_key_header
    # Also allow a master key from environment variables if set.
    # Use a constant-time comparison so the master key cannot be probed
    # via a timing side channel.
    master_key = os.environ.get("MASTER_API_KEY")
    if (
        master_key
        and api_key_header
        and secrets.compare_digest(api_key_header, master_key)
    ):
        return api_key_header

    raise HTTPException(
        status_code=HTTP_403_FORBIDDEN,
        detail="Invalid API Key. Generate one at /api/generate"
    )
 
# --- ENDPOINTS ---
@app.post("/v1/generate")
async def generate(query: Query, api_key: str = Depends(get_api_key)):
    """Run text generation for an authenticated request.

    Returns an OpenAI-style completion payload; any failure is surfaced
    as HTTP 500 with the underlying error message.
    """
    try:
        inputs = tokenizer(query.prompt, return_tensors="pt")

        # Sample only when temperature > 0; greedy decoding otherwise.
        # Pass `temperature` only when sampling — recent transformers
        # versions warn (or reject temperature=0) when do_sample=False.
        do_sample = query.temperature > 0
        gen_kwargs = {
            "max_new_tokens": query.max_tokens,
            "do_sample": do_sample,
        }
        if do_sample:
            gen_kwargs["temperature"] = query.temperature

        with torch.no_grad():
            output_tokens = model.generate(**inputs, **gen_kwargs)

        response_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
        return {
            "object": "text_completion",
            "model": "Overflow-111.7B",
            "choices": [{"text": response_text}]
        }
    except Exception as e:
        # Boundary handler: convert any generation failure into HTTP 500.
        raise HTTPException(status_code=500, detail=str(e))
 
@app.get("/")
def health_check():
    """Root endpoint: lightweight status and usage summary."""
    info = {
        "status": "active",
        "model": "Overflow-111.7B",
        "device": "CPU",
        "usage": "Go to /api/generate to get started"
    }
    return info
if __name__ == "__main__":
    # Local/dev entry point; HF Spaces expects the app on port 7860.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)