CooLLaMACEO committed on
Commit
992127b
·
verified ·
1 Parent(s): 60fb399

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -25
app.py CHANGED
@@ -7,7 +7,7 @@ import time
7
  from fastapi import FastAPI, HTTPException, Depends
8
  from fastapi.security.api_key import APIKeyHeader
9
  from pydantic import BaseModel
10
- from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
11
  from starlette.status import HTTP_403_FORBIDDEN, HTTP_503_SERVICE_UNAVAILABLE
12
 
13
  # --- 1. GLOBAL VARIABLES ---
@@ -16,17 +16,16 @@ model = None
16
  generated_keys = {}
17
 
18
  # --- 2. CONFIGURATION ---
19
- MODEL_PATH = "/app/model"
20
  API_KEY_NAME = "X-API-Key"
21
  api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
22
 
23
- app = FastAPI(title="Overflow-111.7B Self-Registering API")
24
 
25
  # --- 3. MODEL & TOKENIZER LOADING ---
26
  print("Starting Engine: Initializing Self-Registration...")
27
 
28
  try:
29
- # Add model path to sys.path
30
  if MODEL_PATH not in sys.path:
31
  sys.path.insert(0, MODEL_PATH)
32
 
@@ -34,32 +33,25 @@ try:
34
  import configuration_overflow
35
  import modeling_overflow
36
 
37
- # Load config from configuration_overflow
38
  conf_class = configuration_overflow.OverflowConfig
 
 
39
  AutoConfig.register("overflow", conf_class)
40
- print("Successfully registered 'overflow' config.")
41
-
42
- # Find the correct model class that uses conf_class
43
- model_class_candidates = [
44
- getattr(modeling_overflow, name)
45
- for name in dir(modeling_overflow)
46
- if hasattr(getattr(modeling_overflow, name), "config_class")
47
- and getattr(modeling_overflow, name).config_class == conf_class
48
- ]
49
- if not model_class_candidates:
50
- raise Exception("No model class matches the OverflowConfig class.")
51
- model_class = model_class_candidates[0]
52
-
53
- # Register model class
54
  AutoModelForCausalLM.register(conf_class, model_class)
55
  print(f"Successfully registered model class {model_class.__name__}.")
56
 
57
- # Load tokenizer
58
- print("Loading Tokenizer...")
59
- tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
 
 
 
 
 
 
60
 
61
- # Load model
62
- print("Loading Model Weights...")
63
  model = AutoModelForCausalLM.from_pretrained(
64
  MODEL_PATH,
65
  trust_remote_code=True,
@@ -67,7 +59,7 @@ try:
67
  torch_dtype=torch.bfloat16,
68
  low_cpu_mem_usage=True
69
  )
70
- print("Engine Status: ONLINE")
71
 
72
  except Exception as e:
73
  print(f"CRITICAL LOADING ERROR: {e}")
 
7
  from fastapi import FastAPI, HTTPException, Depends
8
  from fastapi.security.api_key import APIKeyHeader
9
  from pydantic import BaseModel
10
+ from transformers import AutoModelForCausalLM, AutoConfig, GPT2TokenizerFast
11
  from starlette.status import HTTP_403_FORBIDDEN, HTTP_503_SERVICE_UNAVAILABLE
12
 
13
  # --- 1. GLOBAL VARIABLES ---
 
16
  generated_keys = {}
17
 
18
  # --- 2. CONFIGURATION ---
19
+ MODEL_PATH = "/app/model" # folder with vocab.json, merges.txt, model weights
20
  API_KEY_NAME = "X-API-Key"
21
  api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
22
 
23
+ app = FastAPI(title="Overflow-111.7B API")
24
 
25
  # --- 3. MODEL & TOKENIZER LOADING ---
26
  print("Starting Engine: Initializing Self-Registration...")
27
 
28
  try:
 
29
  if MODEL_PATH not in sys.path:
30
  sys.path.insert(0, MODEL_PATH)
31
 
 
33
  import configuration_overflow
34
  import modeling_overflow
35
 
36
+ # Register config and model
37
  conf_class = configuration_overflow.OverflowConfig
38
+ model_class = modeling_overflow.OverflowForCausalLM
39
+
40
  AutoConfig.register("overflow", conf_class)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  AutoModelForCausalLM.register(conf_class, model_class)
42
  print(f"Successfully registered model class {model_class.__name__}.")
43
 
44
+ # Load GPT2-style tokenizer from vocab.json + merges.txt
45
+ tokenizer = GPT2TokenizerFast(
46
+ vocab_file=f"{MODEL_PATH}/vocab.json",
47
+ merges_file=f"{MODEL_PATH}/merges.txt",
48
+ unk_token="",
49
+ bos_token="",
50
+ eos_token=""
51
+ )
52
+ print("Tokenizer loaded successfully.")
53
 
54
+ # Load model weights
 
55
  model = AutoModelForCausalLM.from_pretrained(
56
  MODEL_PATH,
57
  trust_remote_code=True,
 
59
  torch_dtype=torch.bfloat16,
60
  low_cpu_mem_usage=True
61
  )
62
+ print("Model loaded successfully. Engine Status: ONLINE")
63
 
64
  except Exception as e:
65
  print(f"CRITICAL LOADING ERROR: {e}")