Proff12 committed on
Commit
6390c04
·
verified ·
1 Parent(s): 958f33b

Upload main.py

Browse files
Files changed (1) hide show
  1. backend/app/main.py +6 -11
backend/app/main.py CHANGED
@@ -11,13 +11,12 @@ import torch
11
  APP_TITLE = "HF Chat (Fathom-R1-14B)"
12
  APP_VERSION = "0.2.0"
13
 
14
- # ---- Config via ENV ----
15
  MODEL_ID = os.getenv("MODEL_ID", "FractalAIResearch/Fathom-R1-14B")
16
  PIPELINE_TASK = os.getenv("PIPELINE_TASK", "text-generation")
17
- MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "8192")) # keep prompt reasonable
18
  STATIC_DIR = os.getenv("STATIC_DIR", "/app/static")
19
  ALLOWED_ORIGINS = os.getenv("ALLOWED_ORIGINS", "")
20
- QUANTIZE = os.getenv("QUANTIZE", "auto") # auto|4bit|8bit|none
21
 
22
  app = FastAPI(title=APP_TITLE, version=APP_VERSION)
23
 
@@ -109,17 +108,13 @@ def messages_to_prompt(messages: List[Message]) -> str:
109
  parts = []
110
  for m in messages:
111
  if m.role == "system":
112
- parts.append(f"System: {m.content}\n")
113
114
  elif m.role == "user":
115
- parts.append(f"User: {m.content}\n")
116
117
  else:
118
- parts.append(f"Assistant: {m.content}\n")
119
120
  parts.append("Assistant:")
121
- return "\n".join(parts)
122
123
 
124
  def truncate_prompt(prompt: str, max_tokens: int) -> str:
125
  ids = tokenizer(prompt, return_tensors="pt", truncation=False)["input_ids"][0]
 
11
  APP_TITLE = "HF Chat (Fathom-R1-14B)"
12
  APP_VERSION = "0.2.0"
13
 
 
14
  MODEL_ID = os.getenv("MODEL_ID", "FractalAIResearch/Fathom-R1-14B")
15
  PIPELINE_TASK = os.getenv("PIPELINE_TASK", "text-generation")
16
+ MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "8192"))
17
  STATIC_DIR = os.getenv("STATIC_DIR", "/app/static")
18
  ALLOWED_ORIGINS = os.getenv("ALLOWED_ORIGINS", "")
19
+ QUANTIZE = os.getenv("QUANTIZE", "auto")
20
 
21
  app = FastAPI(title=APP_TITLE, version=APP_VERSION)
22
 
 
108
  parts = []
109
  for m in messages:
110
  if m.role == "system":
111
+ parts.append(f"System: {m.content}")
 
112
  elif m.role == "user":
113
+ parts.append(f"User: {m.content}")
 
114
  else:
115
+ parts.append(f"Assistant: {m.content}")
 
116
  parts.append("Assistant:")
117
+ return "".join(parts)
 
118
 
119
  def truncate_prompt(prompt: str, max_tokens: int) -> str:
120
  ids = tokenizer(prompt, return_tensors="pt", truncation=False)["input_ids"][0]