Upload main.py
Browse files
- backend/app/main.py +6 -11
backend/app/main.py
CHANGED
|
@@ -11,13 +11,12 @@ import torch
|
|
| 11 |
APP_TITLE = "HF Chat (Fathom-R1-14B)"
|
| 12 |
APP_VERSION = "0.2.0"
|
| 13 |
|
| 14 |
-
# ---- Config via ENV ----
|
| 15 |
MODEL_ID = os.getenv("MODEL_ID", "FractalAIResearch/Fathom-R1-14B")
|
| 16 |
PIPELINE_TASK = os.getenv("PIPELINE_TASK", "text-generation")
|
| 17 |
-
MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "8192"))
|
| 18 |
STATIC_DIR = os.getenv("STATIC_DIR", "/app/static")
|
| 19 |
ALLOWED_ORIGINS = os.getenv("ALLOWED_ORIGINS", "")
|
| 20 |
-
QUANTIZE = os.getenv("QUANTIZE", "auto")
|
| 21 |
|
| 22 |
app = FastAPI(title=APP_TITLE, version=APP_VERSION)
|
| 23 |
|
|
@@ -109,17 +108,13 @@ def messages_to_prompt(messages: List[Message]) -> str:
|
|
| 109 |
parts = []
|
| 110 |
for m in messages:
|
| 111 |
if m.role == "system":
|
| 112 |
-
parts.append(f"System: {m.content}
|
| 113 |
-
")
|
| 114 |
elif m.role == "user":
|
| 115 |
-
parts.append(f"User: {m.content}
|
| 116 |
-
")
|
| 117 |
else:
|
| 118 |
-
parts.append(f"Assistant: {m.content}
|
| 119 |
-
")
|
| 120 |
parts.append("Assistant:")
|
| 121 |
-
return "
|
| 122 |
-
".join(parts)
|
| 123 |
|
| 124 |
def truncate_prompt(prompt: str, max_tokens: int) -> str:
|
| 125 |
ids = tokenizer(prompt, return_tensors="pt", truncation=False)["input_ids"][0]
|
|
|
|
| 11 |
APP_TITLE = "HF Chat (Fathom-R1-14B)"
|
| 12 |
APP_VERSION = "0.2.0"
|
| 13 |
|
|
|
|
| 14 |
MODEL_ID = os.getenv("MODEL_ID", "FractalAIResearch/Fathom-R1-14B")
|
| 15 |
PIPELINE_TASK = os.getenv("PIPELINE_TASK", "text-generation")
|
| 16 |
+
MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "8192"))
|
| 17 |
STATIC_DIR = os.getenv("STATIC_DIR", "/app/static")
|
| 18 |
ALLOWED_ORIGINS = os.getenv("ALLOWED_ORIGINS", "")
|
| 19 |
+
QUANTIZE = os.getenv("QUANTIZE", "auto")
|
| 20 |
|
| 21 |
app = FastAPI(title=APP_TITLE, version=APP_VERSION)
|
| 22 |
|
|
|
|
| 108 |
parts = []
|
| 109 |
for m in messages:
|
| 110 |
if m.role == "system":
|
| 111 |
+
parts.append(f"System: {m.content}")
|
|
|
|
| 112 |
elif m.role == "user":
|
| 113 |
+
parts.append(f"User: {m.content}")
|
|
|
|
| 114 |
else:
|
| 115 |
+
parts.append(f"Assistant: {m.content}")
|
|
|
|
| 116 |
parts.append("Assistant:")
|
| 117 |
+
return "".join(parts)
|
|
|
|
| 118 |
|
| 119 |
def truncate_prompt(prompt: str, max_tokens: int) -> str:
|
| 120 |
ids = tokenizer(prompt, return_tensors="pt", truncation=False)["input_ids"][0]
|