viskav committed on
Commit
8bccb6d
·
verified ·
1 Parent(s): 2589cf0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -5
app.py CHANGED
@@ -3,6 +3,8 @@ from fastapi import FastAPI, HTTPException
3
  from pydantic import BaseModel
4
  from llama_cpp import Llama
5
  from contextlib import asynccontextmanager
 
 
6
 
7
  # =========================
8
  # MODEL CONFIG
@@ -10,6 +12,12 @@ from contextlib import asynccontextmanager
10
  MODEL_REPO = "bartowski/Phi-3.1-mini-4k-instruct-GGUF"
11
  MODEL_FILE = "Phi-3.1-mini-4k-instruct-IQ2_M.gguf"
12
 
 
 
 
 
 
 
13
  N_THREADS = int(os.getenv("N_THREADS", "8"))
14
  N_CTX = int(os.getenv("N_CTX", "2048"))
15
  N_BATCH = int(os.getenv("N_BATCH", "256"))
@@ -23,12 +31,13 @@ llm = None
23
  async def lifespan(app: FastAPI):
24
  global llm
25
  llm = Llama(
26
- model_path=MODEL_FILE,
27
- n_ctx=N_CTX,
28
- n_threads=N_THREADS,
29
- n_batch=N_BATCH,
30
- verbose=False,
31
  )
 
32
  yield
33
 
34
  app = FastAPI(title="AI Humanizer", lifespan=lifespan)
 
3
  from pydantic import BaseModel
4
  from llama_cpp import Llama
5
  from contextlib import asynccontextmanager
6
+ from huggingface_hub import hf_hub_download
7
+
8
 
9
  # =========================
10
  # MODEL CONFIG
 
12
  MODEL_REPO = "bartowski/Phi-3.1-mini-4k-instruct-GGUF"
13
  MODEL_FILE = "Phi-3.1-mini-4k-instruct-IQ2_M.gguf"
14
 
15
+ MODEL_PATH = hf_hub_download(
16
+ repo_id=MODEL_REPO,
17
+ filename=MODEL_FILE,
18
+ )
19
+
20
+
21
  N_THREADS = int(os.getenv("N_THREADS", "8"))
22
  N_CTX = int(os.getenv("N_CTX", "2048"))
23
  N_BATCH = int(os.getenv("N_BATCH", "256"))
 
31
  async def lifespan(app: FastAPI):
32
  global llm
33
  llm = Llama(
34
+ model_path=MODEL_PATH,
35
+ n_ctx=N_CTX,
36
+ n_threads=N_THREADS,
37
+ n_batch=N_BATCH,
38
+ verbose=False,
39
  )
40
+
41
  yield
42
 
43
  app = FastAPI(title="AI Humanizer", lifespan=lifespan)