arjunkmoorthy committed on
Commit
1fdd678
·
verified ·
1 Parent(s): b7b15c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -3,20 +3,25 @@ from fastapi import FastAPI
3
  from pydantic import BaseModel
4
  from unsloth import FastModel
5
 
6
- # Set cache directories
7
  os.environ["TRITON_CACHE_DIR"] = "/tmp/triton_cache"
 
 
 
8
  HF_CACHE = "/tmp/hf_cache"
9
  os.environ["TRANSFORMERS_CACHE"] = HF_CACHE
10
  os.environ["HF_HOME"] = HF_CACHE
 
 
11
  os.makedirs(HF_CACHE, exist_ok=True)
 
 
12
 
13
- # Create app
14
  app = FastAPI()
15
 
16
- # Use separate base and adapter model
17
  model, tokenizer = FastModel.from_pretrained(
18
- model_name = "microsoft/phi-2", # Base model
19
- adapter_name = "srikar-v05/phi3-Mini-Medical-Chat", # LoRA adapter
20
  load_in_4bit = True,
21
  max_seq_length = 2048,
22
  )
 
3
  from pydantic import BaseModel
4
  from unsloth import FastModel
5
 
6
+ # Fix permission errors for cache dirs
7
  os.environ["TRITON_CACHE_DIR"] = "/tmp/triton_cache"
8
+ os.environ["TORCHINDUCTOR_CACHE_DIR"] = "/tmp/torchinductor_cache"
9
+
10
+ # Optional: HF model cache
11
  HF_CACHE = "/tmp/hf_cache"
12
  os.environ["TRANSFORMERS_CACHE"] = HF_CACHE
13
  os.environ["HF_HOME"] = HF_CACHE
14
+
15
+ # Create cache dirs
16
  os.makedirs(HF_CACHE, exist_ok=True)
17
+ os.makedirs("/tmp/triton_cache", exist_ok=True)
18
+ os.makedirs("/tmp/torchinductor_cache", exist_ok=True)
19
 
 
20
  app = FastAPI()
21
 
 
22
  model, tokenizer = FastModel.from_pretrained(
23
+ model_name = "microsoft/phi-2",
24
+ adapter_name = "srikar-v05/phi3-Mini-Medical-Chat",
25
  load_in_4bit = True,
26
  max_seq_length = 2048,
27
  )