kripeshAlt committed on
Commit
b4623d2
·
verified ·
1 Parent(s): 8100d8d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -16,14 +16,17 @@ app = FastAPI(title="Phi-2 CPU Hosting API")
16
 
17
  # Model configuration
18
  MODEL_NAME = "microsoft/phi-2"
19
- DEVICE = "cpu" # Force CPU usage
 
 
 
20
 
21
  # Load model and tokenizer
22
  try:
23
  logger.info("Loading Phi-2 model and tokenizer...")
24
 
25
- # Use bfloat16 if available for better performance on CPU
26
- torch_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float32
27
 
28
  tokenizer = AutoTokenizer.from_pretrained(
29
  MODEL_NAME,
@@ -63,7 +66,7 @@ async def generate_text(api_key: str, request: GenerationRequest):
63
  raise HTTPException(status_code=401, detail="Invalid API key")
64
 
65
  try:
66
- inputs = tokenizer(request.prompt, return_tensors="pt").to(DEVICE)
67
 
68
  with torch.no_grad():
69
  outputs = model.generate(
 
16
 
17
  # Model configuration
18
  MODEL_NAME = "microsoft/phi-2"
19
+
20
+ # Force CPU usage and disable CUDA
21
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
22
+ torch.set_default_device("cpu")
23
 
24
  # Load model and tokenizer
25
  try:
26
  logger.info("Loading Phi-2 model and tokenizer...")
27
 
28
+ # Explicitly set to CPU and float32
29
+ torch_dtype = torch.float32
30
 
31
  tokenizer = AutoTokenizer.from_pretrained(
32
  MODEL_NAME,
 
66
  raise HTTPException(status_code=401, detail="Invalid API key")
67
 
68
  try:
69
+ inputs = tokenizer(request.prompt, return_tensors="pt")
70
 
71
  with torch.no_grad():
72
  outputs = model.generate(