kripeshAlt committed
Commit 57995b2 · verified · 1 Parent(s): f3ee5c3

Update app.py

Files changed (1)
  1. app.py +41 -16
app.py CHANGED
@@ -5,25 +5,39 @@ import logging
 from typing import List
 import os
 import uuid
+import torch
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 # Initialize FastAPI app
-app = FastAPI(title="DeepSeek CPU Hosting API")
+app = FastAPI(title="Phi-2 CPU Hosting API")
 
 # Model configuration
-MODEL_NAME = "deepseek-ai/deepseek-llm-7b"  # Example model, replace with actual DeepSeek model
+MODEL_NAME = "microsoft/phi-2"
 DEVICE = "cpu"  # Force CPU usage
 
 # Load model and tokenizer
 try:
-    logger.info("Loading model and tokenizer...")
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
-    model.to(DEVICE)
-    logger.info("Model loaded successfully!")
+    logger.info("Loading Phi-2 model and tokenizer...")
+
+    # Use bfloat16 if available for better performance on CPU
+    torch_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float32
+
+    tokenizer = AutoTokenizer.from_pretrained(
+        MODEL_NAME,
+        trust_remote_code=True
+    )
+
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        torch_dtype=torch_dtype,
+        trust_remote_code=True,
+        device_map="cpu"
+    )
+
+    logger.info("Phi-2 model loaded successfully!")
 except Exception as e:
     logger.error(f"Failed to load model: {str(e)}")
     raise
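
A note on the dtype line added above: torch.cuda.is_bf16_supported() queries the CUDA device, so on a CPU-only Space it reports False (or, on some older PyTorch builds with no GPU present, raises), and the model effectively always loads in float32 here. If CPU bfloat16 is actually wanted, one option is to probe it directly; a minimal sketch, assuming only stock PyTorch (the helper name pick_cpu_dtype is illustrative, not part of this commit):

    import torch

    def pick_cpu_dtype() -> torch.dtype:
        # Illustrative helper (assumption): try a tiny bfloat16 matmul on CPU.
        # PyTorch may emulate bfloat16 without native hardware support, so
        # success means the op runs, not that it is faster than float32.
        try:
            a = torch.ones(2, 2, dtype=torch.bfloat16)
            _ = a @ a
            return torch.bfloat16
        except RuntimeError:
            return torch.float32

    torch_dtype = pick_cpu_dtype()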
@@ -34,9 +48,10 @@ API_KEYS = {}
 # Request models
 class GenerationRequest(BaseModel):
     prompt: str
-    max_length: int = 100
+    max_length: int = 200
     temperature: float = 0.7
     top_p: float = 0.9
+    do_sample: bool = True
 
 class APIKeyRequest(BaseModel):
     name: str
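
With the new do_sample field and the max_length default raised from 100 to 200, a client request body looks like the sketch below. The route decorator is outside this diff, so the URL and port (7860, the usual Spaces default) are assumptions:

    import requests

    # Hypothetical endpoint: the actual route is not shown in this commit.
    url = "http://localhost:7860/generate/YOUR_API_KEY"

    payload = {
        "prompt": "Write a haiku about CPUs.",
        "max_length": 200,    # new default; see the note after the next hunk
        "temperature": 0.7,
        "top_p": 0.9,
        "do_sample": True,    # new field; False gives greedy decoding
    }

    resp = requests.post(url, json=payload)
    resp.raise_for_status()
    print(resp.json()["generated_text"])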
@@ -49,17 +64,27 @@ async def generate_text(api_key: str, request: GenerationRequest):
 
     try:
         inputs = tokenizer(request.prompt, return_tensors="pt").to(DEVICE)
-        outputs = model.generate(
-            **inputs,
-            max_length=request.max_length,
-            temperature=request.temperature,
-            top_p=request.top_p,
-            do_sample=True
-        )
+
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_length=request.max_length,
+                temperature=request.temperature,
+                top_p=request.top_p,
+                do_sample=request.do_sample,
+                pad_token_id=tokenizer.eos_token_id
+            )
+
         generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
+        # Update usage count
+        API_KEYS[api_key]["usage_count"] += 1
         logger.info(f"Generated text for API key: {api_key}")
-        return {"generated_text": generated_text}
+
+        return {
+            "generated_text": generated_text,
+            "usage_count": API_KEYS[api_key]["usage_count"]
+        }
     except Exception as e:
         logger.error(f"Generation error: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
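
One caveat on the call above: max_length in generate() bounds prompt plus completion together, so a long prompt can leave little or no room for new tokens. If the intent is to bound only the completion, max_new_tokens does that; a sketch of the alternative call, otherwise identical to the committed one:

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=request.max_length,  # bounds generated tokens only
            temperature=request.temperature,
            top_p=request.top_p,
            do_sample=request.do_sample,
            pad_token_id=tokenizer.eos_token_id
        )

Separately, the new usage counter lives in the in-process API_KEYS dict, so it resets on every restart and is not shared if the app ever runs with multiple workers.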
 