ar08 committed on
Commit
e7dd500
·
verified ·
1 Parent(s): 42d2d19

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import time
import uuid
from typing import Optional

from fastapi import FastAPI, HTTPException
from llama_cpp import Llama
from pydantic import BaseModel
5
+
6
# Initialize FastAPI app
app = FastAPI()

# Load the phi3 model using llama-cpp-python.
# NOTE(review): the model is loaded at import time, so the process fails fast
# if the .gguf file is missing — confirm the path for your deployment.
model_path = "./Phi-3-mini-4k-instruct-q4.gguf"  # Ensure this path is correct
llama_model = Llama(
    model_path=model_path,
    n_ctx=4096,       # context window; matches the 4k-instruct variant
    n_threads=8,      # CPU threads — tune to the host; TODO confirm
    n_gpu_layers=35   # layers offloaded to GPU; set 0 for CPU-only hosts
)
17
+
18
# Request model based on OpenAI API structure
class RequestModel(BaseModel):
    """Request body mirroring the OpenAI /v1/completions schema.

    Required: `model` (name echoed back in the response) and `prompt`.
    All other fields carry OpenAI-compatible defaults.
    """
    model: str
    prompt: str
    max_tokens: int = 256
    temperature: float = 0.7
    top_p: float = 1.0
    n: int = 1
    stream: bool = False
    # BUG FIX: the original annotations (`logprobs: int = None`,
    # `stop: list = None`) pair a non-Optional type with a None default,
    # which pydantic v2 rejects at class-definition time. Optional[...] is
    # correct under both pydantic v1 and v2.
    logprobs: Optional[int] = None
    stop: Optional[list] = None
29
+
30
# Response model matching OpenAI's text-completion response envelope.
class ResponseModel(BaseModel):
    id: str        # completion id (e.g. "cmpl-...")
    object: str    # set to "text_completion" by the endpoint below
    created: int   # creation timestamp (unix seconds)
    model: str     # model name echoed from the request
    choices: list  # list of {text, index, logprobs, finish_reason} dicts
37
+
38
@app.post("/v1/completions", response_model=ResponseModel)
def create_completion(request: RequestModel):
    """Generate a text completion in the OpenAI /v1/completions format.

    Runs the module-level llama.cpp model on ``request.prompt`` and wraps
    the first choice's text in OpenAI's response envelope. Any failure
    surfaces as an HTTP 500 whose detail is the exception message.
    """
    try:
        # Generate text using the LLaMA model. top_p is now forwarded —
        # the request model accepted it but the original call ignored it.
        response = llama_model(
            prompt=request.prompt,
            max_tokens=request.max_tokens,
            temperature=request.temperature,
            top_p=request.top_p,
            stop=request.stop if request.stop else None,
            # NOTE(review): echo=True includes the prompt in the output;
            # OpenAI's API does not echo by default — confirm this is wanted.
            echo=True,
        )

        # Extract the generated text from llama.cpp's OpenAI-like payload.
        generated_text = response["choices"][0]["text"]

        # Build the response in OpenAI's format.
        # BUG FIX: the original called os.time(), which does not exist
        # (AttributeError made every request return 500); use time.time().
        return ResponseModel(
            id=f"cmpl-{uuid.uuid4().hex}",  # unique id per completion
            object="text_completion",
            created=int(time.time()),
            model=request.model,
            choices=[
                {
                    "text": generated_text,
                    "index": 0,
                    "logprobs": None,
                    "finish_reason": "stop",
                }
            ],
        )

    except Exception as e:
        # Boundary handler: surface model/runtime failures as HTTP 500.
        raise HTTPException(status_code=500, detail=str(e))
71
+
72
# Dev entry point: run a local uvicorn server when executed directly
# (skipped when the app is mounted by an external ASGI host).
if __name__ == "__main__":
    import uvicorn
    # 0.0.0.0 binds all interfaces — intended for container/LAN access.
    uvicorn.run(app, host="0.0.0.0", port=8000)