Vishnu Mukundan committed on
Commit
34b18cd
·
1 Parent(s): f54a016

fastapi backend

Browse files
Files changed (3) hide show
  1. DOCKERFILE +12 -0
  2. app.py +24 -3
  3. requirements.txt +6 -1
DOCKERFILE ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# GPU-enabled image for the vLLM FastAPI service.
FROM nvidia/cuda:12.1.0-base-ubuntu22.04

# Install Python and pip; remove apt lists in the same layer to keep the image small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends python3 python3-pip && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy and install dependencies first so this layer stays cached across code-only changes.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

COPY app.py .

# app.py binds uvicorn to 0.0.0.0:7860 in its __main__ block.
EXPOSE 7860

CMD ["python3", "app.py"]
app.py CHANGED
@@ -1,7 +1,28 @@
1
  from fastapi import FastAPI
 
 
2
 
3
  app = FastAPI()
4
 
5
- @app.get("/")
6
- def greet_json():
7
- return {"Hello": "World!"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from vllm import LLM, SamplingParams

app = FastAPI()

# Fully open CORS policy: any origin, any method, any header.
# NOTE(review): acceptable for a demo Space; tighten for production.
_CORS_OPTIONS = dict(
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)

# Load the model once at import time so every request reuses the same engine.
# Half precision, claiming 90% of available GPU memory.
llama = LLM(model="meta-llama/Llama-3.2-1B-Instruct", dtype="half", gpu_memory_utilization=0.9)
19
+
20
+ @app.post("/generate")
21
+ def generate(prompt: str):
22
+ params = SamplingParams(temperature=0.7, max_tokens=100)
23
+ output = llama.generate([prompt], params)
24
+ return {"text": output[0].outputs[0].text}
25
+
26
if __name__ == "__main__":
    # Run the ASGI app directly; 0.0.0.0:7860 is the Hugging Face Spaces convention.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
requirements.txt CHANGED
@@ -1,2 +1,7 @@
1
  fastapi
2
- uvicorn[standard]
 
 
 
 
 
 
1
  fastapi
2
+ uvicorn
3
+ vllm
4
+ transformers
5
+ torch
6
+ pillow
7
+ faster-whisper