Subodh358 committed on
Commit
a8a3d39
·
verified ·
1 Parent(s): 4b76799

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +14 -14
  2. app.py +21 -0
  3. requirements.txt +4 -0
Dockerfile CHANGED
@@ -1,20 +1,20 @@
1
- # Use the official PyTorch image with CUDA support
2
- FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime
3
 
4
- # Install git to clone the model repository
5
- RUN apt-get update && apt-get install -y git
6
 
7
- # Install other dependencies
8
- RUN pip install transformers
9
 
10
- # Clone the unsloth/llama-3-8b-bnb-4bit model repository
11
- RUN git clone https://huggingface.co/unsloth/llama-3-8b-bnb-4bit
12
 
13
- # Set the working directory to the model directory
14
- WORKDIR /unsloth/llama-3-8b-bnb-4bit
15
 
16
- # Copy the inference script
17
- COPY inference.py .
18
 
19
- # Run the inference script
20
- CMD ["python", "inference.py"]
 
1
# syntax=docker/dockerfile:1

# Use an official Python runtime as a parent image
FROM python:3.9-slim

# Set the working directory (created automatically if missing)
WORKDIR /app

# Copy only the dependency manifest first so the pip layer below is
# cached until requirements.txt itself changes — copying the whole
# source tree first would reinstall all packages on every code edit.
COPY requirements.txt .

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code into the container at /app
COPY . /app

# Make port 80 available to the world outside this container
# (EXPOSE is documentation only; publish with `docker run -p 80:80`)
EXPOSE 80

# Define environment variable — key=value form; the legacy
# space-separated form (`ENV NAME World`) is deprecated
ENV NAME=World

# Run the app under uvicorn when the container launches
# (exec form so uvicorn is PID 1 and receives SIGTERM from `docker stop`)
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"]
app.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

app = FastAPI()

# Load the model and tokenizer once at module import so every request
# reuses them instead of reloading a multi-GB checkpoint per call.
# NOTE(review): this is a 4-bit bitsandbytes-quantized checkpoint;
# loading it requires the `bitsandbytes` and `accelerate` packages and,
# in practice, a CUDA GPU — confirm the deployment image provides them.
model_name = "unsloth/llama-3-8b-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)


@app.get("/")
def read_root():
    """Trivial liveness/hello endpoint."""
    return {"Hello": "World"}


@app.post("/generate/")
def generate(prompt: str):
    """Generate a continuation of `prompt` with the loaded model.

    Args:
        prompt: text to continue (FastAPI query parameter).

    Returns:
        dict with the decoded generation (prompt included) under
        the key "generated_text".
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    # Inference needs no gradients — avoid autograd bookkeeping/memory.
    with torch.no_grad():
        # Pass the attention mask along with input_ids; omitting it
        # forces generate() to infer padding and can produce wrong
        # results (transformers also warns about it).
        # max_length counts prompt tokens too, so a prompt of >=50
        # tokens yields no new text; consider max_new_tokens instead.
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs.get("attention_mask"),
            max_length=50,
        )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"generated_text": text}
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# Web framework and ASGI server
fastapi
uvicorn
# Model runtime
torch
transformers
# Required to load the 4-bit bitsandbytes-quantized checkpoint
# referenced in app.py (unsloth/llama-3-8b-bnb-4bit)
accelerate
bitsandbytes
# NOTE(review): pin exact versions (pkg==x.y.z) for reproducible builds