Omkar008 committed on
Commit
5d10f05
·
verified ·
1 Parent(s): 65f58ba

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +29 -0
  2. llm.py +38 -0
  3. requirements.txt +27 -0
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official Python image with a specific version
FROM python:3.12.5-slim

# Unbuffered stdout/stderr so logs reach `docker logs` immediately (and
# aren't lost on a crash); skip .pyc writes to keep the layer clean.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# Set the working directory in the container
WORKDIR /app

# Install system dependencies required for building Python packages
# (llama-cpp-python compiles native code, so gcc/g++/cmake are required)
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    g++ \
    cmake \
    git \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy the requirements file first so the dependency layer is cached
# independently of application-code changes
COPY requirements.txt .

# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code into the container
COPY . .

# Expose the application port
EXPOSE 8000

# Define the command to run the application
CMD ["uvicorn", "llm:app", "--host", "0.0.0.0", "--port", "8000"]
llm.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Download the quantized GGUF model at import time (a no-op on later runs
# once the file is cached in ./models). hf_hub_download returns the
# absolute path of the downloaded file, so use that directly instead of
# re-deriving the path by hand (the original also used a pointless
# f-string with no placeholders here).
model_path = hf_hub_download(
    repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
    filename="llama-3.2-1b-instruct-q4_k_m.gguf",
    local_dir="./models",
)

# Initialize the LLM once when the application starts; loading the model
# is expensive and the instance is shared by all requests.
llm = Llama(model_path=model_path)
17
+
18
app = FastAPI()


class ChatRequest(BaseModel):
    """Request body for the /chat endpoint: a single user message."""

    message: str
22
+
23
+ @app.post("/chat")
24
+ async def chat_completion(request: ChatRequest):
25
+ try:
26
+ response = llm.create_chat_completion(
27
+ messages=[
28
+ {"role": "user", "content": request.message}
29
+ ]
30
+ )
31
+ return {
32
+ "response": response['choices'][0]['message']['content']
33
+ }
34
+ except Exception as e:
35
+ raise HTTPException(status_code=500, detail=str(e))
36
+
37
+ # To run the application:
38
+ # uvicorn filename:app --reload
requirements.txt ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ annotated-types==0.7.0
2
+ anyio==4.6.2.post1
3
+ certifi==2024.8.30
4
+ charset-normalizer==3.4.0
5
+ click==8.1.7
6
+ diskcache==5.6.3
7
+ fastapi==0.115.5
8
+ filelock==3.16.1
9
+ fsspec==2024.10.0
10
+ h11==0.14.0
11
+ huggingface-hub==0.26.2
12
+ idna==3.10
13
+ Jinja2==3.1.4
14
+ llama_cpp_python==0.3.2
15
+ MarkupSafe==3.0.2
16
+ numpy==2.1.3
17
+ packaging==24.2
18
+ pydantic==2.9.2
19
+ pydantic_core==2.23.4
20
+ PyYAML==6.0.2
21
+ requests==2.32.3
22
+ sniffio==1.3.1
23
+ starlette==0.41.2
24
+ tqdm==4.67.0
25
+ typing_extensions==4.12.2
26
+ urllib3==2.2.3
27
+ uvicorn==0.32.0