alessandroptsn committed on
Commit
5ff5c8e
·
verified ·
1 Parent(s): dd0f4ef

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +11 -0
  2. llm_func.py +22 -0
  3. main.py +18 -0
  4. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9
2
+
3
+ WORKDIR /code
4
+
5
+ COPY ./requirements.txt /code/requirements.txt
6
+
7
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
+
9
+ COPY . .
10
+
11
+ CMD ["uvicorn" , "main:app", "--host", "0.0.0.0","--port","7860"]
llm_func.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from llama_cpp import Llama
3
+ model = Llama.from_pretrained(repo_id="tensorblock/SmolLM-135M-Instruct-GGUF",filename="*SmolLM-135M-Instruct-Q4_K_M.gguf",verbose=False,n_ctx=1000)
4
+
5
+
6
+ def mdl(input):
7
+ print(query)
8
+ start = time.time()
9
+ output = model(
10
+ prompt=f"""<|im_start|>system
11
+ You are a helpful chatbot.<|im_end|>
12
+ <|im_start|>user
13
+ {input}<|im_end|>""",
14
+ max_tokens=256,
15
+ temperature=0.1,
16
+ top_p=0.9,
17
+ echo=False,
18
+ stop=["#"])
19
+ end = time.time()
20
+ total_time = end - start
21
+ print(f"Execution time: {total_time:.2f} seconds")
22
+ return output["choices"][0]["text"].replace('\nassistant\n','')
main.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from fastapi.responses import PlainTextResponse
3
+ from pydantic import BaseModel
4
+ from llm_func import mdl
5
+
6
+ app = FastAPI()
7
+
8
+ class TextInput(BaseModel):
9
+ text: str
10
+
11
+
12
+ @app.post("/llm/", response_class=PlainTextResponse)
13
+ async def convert_to_llm(input_data: TextInput):
14
+ if not input_data.text:
15
+ raise HTTPException(status_code=400, detail="O texto não pode estar vazio.")
16
+
17
+ output_data = mdl(input_data.text)
18
+ return output_data
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ llama-cpp-python==0.3.6 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
4
+ huggingface_hub