AgamP committed
Commit b7034ea · verified · 1 Parent(s): 5088f1c

Upload 4 files

Files changed (4)
  1. app.py +49 -0
  2. dockerfile +20 -0
  3. model.py +47 -0
  4. requirements.txt +11 -0
app.py ADDED
@@ -0,0 +1,49 @@
+ import torch
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from model import eval_tokenizer, model
+
+ app = FastAPI(title="RAIZZ-FAQ-Bot")
+
+
+ class Query(BaseModel):
+     query_prompt: str
+
+
+ class ChatResponse(BaseModel):
+     response: str
+
+
+ # API endpoints
+
+ @app.get("/")
+ def read_root():
+     return {"message": "Welcome to the FAQ Bot!"}
+
+
+ @app.post("/chat")
+ def chat(message: Query):
+     # Tokenize the prompt and move the tensors to the device the model lives on.
+     model_input = eval_tokenizer(message.query_prompt, return_tensors="pt").to(model.device)
+     model.eval()
+     with torch.no_grad():
+         response = eval_tokenizer.decode(
+             model.generate(**model_input, max_new_tokens=500)[0],
+             skip_special_tokens=True,
+         )
+     return {"response": response}
+
+
+ @app.post("/chatbot", response_model=ChatResponse, status_code=200)
+ async def make_prediction(request: Query):
+     try:
+         prompt = request.query_prompt
+         model_input = eval_tokenizer(prompt, return_tensors="pt").to(model.device)
+         with torch.no_grad():
+             model_answer = eval_tokenizer.decode(
+                 model.generate(**model_input, max_new_tokens=500)[0],
+                 skip_special_tokens=True,
+             )
+         return ChatResponse(response=model_answer)
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
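
A minimal, hypothetical client-side sketch of calling the /chatbot endpoint; it assumes the service is reachable at http://localhost:7860 (the port used in the Dockerfile below) and that the requests package is installed on the client:

import requests  # assumption: requests is available on the client machine

# Hypothetical local smoke test; 7860 matches the port in the Dockerfile's CMD.
resp = requests.post(
    "http://localhost:7860/chatbot",
    json={"query_prompt": "How do i track my fitness levels?"},
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["response"])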
dockerfile ADDED
@@ -0,0 +1,20 @@
+ FROM python:3.11.0
+
+ WORKDIR /faq-chatbot
+
+ COPY ./requirements.txt /faq-chatbot/requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r /faq-chatbot/requirements.txt
+
+ RUN useradd -m -u 1000 user
+
+ USER user
+
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ WORKDIR $HOME/app
+
+ COPY --chown=user . $HOME/app
+
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
model.py ADDED
@@ -0,0 +1,47 @@
+ # Load the 4-bit quantized base model and attach the fine-tuned PEFT adapter.
+ import torch
+ from huggingface_hub import login  # call login(...) first if the checkpoints are gated
+ from peft import PeftConfig, PeftModel
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+
+ base_model_id = "mistralai/Mistral-7B-Instruct-v0.2"
+
+ # 4-bit NF4 quantization so the 7B model fits on a single GPU.
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.bfloat16,
+ )
+
+ base_model = AutoModelForCausalLM.from_pretrained(
+     base_model_id,
+     quantization_config=bnb_config,
+     device_map="auto",
+     trust_remote_code=True,
+ )
+
+ eval_tokenizer = AutoTokenizer.from_pretrained(
+     base_model_id,
+     add_bos_token=True,
+     trust_remote_code=True,
+ )
+
+ # Fine-tuned LoRA adapter on top of the quantized base model.
+ peft_model_id = "AgamP/results"
+ config = PeftConfig.from_pretrained(peft_model_id)
+ model = PeftModel.from_pretrained(base_model, peft_model_id)
+ model.eval()
+
+
+ def generate_response(prompt):
+     # e.g. prompt = "How do i track my fitness levels?"
+     model_input = eval_tokenizer(prompt, return_tensors="pt").to(model.device)
+     # Disable gradient tracking for inference.
+     with torch.no_grad():
+         response = eval_tokenizer.decode(
+             model.generate(**model_input, max_new_tokens=500)[0],
+             skip_special_tokens=True,
+         )
+     return response
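
A rough usage sketch for generate_response as defined above; it assumes model.py has finished loading the weights and that a GPU with enough memory for the 4-bit quantized 7B model is available:

from model import generate_response  # importing model.py triggers the model load

answer = generate_response("How do i track my fitness levels?")
print(answer)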
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ bitsandbytes
+ accelerate
+ torch
+ transformers
+ huggingface_hub
+ peft
+ fastapi
+ uvicorn
+ pydantic