Spaces:
Sleeping
Sleeping
Alejadro Sanchez-Giraldo
committed on
Commit
·
0a1e0cd
1
Parent(s):
ba1b260
uplift from DeepSeek
Browse files- .gitignore +3 -1
- app.py +60 -12
- requirements.txt +1 -1
.gitignore
CHANGED
|
@@ -4,4 +4,6 @@ dschatbot/
|
|
| 4 |
|
| 5 |
__pycache__/
|
| 6 |
|
| 7 |
-
flagged
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
__pycache__/
|
| 6 |
|
| 7 |
+
flagged
|
| 8 |
+
|
| 9 |
+
query_logs.csv
|
app.py
CHANGED
|
@@ -1,23 +1,52 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 4 |
import torch
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
print("CUDA available: ", torch.cuda.is_available())
|
| 8 |
print("MPS available: ", torch.backends.mps.is_available())
|
| 9 |
|
|
|
|
| 10 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 11 |
"deepseek-ai/deepseek-coder-1.3b-instruct", trust_remote_code=True)
|
| 12 |
model = AutoModelForCausalLM.from_pretrained(
|
| 13 |
"deepseek-ai/deepseek-coder-1.3b-instruct", trust_remote_code=True, torch_dtype=torch.bfloat16)
|
| 14 |
|
| 15 |
# Disable tokenizers parallelism warning
|
| 16 |
-
os.environ["TOKENIZERS_PARALLELISM"] = "
|
| 17 |
|
| 18 |
|
| 19 |
# Check if MPS (Metal Performance Shaders) is available
|
| 20 |
-
device = torch.device(
|
|
|
|
| 21 |
model = model.to(device)
|
| 22 |
|
| 23 |
# Theme builder
|
|
@@ -31,30 +60,49 @@ theme = gr.themes.Soft(
|
|
| 31 |
# Function to handle user input and generate a response
|
| 32 |
|
| 33 |
|
| 34 |
-
def chatbot_response(query):
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
response = "\n".join(
|
| 39 |
-
[f"{key}: {value}" for key, value in response.items()])
|
| 40 |
|
| 41 |
# Generate response using the model
|
| 42 |
messages = [{'role': 'user', 'content': query}]
|
| 43 |
inputs = tokenizer.apply_chat_template(
|
| 44 |
messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
|
| 45 |
|
| 46 |
-
outputs = model.generate(
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
model_response = tokenizer.decode(
|
| 49 |
outputs[0][len(inputs[0]):], skip_special_tokens=True)
|
| 50 |
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
|
| 54 |
# Set up the Gradio interface
|
| 55 |
iface = gr.Interface(
|
| 56 |
fn=chatbot_response,
|
| 57 |
-
inputs=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
outputs=gr.Textbox(label="Hope it helps!"),
|
| 59 |
theme=theme,
|
| 60 |
title="DSChatbot"
|
|
|
|
| 1 |
import os
|
| 2 |
+
import logging
|
| 3 |
import gradio as gr
|
| 4 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 5 |
import torch
|
| 6 |
+
import uuid
|
| 7 |
+
import time
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def capture_logs(log_body, log_file, uuid_label):
    """Write one INFO entry to *log_file* and echo it to the console.

    The 'MyApp' logger is configured lazily: handlers are attached on the
    first call only, so repeated calls keep appending to the same file.

    NOTE(review): the caller in this file passes the UUID positionally as
    *log_body* and the message text as *uuid_label*, so the parameter names
    read swapped relative to their actual contents — confirm and rename in
    a follow-up.

    Args:
        log_body: first value interpolated into the 'uuid: %s - %s' line.
        log_file: path the FileHandler appends entries to.
        uuid_label: second value interpolated into the log line.
    """
    logger = logging.getLogger('MyApp')
    logger.setLevel(logging.INFO)

    # Attach handlers exactly once to avoid duplicate log lines.
    if not logger.handlers:
        line_format = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')

        file_handler = logging.FileHandler(log_file)
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(line_format)

        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.INFO)
        console_handler.setFormatter(line_format)

        logger.addHandler(file_handler)
        logger.addHandler(console_handler)

    logger.info('uuid: %s - %s', log_body, uuid_label)
|
| 32 |
|
| 33 |
|
| 34 |
print("CUDA available: ", torch.cuda.is_available())
|
| 35 |
print("MPS available: ", torch.backends.mps.is_available())
|
| 36 |
|
| 37 |
+
|
| 38 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 39 |
"deepseek-ai/deepseek-coder-1.3b-instruct", trust_remote_code=True)
|
| 40 |
model = AutoModelForCausalLM.from_pretrained(
|
| 41 |
"deepseek-ai/deepseek-coder-1.3b-instruct", trust_remote_code=True, torch_dtype=torch.bfloat16)
|
| 42 |
|
| 43 |
# Disable tokenizers parallelism warning
|
| 44 |
+
os.environ["TOKENIZERS_PARALLELISM"] = "True"
|
| 45 |
|
| 46 |
|
| 47 |
# Check if MPS (Metal Performance Shaders) is available
|
| 48 |
+
device = torch.device(
|
| 49 |
+
"mps") if torch.backends.mps.is_available() else torch.device("cpu")
|
| 50 |
model = model.to(device)
|
| 51 |
|
| 52 |
# Theme builder
|
|
|
|
| 60 |
# Function to handle user input and generate a response
|
| 61 |
|
| 62 |
|
| 63 |
+
def chatbot_response(query, tokens, top_k, top_p):
    """Generate a model reply for *query* and log the request timing.

    Args:
        query: user prompt as plain text.
        tokens: max_new_tokens budget for generation.
        top_k: top-k sampling cutoff.
        top_p: nucleus-sampling probability mass.

    Returns:
        The decoded model completion, with the prompt tokens stripped.
    """
    uuid_label = str(uuid.uuid4())  # correlation id for this request

    start_time = time.time()  # Start timer

    # Build the chat prompt and move it onto the model's device.
    messages = [{'role': 'user', 'content': query}]
    inputs = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt").to(model.device)

    outputs = model.generate(
        inputs,
        max_new_tokens=tokens,
        do_sample=True,
        top_k=top_k,
        top_p=top_p,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        # Fix: silences the "pad_token_id not set" runtime warning on
        # models (like this one) that define no dedicated pad token.
        pad_token_id=tokenizer.eos_token_id,
    )
    # Slice off the prompt so only the newly generated tokens are decoded.
    model_response = tokenizer.decode(
        outputs[0][len(inputs[0]):], skip_special_tokens=True)

    end_time = time.time()  # End timer
    performance_time = round(end_time - start_time, 2)

    # Fix: log field was misspelled "pocessTime".
    log_body = 'query: %s, processTime: %s, tokens: %s, top_k: %s, top_p: %s' % (
        query, performance_time, tokens, top_k, top_p)

    # NOTE(review): capture_logs is declared (log_body, log_file, uuid_label)
    # but is called here with the uuid first — confirm the intended order.
    capture_logs(uuid_label, 'query_logs.csv', log_body)

    return model_response
|
| 93 |
|
| 94 |
|
| 95 |
# Set up the Gradio interface
|
| 96 |
iface = gr.Interface(
|
| 97 |
fn=chatbot_response,
|
| 98 |
+
inputs=[
|
| 99 |
+
gr.Textbox(label="Ask our DSChatbot Expert"),
|
| 100 |
+
gr.Slider(label="Max New Tokens", minimum=128,
|
| 101 |
+
maximum=2048, step=128, value=512),
|
| 102 |
+
gr.Slider(label="Top K", minimum=0, maximum=100, step=10, value=50),
|
| 103 |
+
gr.Slider(label="Top P", minimum=0.0,
|
| 104 |
+
maximum=1.0, step=0.1, value=0.95),
|
| 105 |
+
],
|
| 106 |
outputs=gr.Textbox(label="Hope it helps!"),
|
| 107 |
theme=theme,
|
| 108 |
title="DSChatbot"
|
requirements.txt
CHANGED
|
@@ -3,6 +3,6 @@ gradio==4.44.1
|
|
| 3 |
requests
|
| 4 |
transformers
|
| 5 |
minijinja
|
| 6 |
-
torch --extra-index-url https://download.pytorch.org/whl/
|
| 7 |
torchvision
|
| 8 |
torchaudio
|
|
|
|
| 3 |
requests
|
| 4 |
transformers
|
| 5 |
minijinja
|
| 6 |
+
torch --extra-index-url https://download.pytorch.org/whl/cu118
|
| 7 |
torchvision
|
| 8 |
torchaudio
|