Alejadro Sanchez-Giraldo committed
Commit 0a1e0cd · 1 Parent(s): ba1b260

uplift from DeepSeek

Files changed (3):
  1. .gitignore +3 -1
  2. app.py +60 -12
  3. requirements.txt +1 -1
.gitignore CHANGED
@@ -4,4 +4,6 @@ dschatbot/
 
 __pycache__/
 
-flagged
+flagged
+
+query_logs.csv
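The new query_logs.csv entry keeps the per-query log file written by app.py's capture_logs helper (via logging.FileHandler) out of version control. Note the .csv name is cosmetic: the handler writes plain formatted log lines, not CSV rows.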
app.py CHANGED
@@ -1,23 +1,52 @@
 import os
+import logging
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+import uuid
+import time
+
+
+def capture_logs(log_body, log_file, uuid_label):
+    logger = logging.getLogger('MyApp')
+    logger.setLevel(logging.INFO)
+
+    # Check if handlers are already added to avoid duplication
+    if not logger.handlers:
+        fh = logging.FileHandler(log_file)
+        fh.setLevel(logging.INFO)
+
+        ch = logging.StreamHandler()
+        ch.setLevel(logging.INFO)
+
+        formatter = logging.Formatter(
+            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+        fh.setFormatter(formatter)
+        ch.setFormatter(formatter)
+
+        logger.addHandler(fh)
+        logger.addHandler(ch)
+
+    logger.info('uuid: %s - %s', log_body, uuid_label)
+    return
 
 
 print("CUDA available: ", torch.cuda.is_available())
 print("MPS available: ", torch.backends.mps.is_available())
 
+
 tokenizer = AutoTokenizer.from_pretrained(
     "deepseek-ai/deepseek-coder-1.3b-instruct", trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     "deepseek-ai/deepseek-coder-1.3b-instruct", trust_remote_code=True, torch_dtype=torch.bfloat16)
 
 # Disable tokenizers parallelism warning
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
+os.environ["TOKENIZERS_PARALLELISM"] = "True"
 
 
 # Check if MPS (Metal Performance Shaders) is available
-device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
+device = torch.device(
+    "mps") if torch.backends.mps.is_available() else torch.device("cpu")
 model = model.to(device)
 
 # Theme builder
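Two observations on this hunk. First, the TOKENIZERS_PARALLELISM value flips from "false" to "True" while the comment above it still says the warning is being disabled; setting the variable explicitly does silence the tokenizers fork warning either way, but "True" re-enables parallelism rather than disabling it, so the comment no longer matches the value. Second, capture_logs declares its parameters as (log_body, log_file, uuid_label), yet the call in the next hunk is capture_logs(uuid_label, 'query_logs.csv', log_body): the UUID binds to log_body and the message to uuid_label, and the output only reads correctly ('uuid: <id> - <message>') because the crossed names cancel out inside logger.info. A minimal sketch with the parameters renamed to match the call order actually used (hypothetical rewrite, not part of the commit):

import logging


def capture_logs(uuid_label, log_file, log_body):
    # Parameter order matches the call site:
    # capture_logs(uuid_label, 'query_logs.csv', log_body)
    logger = logging.getLogger('MyApp')
    logger.setLevel(logging.INFO)

    # Attach handlers once; repeated calls would otherwise stack
    # duplicates and emit every line multiple times.
    if not logger.handlers:
        fh = logging.FileHandler(log_file)
        ch = logging.StreamHandler()
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        fh.setFormatter(formatter)
        ch.setFormatter(formatter)
        logger.addHandler(fh)
        logger.addHandler(ch)

    # Now the first placeholder really is the UUID.
    logger.info('uuid: %s - %s', uuid_label, log_body)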
@@ -31,30 +60,49 @@ theme = gr.themes.Soft(
 # Function to handle user input and generate a response
 
 
-def chatbot_response(query):
-    response = "Let's see what I can do for you!"
-    # if response is a JSON object, iterate over the items and join them like "a: b\nc: d"
-    if isinstance(response, dict):
-        response = "\n".join(
-            [f"{key}: {value}" for key, value in response.items()])
+def chatbot_response(query, tokens, top_k, top_p):
+    uuid_label = str(uuid.uuid4())
+
+    start_time = time.time()  # Start timer
 
     # Generate response using the model
     messages = [{'role': 'user', 'content': query}]
    inputs = tokenizer.apply_chat_template(
         messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
 
-    outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, top_k=50,
-                             top_p=0.95, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
+    outputs = model.generate(
+        inputs,
+        max_new_tokens=tokens,
+        do_sample=True,
+        top_k=top_k,
+        top_p=top_p,
+        num_return_sequences=1,
+        eos_token_id=tokenizer.eos_token_id
+    )
     model_response = tokenizer.decode(
         outputs[0][len(inputs[0]):], skip_special_tokens=True)
 
-    return response + "\n\n" + model_response
+    end_time = time.time()  # End timer
+    performance_time = round(end_time - start_time, 2)
+
+    log_body = 'query: %s, processTime: %s, tokens: %s, top_k: %s, top_p: %s' % (
+        query, performance_time, tokens, top_k, top_p)
+
+    capture_logs(uuid_label, 'query_logs.csv', log_body)
+
+    return model_response
 
 
 # Set up the Gradio interface
 iface = gr.Interface(
     fn=chatbot_response,
-    inputs=gr.Textbox(label="Ask our DSChatbot Expert"),
+    inputs=[
+        gr.Textbox(label="Ask our DSChatbot Expert"),
+        gr.Slider(label="Max New Tokens", minimum=128,
+                  maximum=2048, step=128, value=512),
+        gr.Slider(label="Top K", minimum=0, maximum=100, step=10, value=50),
+        gr.Slider(label="Top P", minimum=0.0,
+                  maximum=1.0, step=0.1, value=0.95),
+    ],
     outputs=gr.Textbox(label="Hope it helps!"),
     theme=theme,
     title="DSChatbot"
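The substantive generation change: do_sample flips from False to True, so top_k and top_p now actually shape decoding (under greedy decoding transformers ignores them), and all three sampling knobs are exposed as sliders wired positionally to the new function parameters. One edge case: the Top K slider allows 0, which transformers treats as "no top-k filtering" rather than "keep zero tokens". A quick way to sanity-check the handler outside the UI, using the slider defaults (hypothetical snippet, not part of the commit):

if __name__ == "__main__":
    # Exercise chatbot_response directly with the interface defaults.
    answer = chatbot_response(
        "Write a Python function that reverses a string.",
        tokens=512,   # Max New Tokens slider default
        top_k=50,     # Top K slider default
        top_p=0.95,   # Top P slider default
    )
    print(answer)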
requirements.txt CHANGED
@@ -3,6 +3,6 @@ gradio==4.44.1
 requests
 transformers
 minijinja
-torch --extra-index-url https://download.pytorch.org/whl/torch-cuda80
+torch --extra-index-url https://download.pytorch.org/whl/cu118
 torchvision
 torchaudio
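The old URL (whl/torch-cuda80) is not a published PyTorch index, so the extra index most likely never resolved and torch fell back to the default PyPI wheels; whl/cu118 is the real CUDA 11.8 wheel index. One caveat: as far as I know, pip treats --extra-index-url as a file-level option that belongs on its own line in a requirements file rather than appended to a requirement, so if this line errors, the safer layout would be:

--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchvision
torchaudio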