Mohansai2004 committed
Commit fe98a76 · 1 parent: 9b556bc

feat: switch to deepseek model for token-free operation

Files changed (2):
1. README.md (+11 -11)
2. app.py (+64 -35)
README.md CHANGED
@@ -1,27 +1,27 @@
 ---
-title: AI Chat Assistant
-emoji: 💭
+title: DeepSeek R1 Chat
+emoji: 🧠
 colorFrom: blue
 colorTo: purple
 sdk: streamlit
 sdk_version: 1.41.1
 app_file: app.py
 pinned: false
-short_description: ChatGPT-like interface using Ollama
+short_description: Advanced Chat using DeepSeek-R1-Distill-8B
 ---
 
-# AI Chat Assistant
+# DeepSeek R1 Chat Assistant
 
-Simple and efficient chat interface powered by Ollama DeepSeek model.
+Powerful chat interface powered by DeepSeek-R1-Distill-Llama-8B model.
 
 ## Features
-- Clean chat interface
-- Message history
-- Streaming responses
-- Context awareness
-- Local inference
+- Advanced language understanding
+- Context-aware responses
+- Efficient 8B parameter model
+- Local CPU inference
+- Memory optimized
 
 ## Usage
 - Type your message and press Enter
 - Clear chat history using sidebar button
-- Maintains conversation context
+- Best for complex conversations
app.py CHANGED
@@ -3,27 +3,35 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import logging
 
-# Configure page and logging
-st.set_page_config(page_title="AI Chat Assistant", page_icon="💭", layout="wide")
+# Configure page
+st.set_page_config(
+    page_title="DeepSeek R1 Chat",
+    page_icon="🧠",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+
+# Set up logging and style
 logging.basicConfig(level=logging.INFO)
 
-# Custom CSS for chat interface
 st.markdown("""
 <style>
 .stChat { padding: 20px; border-radius: 10px; }
 .user-message { background-color: #e6f3ff; }
 .assistant-message { background-color: #f0f2f6; }
+.stButton button { background-color: #2E86C1; }
 </style>
 """, unsafe_allow_html=True)
 
 @st.cache_resource
 def load_model():
-    model_name = "deepseek-ai/deepseek-chat-1.3b-base"  # Smaller DeepSeek model
+    model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
 
     try:
         tokenizer = AutoTokenizer.from_pretrained(
             model_name,
-            trust_remote_code=True
+            trust_remote_code=True,
+            padding_side='left'
         )
         tokenizer.pad_token = tokenizer.eos_token
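Editor's note on the `padding_side='left'` addition above: decoder-only models should be padded on the left, otherwise pad tokens end up between the prompt and its generated continuation. This app only tokenizes one prompt at a time, so nothing is actually padded here; a minimal sketch of where it would matter (illustrative only, not part of the commit):

```python
# Illustrative sketch: left padding when batching prompts for a
# decoder-only model. Uses the model id introduced in this commit.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(
    "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", padding_side="left"
)
tok.pad_token = tok.eos_token  # same workaround as in load_model()

batch = tok(["hi", "a much longer prompt"], padding=True, return_tensors="pt")
# Pad ids now sit at the start of the shorter row, so both prompts end at
# the final position and generate() continues straight from them.
print(batch["input_ids"][0])
```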
 
@@ -31,9 +39,12 @@ def load_model():
             model_name,
             torch_dtype=torch.float32,
             low_cpu_mem_usage=True,
-            trust_remote_code=True
-        ).to("cpu")
+            trust_remote_code=True,
+            device_map='cpu'
+        )
 
+        model.eval()
+        torch.set_num_threads(8)
         return model, tokenizer
 
     except Exception as e:
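Worth flagging for review: at `torch.float32`, the 8B checkpoint needs roughly 32 GB of RAM for weights alone (8e9 parameters x 4 bytes), which is beyond typical free Space hardware. A lower-memory variant of the loader, assuming bfloat16 is acceptable on the target CPU (`load_model_bf16` is a hypothetical name, not part of this commit):

```python
# Sketch: bfloat16 CPU loader, roughly halving the float32 weight footprint.
# Assumption: the host CPU tolerates bfloat16; the commit itself ships float32.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"

def load_model_bf16():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, padding_side="left")
    tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.bfloat16,  # 2 bytes per param instead of 4
        low_cpu_mem_usage=True,      # materialize weights shard by shard
        device_map="cpu",            # requires accelerate, as in the commit
    )
    model.eval()
    return model, tokenizer
```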
@@ -42,30 +53,52 @@ def load_model():
 
 def generate_response(prompt, model, tokenizer):
     try:
-        # Format prompt for DeepSeek chat
-        chat_prompt = f"Human: {prompt}\n\nAssistant: Let me help you with that."
+        chat_prompt = f"""user
+{prompt}
+assistant
+I'll help you with that."""
+
+        inputs = tokenizer(
+            chat_prompt,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=2048
+        )
 
-        inputs = tokenizer(chat_prompt, return_tensors="pt", padding=True)
+        # Create placeholder for streaming output
         message_placeholder = st.empty()
-        response_text = ""
+        full_response = ""
 
         with torch.inference_mode():
-            outputs = model.generate(
-                inputs["input_ids"],
-                max_length=512,
-                temperature=0.7,
-                top_p=0.95,
-                do_sample=True,
-                pad_token_id=tokenizer.eos_token_id,
-                attention_mask=inputs["attention_mask"],
-                num_return_sequences=1,
-                repetition_penalty=1.2
-            )
+            generated_ids = []
+            for i in range(512):  # Max new tokens
+                # Generate next token
+                outputs = model.generate(
+                    inputs["input_ids"] if not generated_ids else torch.cat([inputs["input_ids"], torch.tensor([generated_ids]).to(model.device)], dim=1),
+                    max_new_tokens=1,
+                    temperature=0.7,
+                    do_sample=True,
+                    top_p=0.95,
+                    repetition_penalty=1.1,
+                    pad_token_id=tokenizer.eos_token_id
+                )
+
+                next_token = outputs[0][-1].item()
+                generated_ids.append(next_token)
+
+                # Decode and display current state
+                current_output = tokenizer.decode(generated_ids, skip_special_tokens=True)
+                full_response = current_output
+                message_placeholder.markdown(full_response)
+
+                # Check for end of generation
+                if next_token == tokenizer.eos_token_id:
+                    break
 
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Extract assistant's response
-        response = response.split("Assistant:")[-1].strip()
-        return response
+        # Clean up response
+        response = full_response.split("assistant")[-1].strip()
+        return response.split("user")[0].strip()
 
     except Exception as e:
         st.error(f"Error: {str(e)}")
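Two review notes on this hunk. First, the hand-built `user`/`assistant` prompt bypasses the model's special tokens; when a checkpoint ships a chat template, `tokenizer.apply_chat_template` produces the canonical format. Second, calling `model.generate` once per token re-encodes the entire prefix on every loop iteration; `transformers.TextIteratorStreamer` streams decoded chunks from a single `generate` call that keeps its KV cache. A sketch combining both (`generate_response_streaming` and the `placeholder` argument are hypothetical names; the rest is the stock transformers API):

```python
# Sketch: single-call streaming generation, assuming the checkpoint
# defines a chat template. `model` and `tokenizer` come from load_model().
from threading import Thread
from transformers import TextIteratorStreamer

def generate_response_streaming(prompt, model, tokenizer, placeholder):
    messages = [{"role": "user", "content": prompt}]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    )
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # generate() runs in a worker thread and feeds the streamer as it goes
    thread = Thread(target=model.generate, kwargs=dict(
        inputs=input_ids,
        streamer=streamer,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.eos_token_id,
    ))
    thread.start()
    full_response = ""
    for chunk in streamer:  # yields decoded text as tokens arrive
        full_response += chunk
        placeholder.markdown(full_response)
    thread.join()
    return full_response.strip()
```

With this pattern the UI still updates per chunk, but the prefix is encoded once instead of up to 512 times.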
@@ -77,29 +110,24 @@ def main():
         st.session_state.model, st.session_state.tokenizer = load_model()
 
 def main():
-    st.title("💭 AI Chat Assistant")
+    st.title("🧠 DeepSeek R1 Chat Assistant")
     init_chat()
 
-    # Sidebar with controls
     with st.sidebar:
-        st.markdown("### Chat Controls")
-        if st.button("🗑️ Clear Chat", use_container_width=True):
+        st.markdown("### Chat Settings")
+        if st.button("🗑️ Clear History", use_container_width=True):
             st.session_state.messages = []
             st.rerun()
 
-    # Display chat messages
     for message in st.session_state.messages:
         with st.chat_message(message["role"]):
             st.markdown(message["content"])
 
-    # Chat input
-    if prompt := st.chat_input("Send a message..."):
-        # Add user message
+    if prompt := st.chat_input("Ask me anything..."):
         st.session_state.messages.append({"role": "user", "content": prompt})
         with st.chat_message("user"):
            st.markdown(prompt)
 
-        # Generate and display assistant response
        with st.chat_message("assistant"):
            context = "\n".join([
                f"{m['role']}: {m['content']}"
@@ -113,6 +141,7 @@ def main():
            )
 
            if response:
+                st.markdown(response)
                st.session_state.messages.append(
                    {"role": "assistant", "content": response}
                )
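A behavioral note on `main()`: the `context` string joins the entire message history, while the tokenizer call in `generate_response` truncates at `max_length=2048`, so the oldest turns are silently dropped as conversations grow. A sketch of an explicit cap (`build_context` and `MAX_TURNS` are hypothetical, not part of this commit):

```python
# Sketch: replay only the most recent turns instead of relying on
# silent truncation at max_length=2048. MAX_TURNS is an assumed cap.
MAX_TURNS = 6

def build_context(messages, max_turns=MAX_TURNS):
    recent = messages[-max_turns:]
    return "\n".join(f"{m['role']}: {m['content']}" for m in recent)
```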
 