Splashdude committed on
Commit
27950ee
·
verified ·
1 Parent(s): 5be55c9

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +31 -12
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🤖
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: gradio
7
- sdk_version: 4.44.1
8
  app_file: app.py
9
  pinned: true
10
  ---
 
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 5.23.3
8
  app_file: app.py
9
  pinned: true
10
  ---
app.py CHANGED
@@ -1,24 +1,41 @@
1
- import torch
2
  import threading
 
3
  import gradio as gr
4
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
5
 
6
  MODEL_ID = "Splashdude/reasoning-chat-model-7b"
7
  SYSTEM_PROMPT = "You are a helpful, friendly AI assistant. You give clear, accurate, and concise answers."
8
 
9
- print("Loading model and tokenizer...")
10
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
11
- model = AutoModelForCausalLM.from_pretrained(
12
- MODEL_ID,
13
- torch_dtype=torch.float16,
14
- device_map="auto",
15
- trust_remote_code=True,
16
- )
17
- model.eval()
18
- print("Model loaded successfully!")
 
 
 
 
 
 
 
 
 
19
 
20
 
21
  def generate_response(message, history):
 
 
 
 
 
 
 
22
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
23
  for user_msg, bot_msg in history:
24
  messages.append({"role": "user", "content": user_msg})
@@ -31,7 +48,9 @@ def generate_response(message, history):
31
  )
32
  inputs = tokenizer(text, return_tensors="pt").to(model.device)
33
 
34
- streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
 
35
 
36
  generation_kwargs = {
37
  **inputs,
 
1
+ import os
2
  import threading
3
+ import torch
4
  import gradio as gr
5
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
6
 
7
  MODEL_ID = "Splashdude/reasoning-chat-model-7b"
8
  SYSTEM_PROMPT = "You are a helpful, friendly AI assistant. You give clear, accurate, and concise answers."
9
 
10
+ model = None
11
+ tokenizer = None
12
+
13
+
14
+ def load_model():
15
+ global model, tokenizer
16
+ if model is not None:
17
+ return
18
+ print("Loading model and tokenizer...")
19
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
20
+ model = AutoModelForCausalLM.from_pretrained(
21
+ MODEL_ID,
22
+ torch_dtype=torch.float16,
23
+ device_map="auto",
24
+ trust_remote_code=True,
25
+ low_cpu_mem_usage=True,
26
+ )
27
+ model.eval()
28
+ print("Model loaded successfully!")
29
 
30
 
31
  def generate_response(message, history):
32
+ if model is None or tokenizer is None:
33
+ try:
34
+ load_model()
35
+ except Exception as e:
36
+ yield f"Error loading model: {e}"
37
+ return
38
+
39
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
40
  for user_msg, bot_msg in history:
41
  messages.append({"role": "user", "content": user_msg})
 
48
  )
49
  inputs = tokenizer(text, return_tensors="pt").to(model.device)
50
 
51
+ streamer = TextIteratorStreamer(
52
+ tokenizer, skip_prompt=True, skip_special_tokens=True
53
+ )
54
 
55
  generation_kwargs = {
56
  **inputs,