Erik22TY committed
Commit b328cc7 · verified · 1 Parent(s): 8984206

Update app.py

Files changed (1):
  1. app.py +17 -26
app.py CHANGED
@@ -1,47 +1,38 @@
 import gradio as gr
 import torch
-from transformers import AutoTokenizer
-
-from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
-from awq import AutoAWQForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
+# Only transformer-loadable models
 MODEL_OPTIONS = {
-    "Llama-3.2-3B": ("meta-llama/Llama-3.2-3B-Instruct", "transformers"),
-    "Llama-3.2-1B": ("meta-llama/Llama-3.2-1B-Instruct", "transformers"),
-    "OpenChat-3.5-0106-GPTQ": ("TheBloke/openchat-3.5-0106-GPTQ", "gptq"),
+    "Llama-3.2-3B": "meta-llama/Llama-3.2-3B-Instruct",
+    "Llama-3.2-1B": "meta-llama/Llama-3.2-1B-Instruct",
+    "Mistral-7B-Instruct": "mistralai/Mistral-7B-Instruct-v0.1",
+    "Qwen2.5-3B-Instruct": "Qwen/Qwen2.5-3B-Instruct",
+    "Qwen2.5-1.5B-Instruct": "Qwen/Qwen2.5-1.5B-Instruct",
+    "StableLM2-1.6B": "stabilityai/stablelm-2-zephyr-1_6b",
 }
 
 loaded = {}
-SYSTEM_PROMPT = "You are HugginGPT — a helpful assistant that remembers context and follows instructions."
+SYSTEM_PROMPT = "You are HugginGPT — a helpful assistant with memory."
 
 def load_model(model_key):
-    model_id, mtype = MODEL_OPTIONS[model_key]
+    model_id = MODEL_OPTIONS[model_key]
     if model_key in loaded:
         return loaded[model_key]
 
-    if mtype == "transformers":
-        from transformers import AutoModelForCausalLM
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        model = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            device_map="auto",
-            torch_dtype=torch.float16
-        )
-    elif mtype == "gptq":
-        quant_cfg = BaseQuantizeConfig(bits=4, group_size=64, desc_act=False)
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        model = AutoGPTQForCausalLM.from_quantized(
-            model_id,
-            use_safetensors=True,
-            device="cuda:0",
-            quantize_config=quant_cfg
-        )
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        device_map="auto",
+        torch_dtype=torch.float16,
+    )
     loaded[model_key] = (tokenizer, model)
     return tokenizer, model
 
 def generate_response(message, history, model_choice):
     tokenizer, model = load_model(model_choice)
 
+    # build prompt with system + memory
     context = f"system: {SYSTEM_PROMPT}\n"
     if history:
         if history:
         for u, a in history:
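
The diff ends inside `generate_response`, so the rest of the function is not shown. As a reference point, here is a minimal standalone sketch of how the new single-path loader and the plain-text prompt format fit together; everything past the history loop (the `generate` call, `max_new_tokens=256`, and decoding only the new tokens) is an assumption for illustration, not the file's actual code:

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"  # any entry from MODEL_OPTIONS
SYSTEM_PROMPT = "You are HugginGPT — a helpful assistant with memory."

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, device_map="auto", torch_dtype=torch.float16
)

def generate_response(message, history):
    # Rebuild the plain-text context the same way the app does:
    # system line, then alternating user/assistant turns, then the new turn.
    context = f"system: {SYSTEM_PROMPT}\n"
    for u, a in history or []:
        context += f"user: {u}\nassistant: {a}\n"
    context += f"user: {message}\nassistant:"

    inputs = tokenizer(context, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=256)  # assumed cap
    # Decode only the newly generated tokens, not the echoed prompt.
    new_tokens = output[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

print(generate_response("Hello!", history=[]))
```

Loading in float16 with `device_map="auto"` matches the new loader, and the `loaded` cache in the app means switching back to a previously used model skips the reload.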