Update app.py
app.py CHANGED
@@ -1,47 +1,38 @@
 import gradio as gr
 import torch
-from transformers import AutoTokenizer
-
-from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
-from awq import AutoAWQForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
+# Only transformer-loadable models
 MODEL_OPTIONS = {
-    "Llama-3.2-3B": …
-    "Llama-3.2-1B": …
-    "…
+    "Llama-3.2-3B": "meta-llama/Llama-3.2-3B-Instruct",
+    "Llama-3.2-1B": "meta-llama/Llama-3.2-1B-Instruct",
+    "Mistral-7B-Instruct": "mistralai/Mistral-7B-Instruct-v0.1",
+    "Qwen2.5-3B-Instruct": "Qwen/Qwen2.5-3B-Instruct",
+    "Qwen2.5-1.5B-Instruct": "Qwen/Qwen2.5-1.5B-Instruct",
+    "StableLM2-1.6B": "stabilityai/stablelm-2-zephyr-1_6b",
 }
 
 loaded = {}
-SYSTEM_PROMPT = "You are HugginGPT — a helpful assistant …
+SYSTEM_PROMPT = "You are HugginGPT — a helpful assistant with memory."
 
 def load_model(model_key):
-    model_id …
+    model_id = MODEL_OPTIONS[model_key]
     if model_key in loaded:
         return loaded[model_key]
 
-    …
-    …
-    …
-    …
-    …
-    …
-        torch_dtype=torch.float16
-    )
-    elif mtype == "gptq":
-        quant_cfg = BaseQuantizeConfig(bits=4, group_size=64, desc_act=False)
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        model = AutoGPTQForCausalLM.from_quantized(
-            model_id,
-            use_safetensors=True,
-            device="cuda:0",
-            quantize_config=quant_cfg
-        )
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        device_map="auto",
+        torch_dtype=torch.float16,
+    )
     loaded[model_key] = (tokenizer, model)
     return tokenizer, model
 
 def generate_response(message, history, model_choice):
     tokenizer, model = load_model(model_choice)
 
+    # build prompt with system + memory
     context = f"system: {SYSTEM_PROMPT}\n"
     if history:
         for u, a in history:
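
Note: the hunk ends mid-loop, so the rest of generate_response is not part of this diff. For orientation only, here is a minimal sketch of how the function could plausibly continue, keeping the plain "role: text" serialization the visible lines establish; everything past the for loop (the generation settings, max_new_tokens=256, the token slicing) is an assumption, not code from this commit.

# Sketch only; assumes load_model and SYSTEM_PROMPT from app.py above.
def generate_response(message, history, model_choice):
    tokenizer, model = load_model(model_choice)

    # build prompt with system + memory
    context = f"system: {SYSTEM_PROMPT}\n"
    if history:
        for u, a in history:
            context += f"user: {u}\nassistant: {a}\n"
    context += f"user: {message}\nassistant: "

    inputs = tokenizer(context, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=256,                   # assumed limit, not in the diff
            do_sample=True,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,  # some of these models ship no pad token
        )
    # decode only the tokens generated after the prompt, not the echoed context
    new_tokens = output[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()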
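
The Gradio wiring is also outside the hunk. Given the (message, history, model_choice) signature and the (user, assistant) pairs iterated from history, a plausible hookup is gr.ChatInterface with a model dropdown passed as an additional input; the default value, labels, and launch() call below are illustrative assumptions, not taken from the commit.

# Sketch only; component labels and defaults are assumptions.
demo = gr.ChatInterface(
    fn=generate_response,
    additional_inputs=[
        gr.Dropdown(
            choices=list(MODEL_OPTIONS.keys()),
            value="Llama-3.2-1B",
            label="Model",
        )
    ],
    title="HugginGPT",
)

if __name__ == "__main__":
    demo.launch()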