Update app.py
app.py CHANGED
@@ -1,43 +1,21 @@
 import gradio as gr
 import torch
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    TextIteratorStreamer,
-    pipeline,
-    BitsAndBytesConfig
-)
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, pipeline
 from threading import Thread
 import random
 
-# …
+# Use CPU-friendly configuration 🖥️
 model_name = "HuggingFaceH4/zephyr-7b-beta"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-# …
-quantization_config = BitsAndBytesConfig(
-    …
-)
-
-# Model loading with fallback
-try:
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        quantization_config=quantization_config if device == "cuda" else None,
-        device_map="auto",
-        torch_dtype=torch.float16 if device == "cuda" else torch.float32
-    )
-except Exception as e:
-    print(f"Error loading model with GPU: {e}")
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        device_map="cpu",
-        torch_dtype=torch.float32
-    )
+# Load model with CPU optimization
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    device_map="auto",
+    torch_dtype=torch.float32,
+    low_cpu_mem_usage=True
+)
 
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 # Safety tools 🛡️
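A quick sanity check on the new loading path: zephyr-7b-beta has roughly 7 billion parameters, and float32 weights take 4 bytes each, so the model alone needs about 28 GB of RAM on CPU. The snippet below is illustrative only and not part of app.py; it estimates the footprint of the loaded model:

# Rough footprint estimate for the CPU-loaded model (sketch, not in app.py).
# float32 weights cost 4 bytes per parameter, so ~7B params ≈ 28 GB of RAM.
n_params = sum(p.numel() for p in model.parameters())
print(f"{n_params / 1e9:.1f}B params ≈ {n_params * 4 / 1e9:.0f} GB as float32")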
@@ -47,11 +25,7 @@ SAFE_IDEAS = [
     "Code a game about recycling ♻️",
     "Plan an AI tool for school safety 🚸"
 ]
-safety_checker = pipeline(
-    "text-classification",
-    model="unitary/toxic-bert",
-    device=0 if device == "cuda" else -1
-)
+safety_checker = pipeline("text-classification", model="unitary/toxic-bert")
 
 def is_safe(text):
     text = text.lower()
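Since the body of is_safe is truncated in this view, here is a hedged sketch of how the classifier can back up the keyword check; the "toxic" label name and the 0.5 threshold are assumptions based on the unitary/toxic-bert model card, not code from this commit:

# Assumed usage of the classifier inside a safety check (see note above).
def looks_toxic(text, threshold=0.5):
    # Crude character cap to stay under BERT's input limit.
    result = safety_checker(text[:512])[0]  # e.g. {"label": "toxic", "score": 0.97}
    return result["label"] == "toxic" and result["score"] >= threshold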
@@ -66,7 +40,7 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
 
     messages = [{"role": "system", "content": system_message}]
 
-    for user_msg, bot_msg in history[-…
+    for user_msg, bot_msg in history[-3:]:  # Reduce history length for CPU
         if user_msg:
             messages.append({"role": "user", "content": user_msg})
         if bot_msg:
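Concretely, tuple-style Gradio chat history is a list of (user, bot) pairs, so the new slice keeps only the three most recent exchanges and shortens the prompt for CPU inference:

# With four past exchanges, only the newest three reach the prompt:
history = [("hi", "hello"), ("q1", "a1"), ("q2", "a2"), ("q3", "a3")]
history[-3:]  # -> [("q1", "a1"), ("q2", "a2"), ("q3", "a3")]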
@@ -82,7 +56,7 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
     generation_kwargs = {
         "inputs": inputs,
-        "max_new_tokens": max_tokens,
+        "max_new_tokens": min(max_tokens, 256),  # Limit tokens for CPU
         "temperature": temperature,
         "top_p": top_p,
         "streamer": streamer
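The rest of respond is unchanged and elided here, but generation_kwargs plus a TextIteratorStreamer is the standard transformers streaming pattern (and the reason app.py imports Thread): generation runs on a worker thread while the handler yields partial text. A minimal sketch of that consuming loop inside respond:

# Generate on a background thread; yield partial output as tokens arrive.
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()
response = ""
for new_text in streamer:
    response += new_text
    yield response  # Gradio re-renders the growing reply on each yield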
@@ -102,9 +76,9 @@ with gr.Blocks() as demo:
         respond,
         additional_inputs=[
             gr.Textbox("You help students create ethical AI projects.", label="Guidelines"),
-            gr.Slider(…
-            gr.Slider(0.1, 1.0, value=0.…
-            gr.Slider(0.7, 1.0, value=0.…
+            gr.Slider(64, 512, value=256, label="Max Response Length"),
+            gr.Slider(0.1, 1.0, value=0.5, label="Creativity Level"),
+            gr.Slider(0.7, 1.0, value=0.9, label="Focus Level")
         ],
         examples=[
             ["How to build a robot that plants trees?"],
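One wiring detail: the chat interface (presumably gr.ChatInterface, given the respond/additional_inputs/examples arguments) passes additional_inputs to respond positionally after message and history, so the Guidelines textbox fills system_message and the three sliders fill max_tokens, temperature and top_p in that order. Given the min(max_tokens, 256) cap above, "Max Response Length" settings beyond 256 are effectively clamped on CPU.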