Trigger82 committed on
Commit
342a40c
·
verified ·
1 Parent(s): 38d617e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -50
app.py CHANGED
@@ -1,28 +1,19 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, pipeline, BitsAndBytesConfig
3
  import torch
4
  import re
5
 
6
- # Configuration for maximum speed - removed trust_remote_code from wrong location
7
- quant_config = BitsAndBytesConfig(
8
- load_in_4bit=True,
9
- bnb_4bit_compute_dtype=torch.float16,
10
- bnb_4bit_quant_type="nf4"
11
- )
12
-
13
- # Using a more CPU-friendly model
14
- model_id = "HuggingFaceH4/zephyr-7b-beta" # Better support than alpha
15
  tokenizer = AutoTokenizer.from_pretrained(model_id)
16
 
 
17
  pipe = pipeline(
18
  "text-generation",
19
  model=model_id,
20
  tokenizer=tokenizer,
21
- device_map="auto",
22
- trust_remote_code=True, # CORRECTED LOCATION
23
- model_kwargs={
24
- "quantization_config": quant_config,
25
- }
26
  )
27
 
28
  # Enhanced persona definition
@@ -43,7 +34,7 @@ Now respond to this:
43
 
44
  def format_history(history):
45
  messages = [{"role": "system", "content": PERSONA}]
46
- for user_msg, bot_msg in history[-3:]: # Last 3 exchanges only
47
  messages.append({"role": "user", "content": user_msg})
48
  messages.append({"role": "assistant", "content": bot_msg})
49
  return messages
@@ -51,19 +42,24 @@ def format_history(history):
51
  def add_emotional_intelligence(response, message):
52
  """Enhance response with emotional elements"""
53
  # Add emoji based on content
54
- if any(w in response.lower() for w in ["cool", "awesome", "great", "love"]):
55
- response += " 😊"
56
- elif any(w in response.lower() for w in ["think", "why", "how", "consider"]):
57
- response += " πŸ€”"
58
 
59
  # Add conversational hooks
60
  if "?" in message and not response.endswith("?"):
61
- if len(response.split()) < 12: # Only if space allows
62
- response += " What about you?"
63
 
64
  # Make more human-like
65
  response = response.replace("I am", "I'm").replace("You are", "You're")
66
 
 
 
 
 
 
67
  return response.strip()
68
 
69
  def respond(message, history):
@@ -71,51 +67,41 @@ def respond(message, history):
71
  messages = format_history(history)
72
  messages.append({"role": "user", "content": message})
73
 
74
- # Generate response with strict limits
75
- prompt = tokenizer.apply_chat_template(
76
- messages,
77
- tokenize=False,
78
- add_generation_prompt=True
79
- )
80
 
81
- # Optimized for speed - CORRECTED PARAMETERS
82
  outputs = pipe(
83
  prompt,
84
- max_new_tokens=48,
85
- temperature=0.85,
86
- top_k=30,
87
  do_sample=True,
88
- num_beams=1,
89
  repetition_penalty=1.1,
90
- eos_token_id=tokenizer.eos_token_id,
91
- pad_token_id=tokenizer.eos_token_id
92
  )
93
 
94
  # Extract response
95
- full_text = outputs[0]['generated_text']
96
- response = full_text.split("assistant\n")[-1].split("###")[0].strip()
97
 
98
  # Apply emotional intelligence
99
  response = add_emotional_intelligence(response, message)
100
 
101
- # Ensure natural ending
102
- if response and response[-1] not in {".", "!", "?", "..."}:
103
- response += "..." if len(response) < 35 else "."
104
-
105
- return response[:96] # Hard character limit
106
 
107
- # Optimized interface
108
  with gr.Blocks(theme=gr.themes.Soft(), title="𝕴 𝖆𝖒 π–π–Žπ–’") as demo:
109
  gr.Markdown("# 𝕴 𝖆𝖒 π–π–Žπ–’ \n*Chill β€’ Confident β€’ Humanlike*")
110
 
111
  chatbot = gr.Chatbot(
112
- height=400,
113
- bubble_full_width=False,
114
- show_copy_button=True,
115
- avatar_images=(
116
- "https://i.ibb.co/0nN3Pjz/user.png",
117
- "https://i.ibb.co/7y0d1K5/bot.png"
118
- )
119
  )
120
 
121
  msg = gr.Textbox(
 
1
import gradio as gr
from transformers import AutoTokenizer, pipeline
import torch
import re

# Free-tier optimized model. NOTE(review): zephyr-7b-alpha is the same 7B
# parameter count as -beta (the old "smaller than beta" comment was wrong);
# it is used here for a CPU-only, no-quantization free-tier deployment.
model_id = "HuggingFaceH4/zephyr-7b-alpha"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Free-tier friendly setup: plain float32 weights on CPU, no quantization.
pipe = pipeline(
    "text-generation",
    model=model_id,
    tokenizer=tokenizer,
    device="cpu",               # force CPU-only execution
    torch_dtype=torch.float32,  # float32 for CPU compatibility
)
18
 
19
  # Enhanced persona definition
 
34
 
35
def format_history(history):
    """Convert recent chat history into a chat-message list.

    Keeps only the last two (user, assistant) exchanges to bound prompt
    length on the free tier, prefixed with the system persona.
    """
    messages = [{"role": "system", "content": PERSONA}]
    for user_text, reply_text in history[-2:]:  # only last 2 exchanges
        messages.extend(
            (
                {"role": "user", "content": user_text},
                {"role": "assistant", "content": reply_text},
            )
        )
    return messages
 
42
def add_emotional_intelligence(response, message):
    """Post-process a model reply so it reads warmer and more conversational.

    Mirrors the user's punctuation with an emoji, bounces short answers
    back with a follow-up question, contracts formal phrasing, and caps
    the reply at 15 words for free-tier latency.
    """
    excited = "!" in message
    asked = "?" in message

    # Mirror the user's tone with an emoji.
    if excited:
        response = response.replace(".", "!") + " 😊"
    elif asked and not response.endswith("?"):
        response = response + " πŸ€”"

    # Bounce the question back when the reply is short enough.
    if asked and not response.endswith("?") and len(response) < 60:
        response += " How about you?"

    # Prefer contractions for a more human voice.
    for formal, casual in (("I am", "I'm"), ("You are", "You're")):
        response = response.replace(formal, casual)

    # Free-tier guard: never exceed 15 words.
    words = response.split()
    if len(words) > 15:
        response = " ".join(words[:15]) + "..."

    return response.strip()
64
 
65
def respond(message, history):
    """Build a plain-text chat prompt, generate a reply, and post-process it."""
    messages = format_history(history)
    messages.append({"role": "user", "content": message})

    # Render the chat as a simple "Role: text" transcript — a lightweight
    # alternative to tokenizer.apply_chat_template.
    # NOTE(review): the system persona is rendered under "Assistant:" here
    # (any non-"user" role maps to Assistant) — confirm that is intentional.
    transcript = []
    for entry in messages:
        speaker = "User" if entry["role"] == "user" else "Assistant"
        transcript.append(f"{speaker}: {entry['content']}")
    prompt = "\n".join(transcript) + "\nAssistant:"

    # Free-tier optimized generation settings.
    outputs = pipe(
        prompt,
        max_new_tokens=48,        # short responses
        temperature=0.9,
        top_k=40,
        do_sample=True,
        num_beams=1,              # fastest decoding
        repetition_penalty=1.1,
        no_repeat_ngram_size=2,
    )

    # Everything after the final "Assistant:" marker is the new reply.
    generated = outputs[0]["generated_text"]
    reply = generated.split("Assistant:")[-1].strip()

    # Warm up the tone and enforce the word cap.
    reply = add_emotional_intelligence(reply, message)

    # Free-tier safety: hard character limit.
    return reply[:80]
 
 
 
97
 
98
+ # Lightweight interface
99
  with gr.Blocks(theme=gr.themes.Soft(), title="𝕴 𝖆𝖒 π–π–Žπ–’") as demo:
100
  gr.Markdown("# 𝕴 𝖆𝖒 π–π–Žπ–’ \n*Chill β€’ Confident β€’ Humanlike*")
101
 
102
  chatbot = gr.Chatbot(
103
+ height=350,
104
+ bubble_full_width=False
 
 
 
 
 
105
  )
106
 
107
  msg = gr.Textbox(