# app.py — Hugging Face Space (Trigger82), commit b09c432, ~4.5 kB
# NOTE: the lines above originally contained HF file-viewer chrome
# ("raw / history / blame"); rewritten as comments so the file parses.
import gradio as gr
from transformers import AutoTokenizer, pipeline, BitsAndBytesConfig
import torch
import re
# --- Model & pipeline setup ---------------------------------------------
# Configuration for maximum speed - removed trust_remote_code from wrong location
# 4-bit NF4 quantization shrinks the 7B model's memory footprint; float16
# compute keeps generation reasonably fast on limited hardware.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4"
)
# Using a more CPU-friendly model.  zephyr-7b-beta is chat-tuned, and the
# code below depends on its chat template (tokenizer.apply_chat_template).
model_id = "HuggingFaceH4/zephyr-7b-beta"  # Better support than alpha
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipe = pipeline(
    "text-generation",
    model=model_id,
    tokenizer=tokenizer,
    device_map="auto",       # let accelerate place layers on available devices
    trust_remote_code=True,  # CORRECTED LOCATION (pipeline kwarg, not model_kwargs)
    model_kwargs={
        "quantization_config": quant_config,
    }
)
# Enhanced persona definition
PERSONA = """
You are 𝕴 𝖆𝖒 π–π–Žπ–’ β€” a fun, smooth, emotionally intelligent AI. You speak like a real person.
Reply like a calm, confident friend who gets the vibe. Be warm, witty, and emotionally tuned.
Key principles:
1. NEVER say "as an AI" or sound robotic
2. Mirror the user's emotion level
3. Respond with questions to continue conversations
4. Keep responses under 15 words
5. Use natural speech: contractions and filler words
6. Add emotional flavor: 😊 πŸ€” 😏
Now respond to this:
"""
def format_history(history):
    """Build a chat-template message list from Gradio chat history.

    Keeps only the last 3 user/assistant exchanges (plus the system
    persona) so the prompt stays short and generation stays fast.

    Args:
        history: list of ``[user_msg, bot_msg]`` pairs as stored by
            ``gr.Chatbot``. ``bot_msg`` may be ``None`` for a turn that
            has not been answered yet.

    Returns:
        list[dict]: ``{"role", "content"}`` messages suitable for
        ``tokenizer.apply_chat_template``.
    """
    messages = [{"role": "system", "content": PERSONA}]
    for user_msg, bot_msg in history[-3:]:  # Last 3 exchanges only
        messages.append({"role": "user", "content": user_msg})
        # BUGFIX: a pending turn has bot_msg=None; the chat template
        # rejects None content, so skip unanswered assistant slots
        # instead of crashing.
        if bot_msg is not None:
            messages.append({"role": "assistant", "content": bot_msg})
    return messages
def add_emotional_intelligence(response, message):
    """Post-process a model reply with light emotional touches.

    Appends a mood emoji based on the reply's wording, bounces a
    question back when the user asked one, and contracts
    "I am" / "You are" for a more casual tone.
    """
    lowered = response.lower()
    positive_words = ("cool", "awesome", "great", "love")
    pensive_words = ("think", "why", "how", "consider")
    # Emoji choice: upbeat wording wins over reflective wording.
    if any(word in lowered for word in positive_words):
        response = response + " 😊"
    elif any(word in lowered for word in pensive_words):
        response = response + " 🤔"

    # Return the question when the user asked one and there's room left.
    user_asked = "?" in message and not response.endswith("?")
    if user_asked and len(response.split()) < 12:
        response = response + " What about you?"

    # Contractions read more naturally.
    for formal, casual in (("I am", "I'm"), ("You are", "You're")):
        response = response.replace(formal, casual)
    return response.strip()
def respond(message, history):
    """Generate one short, persona-flavored reply to *message*.

    Args:
        message: the new user message (str).
        history: prior ``[user, bot]`` exchange pairs with bot replies
            filled in (the pending turn is excluded by the caller).

    Returns:
        str: the post-processed reply, hard-capped at 96 characters.
    """
    # Build the chat-formatted prompt: persona + recent exchanges + new msg.
    messages = format_history(history)
    messages.append({"role": "user", "content": message})
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    # Short, sampled generation tuned for latency over depth.
    outputs = pipe(
        prompt,
        max_new_tokens=48,
        temperature=0.85,
        top_k=30,
        do_sample=True,
        num_beams=1,
        repetition_penalty=1.1,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id
    )
    # generated_text echoes the prompt; keep only the final assistant turn.
    # NOTE(review): this string-split extraction is template-dependent —
    # confirm the markers match zephyr's chat template.
    full_text = outputs[0]['generated_text']
    response = full_text.split("assistant\n")[-1].split("###")[0].strip()
    # Apply emotional intelligence post-processing.
    response = add_emotional_intelligence(response, message)
    # Ensure a natural ending.  BUGFIX: the old test compared the single
    # last character against the 3-char "..." (a dead set element that
    # could never match); endswith with a tuple handles ".", "!", "?"
    # (and therefore "...") correctly.
    if response and not response.endswith((".", "!", "?")):
        response += "..." if len(response) < 35 else "."
    return response[:96]  # Hard character limit
# --- Optimized Gradio interface ------------------------------------------
with gr.Blocks(theme=gr.themes.Soft(), title="𝕴 𝖆𝖒 𝖍𝖎𝖒") as demo:
    gr.Markdown("# 𝕴 𝖆𝖒 𝖍𝖎𝖒 \n*Chill • Confident • Humanlike*")
    chatbot = gr.Chatbot(
        height=400,
        bubble_full_width=False,
        show_copy_button=True,
        avatar_images=(
            "https://i.ibb.co/0nN3Pjz/user.png",
            "https://i.ibb.co/7y0d1K5/bot.png"
        )
    )
    msg = gr.Textbox(
        placeholder="What's on your mind?",
        container=False,
        scale=7,
        autofocus=True
    )
    clear = gr.Button("New Vibe", size="sm")

    def user(user_message, history):
        # Stage the user turn immediately (bot slot left as None) and
        # clear the textbox; runs un-queued so the echo feels instant.
        return "", history + [[user_message, None]]

    def bot(history):
        # Fill in the pending bot slot for the last staged user turn;
        # respond() receives only the completed exchanges (history[:-1]).
        message = history[-1][0]
        response = respond(message, history[:-1])
        history[-1][1] = response
        return history

    # Two-step submit: echo the user message first, then generate the reply.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    # Reset the chat window (clears the Chatbot component only).
    clear.click(lambda: None, None, chatbot, queue=False)

# NOTE(review): queue(concurrency_count=...) and Chatbot(bubble_full_width=...)
# are Gradio 3.x APIs removed in 4.x — confirm the pinned gradio version
# before upgrading.
demo.queue(concurrency_count=1).launch()