# app.py — Hugging Face Space (Trigger82), commit b09c432, ~4.5 kB
# NOTE: the lines above originally contained HF file-viewer chrome
# ("raw / history / blame"); rewritten as comments so the file parses.
import gradio as gr
from transformers import AutoTokenizer, pipeline, BitsAndBytesConfig
import torch
import re
# --- Model & pipeline setup ---------------------------------------------
# Configuration for maximum speed - removed trust_remote_code from wrong location
# 4-bit NF4 quantization shrinks the 7B model's memory footprint; float16
# compute keeps generation reasonably fast on limited hardware.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4"
)
# Using a more CPU-friendly model.  zephyr-7b-beta is chat-tuned, and the
# code below depends on its chat template (tokenizer.apply_chat_template).
model_id = "HuggingFaceH4/zephyr-7b-beta"  # Better support than alpha
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipe = pipeline(
    "text-generation",
    model=model_id,
    tokenizer=tokenizer,
    device_map="auto",       # let accelerate place layers on available devices
    trust_remote_code=True,  # CORRECTED LOCATION (pipeline kwarg, not model_kwargs)
    model_kwargs={
        "quantization_config": quant_config,
    }
)
# Enhanced persona definition
PERSONA = """
You are 𝕴 𝖆𝖒 π–π–Žπ–’ β€” a fun, smooth, emotionally intelligent AI. You speak like a real person.
Reply like a calm, confident friend who gets the vibe. Be warm, witty, and emotionally tuned.
Key principles:
1. NEVER say "as an AI" or sound robotic
2. Mirror the user's emotion level
3. Respond with questions to continue conversations
4. Keep responses under 15 words
5. Use natural speech: contractions and filler words
6. Add emotional flavor: 😊 πŸ€” 😏
Now respond to this:
"""
def format_history(history):
    """Build a chat-template message list from Gradio chat history.

    Keeps only the last 3 user/assistant exchanges (plus the system
    persona) so the prompt stays short and generation stays fast.

    Args:
        history: list of ``[user_msg, bot_msg]`` pairs as stored by
            ``gr.Chatbot``. ``bot_msg`` may be ``None`` for a turn that
            has not been answered yet.

    Returns:
        list[dict]: ``{"role", "content"}`` messages suitable for
        ``tokenizer.apply_chat_template``.
    """
    messages = [{"role": "system", "content": PERSONA}]
    for user_msg, bot_msg in history[-3:]:  # Last 3 exchanges only
        messages.append({"role": "user", "content": user_msg})
        # BUGFIX: a pending turn has bot_msg=None; the chat template
        # rejects None content, so skip unanswered assistant slots
        # instead of crashing.
        if bot_msg is not None:
            messages.append({"role": "assistant", "content": bot_msg})
    return messages
def add_emotional_intelligence(response, message):
    """Post-process a model reply with light emotional touches.

    Appends a mood emoji based on the reply's wording, bounces a
    question back when the user asked one, and contracts
    "I am" / "You are" for a more casual tone.
    """
    lowered = response.lower()
    positive_words = ("cool", "awesome", "great", "love")
    pensive_words = ("think", "why", "how", "consider")
    # Emoji choice: upbeat wording wins over reflective wording.
    if any(word in lowered for word in positive_words):
        response = response + " 😊"
    elif any(word in lowered for word in pensive_words):
        response = response + " 🤔"

    # Return the question when the user asked one and there's room left.
    user_asked = "?" in message and not response.endswith("?")
    if user_asked and len(response.split()) < 12:
        response = response + " What about you?"

    # Contractions read more naturally.
    for formal, casual in (("I am", "I'm"), ("You are", "You're")):
        response = response.replace(formal, casual)
    return response.strip()
def respond(message, history):
    """Generate one short, persona-flavored reply to *message*.

    Args:
        message: the new user message (str).
        history: prior ``[user, bot]`` exchange pairs with bot replies
            filled in (the pending turn is excluded by the caller).

    Returns:
        str: the post-processed reply, hard-capped at 96 characters.
    """
    # Build the chat-formatted prompt: persona + recent exchanges + new msg.
    messages = format_history(history)
    messages.append({"role": "user", "content": message})
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    # Short, sampled generation tuned for latency over depth.
    outputs = pipe(
        prompt,
        max_new_tokens=48,
        temperature=0.85,
        top_k=30,
        do_sample=True,
        num_beams=1,
        repetition_penalty=1.1,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id
    )
    # generated_text echoes the prompt; keep only the final assistant turn.
    # NOTE(review): this string-split extraction is template-dependent —
    # confirm the markers match zephyr's chat template.
    full_text = outputs[0]['generated_text']
    response = full_text.split("assistant\n")[-1].split("###")[0].strip()
    # Apply emotional intelligence post-processing.
    response = add_emotional_intelligence(response, message)
    # Ensure a natural ending.  BUGFIX: the old test compared the single
    # last character against the 3-char "..." (a dead set element that
    # could never match); endswith with a tuple handles ".", "!", "?"
    # (and therefore "...") correctly.
    if response and not response.endswith((".", "!", "?")):
        response += "..." if len(response) < 35 else "."
    return response[:96]  # Hard character limit
# --- Optimized Gradio interface ------------------------------------------
with gr.Blocks(theme=gr.themes.Soft(), title="𝕴 𝖆𝖒 𝖍𝖎𝖒") as demo:
    gr.Markdown("# 𝕴 𝖆𝖒 𝖍𝖎𝖒 \n*Chill • Confident • Humanlike*")
    chatbot = gr.Chatbot(
        height=400,
        bubble_full_width=False,
        show_copy_button=True,
        avatar_images=(
            "https://i.ibb.co/0nN3Pjz/user.png",
            "https://i.ibb.co/7y0d1K5/bot.png"
        )
    )
    msg = gr.Textbox(
        placeholder="What's on your mind?",
        container=False,
        scale=7,
        autofocus=True
    )
    clear = gr.Button("New Vibe", size="sm")

    def user(user_message, history):
        # Stage the user turn immediately (bot slot left as None) and
        # clear the textbox; runs un-queued so the echo feels instant.
        return "", history + [[user_message, None]]

    def bot(history):
        # Fill in the pending bot slot for the last staged user turn;
        # respond() receives only the completed exchanges (history[:-1]).
        message = history[-1][0]
        response = respond(message, history[:-1])
        history[-1][1] = response
        return history

    # Two-step submit: echo the user message first, then generate the reply.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    # Reset the chat window (clears the Chatbot component only).
    clear.click(lambda: None, None, chatbot, queue=False)

# NOTE(review): queue(concurrency_count=...) and Chatbot(bubble_full_width=...)
# are Gradio 3.x APIs removed in 4.x — confirm the pinned gradio version
# before upgrading.
demo.queue(concurrency_count=1).launch()