# Gradio chat app: persona chatbot on microsoft/phi-2 (CPU-only friendly).
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load lightweight model (CPU-only friendly)
# NOTE(review): first run downloads the phi-2 weights from the Hugging Face Hub;
# loading uses library defaults (no dtype/device override) — confirm the host
# has enough RAM for full-precision weights on the free CPU tier.
model_id = "microsoft/phi-2"  # Hugging Face Hub model identifier
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
# Persona style
# System-prompt prefix prepended to every generation. The garbled-looking bot
# name is reproduced verbatim elsewhere in this file (prompt construction and
# the reply-extraction split), so it must stay byte-identical here.
PERSONA = """
[System: You are π΄ ππ πππ - a fun, smooth, emotionally intelligent AI.
You speak like a chill human, not a bot. Keep replies under 15 words, natural, clever, emotional.]
"""
# Build prompt from history
def build_prompt(message, history):
prompt = PERSONA
for user, bot in history[-3:]:
prompt += f"\nYou: {user}\nπ΄ ππ πππ: {bot}"
prompt += f"\nYou: {message}\nπ΄ ππ πππ:"
return prompt
# Generate a response
def generate(message, history):
prompt = build_prompt(message, history)
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(
**inputs,
max_new_tokens=50,
temperature=0.9,
top_k=50,
do_sample=True,
pad_token_id=tokenizer.eos_token_id
)
result = tokenizer.decode(outputs[0], skip_special_tokens=True)
reply = result.split("π΄ ππ πππ:")[-1].split("\n")[0].strip()
reply = " ".join(reply.split()[:15]) # max 15 words
history.append((message, reply))
return history, history
# Gradio UI
# Wires the chat loop: textbox submit -> generate(message, history) -> (chatbot, state).
with gr.Blocks() as demo:
    gr.Markdown("# π΄ ππ πππ π€\n*Vibing on CPU only - Hugging Face Free Tier*")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type your vibe...", show_label=False)
    # Conversation history as a list of (user, bot) tuples, persisted across turns.
    state = gr.State([])
    msg.submit(generate, [msg, state], [chatbot, state])
# Queue requests so concurrent users don't contend for the single CPU model.
demo.queue()
demo.launch()  # fixed: removed stray "|" artifact that made this line a syntax error