File size: 2,462 Bytes
8b5528a
 
 
 
8393a22
8b5528a
8393a22
8b5528a
0bbc9b8
 
 
 
8393a22
0bbc9b8
8b5528a
8393a22
b787168
8b5528a
b787168
 
 
 
 
8393a22
49c770a
8393a22
b787168
 
8393a22
 
b787168
 
b560b2d
0bbc9b8
a74f8b5
b787168
 
 
8393a22
b787168
 
 
8393a22
b787168
 
 
 
a74f8b5
b787168
8393a22
b787168
8393a22
 
 
b787168
8393a22
 
 
 
b787168
 
 
 
8393a22
 
b787168
0bbc9b8
b787168
 
 
8393a22
b787168
49c770a
b787168
0bbc9b8
8393a22
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import gradio as gr
from transformers import pipeline
import torch

# 1. SETUP
# Hugging Face model identifier handed to the pipeline below.
model_id = "Qwen/Qwen2.5-0.5B-Instruct"
print("Loading Mochi... 0 Error Version.")

# Text-generation pipeline pinned to the CPU in float32.
# `dtype` is used (not `torch_dtype`) because the latter triggered a warning.
pipe = pipeline("text-generation", model=model_id, device="cpu", dtype=torch.float32)

# In-memory chat history, one list of {"role", "content"} dicts per user id.
user_memories = {}

def chat_logic(user_id, message):
    """Generate Mochi's reply to ``message`` and record the exchange.

    Args:
        user_id: Identifier of the speaker; it is stringified and used as the
            key into the module-level ``user_memories`` dict.
        message: The user's latest message text.

    Returns:
        The assistant's reply text, or a fixed fallback string when generation
        or the bookkeeping that follows raises an exception.
    """
    key = str(user_id)
    user_memories.setdefault(key, [])

    persona = {
        "role": "system",
        "content": "You are Mochi, a chill best friend. Talk like a 20-year-old. Keep it short.",
    }
    # Replay only the 8 most recent stored messages to keep CPU inference fast.
    recent = user_memories[key][-8:]
    prompt = [persona, *recent, {"role": "user", "content": message}]

    try:
        result = pipe(prompt, max_new_tokens=100, do_sample=True, temperature=0.8, truncation=True)
        # The pipeline hands back the whole conversation; the final entry is
        # the newly generated assistant turn.
        reply = result[0]["generated_text"][-1]["content"]

        # Remember both sides of the exchange only once generation succeeded.
        user_memories[key].extend(
            [
                {"role": "user", "content": message},
                {"role": "assistant", "content": reply},
            ]
        )

        # Cap the stored history at the latest 20 messages.
        stored = user_memories[key]
        if len(stored) > 20:
            user_memories[key] = stored[-20:]

        return reply
    except Exception as e:
        print(f"Error: {e}")
        return "brain lag lol. try again?"

# 2. THE WEB UI
with gr.Blocks() as demo:
    gr.Markdown("# 🐾 Mochi AI")

    user_id_input = gr.Textbox(label="User ID", value="default_user")
    # No `type=` argument is passed. This file hands `theme=` to launch(),
    # which is the Gradio 6 calling convention, and in that release the
    # Chatbot works with "messages" history: a list of
    # {"role": ..., "content": ...} dicts.
    chatbot = gr.Chatbot(label="Chat with Mochi")
    msg = gr.Textbox(label="Your Message", placeholder="Type here...")
    clear = gr.Button("Clear Chat")

    def respond(uid, message, chat_history):
        """Handle a submitted message and extend the visible chat.

        Args:
            uid: Value of the "User ID" textbox.
            message: Text the user submitted.
            chat_history: Current Chatbot value (may be None before the
                first exchange).

        Returns:
            A pair ``("", history)``: the empty string clears the input box
            and ``history`` is the updated list of message dicts.
        """
        history = list(chat_history or [])

        # Nothing to answer: leave the conversation untouched.
        if not message or not message.strip():
            return "", history

        bot_message = chat_logic(uid, message)
        # Messages format: one dict per turn instead of a [user, bot] pair.
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": bot_message})
        return "", history

    msg.submit(respond, [user_id_input, msg, chatbot], [msg, chatbot])

    def clear_mem(uid):
        """Forget the stored history for ``uid`` and empty the Chatbot."""
        # chat_logic keys the memory by str(user_id); use the same key here.
        key = str(uid)
        if key in user_memories:
            user_memories[key] = []
        return []

    clear.click(clear_mem, [user_id_input], [chatbot])

# 3. LAUNCH
# The theme is supplied at launch time instead of on gr.Blocks(); placing it
# here is what got rid of the earlier warning.
demo.launch(
    theme=gr.themes.Soft(primary_hue="pink"),
)