File size: 3,191 Bytes
75eb8a9
35eaef3
 
ffaae69
54b468c
ffaae69
9ea48ba
ffaae69
2648494
9ea48ba
54b468c
 
be43da3
 
5c0c26c
54b468c
9ea48ba
54b468c
 
35eaef3
9ea48ba
35eaef3
 
54b468c
 
 
 
 
35eaef3
 
54b468c
35eaef3
54b468c
35eaef3
 
 
54b468c
35eaef3
e581288
9ea48ba
ce032b0
 
ffaae69
9ea48ba
54b468c
 
5c0c26c
9ea48ba
54b468c
 
 
5c0c26c
54b468c
9ea48ba
54b468c
 
 
9ea48ba
54b468c
9869736
 
 
9ea48ba
54b468c
 
 
 
9ea48ba
54b468c
9ea48ba
 
 
 
 
 
 
 
 
 
 
35eaef3
54b468c
9ea48ba
35eaef3
 
54b468c
ffaae69
 
 
84b2dc5
 
ffaae69
35eaef3
ffaae69
 
 
 
 
35eaef3
 
84b2dc5
ffaae69
84b2dc5
35eaef3
 
 
 
54b468c
5c0c26c
be43da3
54b468c
 
 
 
35eaef3
be43da3
35eaef3
ffaae69
54b468c
35eaef3
ffaae69
9ea48ba
35eaef3
9ea48ba
5c0c26c
9ea48ba
ce032b0
 
 
75eb8a9
9ea48ba
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import streamlit as st
import torch
import threading
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
)

# ================= CONFIG =================
MODEL_ID = "Neon-AI/Kushina"  # Hugging Face Hub repo id of the chat model
MAX_NEW_TOKENS = 16384        # hard cap on tokens generated per reply
TEMPERATURE = 0.7             # sampling temperature passed to generate()
TOP_P = 0.9                   # nucleus-sampling cutoff passed to generate()
# ==========================================

# Basic page chrome for the app (title bar, heading, subtitle).
st.set_page_config(page_title="Ureola", layout="centered")
st.title("🧠 Ureola")
st.caption("HF Free Space · CPU · Streaming")

# ================= LOAD MODEL =================
@st.cache_resource
def load_model():
    """Load and return the (tokenizer, model) pair for MODEL_ID.

    Decorated with st.cache_resource so the slow download/initialisation
    runs once per server process instead of on every script rerun.
    """
    tok = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    lm = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float32)
    # Inference only: switch off training-mode layers (dropout etc.).
    lm.eval()
    return tok, lm


tokenizer, model = load_model()

# ================= SESSION STATE =================
if "history" not in st.session_state:
    st.session_state.history = []

# ================= SYSTEM PROMPT =================
# Instruction block sent as the "system" message of every request.
# .strip() removes the leading/trailing newlines introduced by the
# triple-quoted literal. Runtime data — do not reformat casually.
SYSTEM_PROMPT = """
You are Ureola.
You operate in exactly ONE of three modes, but you never talk to users about them.

MODE: CHAT
- Mirror the user's tone.
- Replies are short (1–3 sentences).
- No emojis unless user uses them first.
- No explanations unless asked.

MODE: CODE
- Output ONLY code unless asked to explain.
- No personality or commentary.

MODE: ACADEMIC
- Neutral, formal tone.
- Clear structure.
- Fully answer the task.

MODE SELECTION:
- CODE → code, script, program, app, api, algorithm
- ACADEMIC → essay, explanation, homework, analysis
- Otherwise → CHAT

IDENTITY:
Name: Ureola
Creator: Neon
Mention Neon ONLY if explicitly asked.
""".strip()

# ================= INPUT =================
prompt = st.text_input("You", placeholder="Say something…")

if st.button("Send") and prompt.strip():
    st.session_state.history.append(("You", prompt))

    # NOTE(review): only the current prompt is sent — earlier turns in
    # st.session_state.history are not included, so the model has no
    # multi-turn memory. Confirm whether that is intentional.
    chat = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]

    # IMPORTANT: return_dict=True (this avoids your crash)
    inputs = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True
    )

    # skip_prompt=True: yield only newly generated tokens, not the prompt echo.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True
    )

    gen_kwargs = dict(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,  # reuse EOS as pad token
        streamer=streamer,
    )

    # generate() blocks, so run it in a worker thread while the main
    # thread drains the streamer and updates the UI incrementally.
    thread = threading.Thread(
        target=model.generate,
        kwargs=gen_kwargs
    )
    thread.start()

    placeholder = st.empty()
    output_text = ""

    for token in streamer:
        output_text += token
        placeholder.markdown(f"**Ureola:** {output_text}")

    # BUGFIX: join the worker so generation has fully finished before the
    # reply is committed to history (previously the thread was never joined).
    thread.join()

    st.session_state.history.append(("Ureola", output_text))

    # BUGFIX: the reply now lives in history, which is rendered below —
    # clear the streaming placeholder so the answer isn't shown twice.
    placeholder.empty()

# ================= DISPLAY HISTORY =================
# Render the full conversation log; the speaker label is the stored
# speaker name ("You" or "Ureola"), bolded via markdown.
for who, said in st.session_state.history:
    label = "You" if who == "You" else "Ureola"
    st.markdown(f"**{label}:** {said}")