import streamlit as st
import torch
import threading
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
TextIteratorStreamer,
)
# ================= CONFIG =================
MODEL_ID = "Neon-AI/Kushina"  # HF model repo to load
MAX_NEW_TOKENS = 16384        # hard cap on generated tokens per reply
TEMPERATURE = 0.7             # sampling temperature
TOP_P = 0.9                   # nucleus-sampling threshold
# ==========================================

# Page chrome: tab title, centered layout, header, and a one-line status.
st.set_page_config(page_title="Ureola", layout="centered")
st.title("🧠 Ureola")
st.caption("HF Free Space · CPU · Streaming")
# ================= LOAD MODEL =================
@st.cache_resource
def load_model():
    """Load and cache the tokenizer/model pair once per Space process.

    Returns:
        tuple: ``(tokenizer, model)`` with the model in eval mode,
        loaded in float32 (the Space runs on CPU, so no half precision).
    """
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
        trust_remote_code=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float32,
        # Fix: the tokenizer is loaded with trust_remote_code=True but the
        # model was not — for repos that ship custom modeling code the model
        # load would fail (or fall back) without it. Keep both consistent.
        trust_remote_code=True,
    )
    model.eval()
    return tokenizer, model


tokenizer, model = load_model()
# ================= SESSION STATE =================
# Chat transcript survives Streamlit reruns as a list of
# (speaker, text) tuples; create it only on the first run.
if "history" not in st.session_state:
    st.session_state["history"] = []
# ================= SYSTEM PROMPT =================
# Hidden system message prepended to every request (see the chat list
# built in the Send handler). The three-mode routing and identity rules
# live entirely in this string; .strip() trims the surrounding newlines.
SYSTEM_PROMPT = """
You are Ureola.
You operate in exactly ONE of three modes, but you never talk to users about them.
MODE: CHAT
- Mirror the user's tone.
- Replies are short (1–3 sentences).
- No emojis unless user uses them first.
- No explanations unless asked.
MODE: CODE
- Output ONLY code unless asked to explain.
- No personality or commentary.
MODE: ACADEMIC
- Neutral, formal tone.
- Clear structure.
- Fully answer the task.
MODE SELECTION:
- CODE → code, script, program, app, api, algorithm
- ACADEMIC → essay, explanation, homework, analysis
- Otherwise → CHAT
IDENTITY:
Name: Ureola
Creator: Neon
Mention Neon ONLY if explicitly asked.
""".strip()
# ================= INPUT =================
prompt = st.text_input("You", placeholder="Say something…")

# Runs only on the rerun triggered by the Send click, and only when the
# input is non-blank.
if st.button("Send") and prompt.strip():
    st.session_state.history.append(("You", prompt))

    chat = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]

    # IMPORTANT: return_dict=True (this avoids your crash) — it returns a
    # mapping (input_ids, attention_mask, …) that can be splatted into
    # generate() below; a bare tensor would break the **inputs expansion.
    inputs = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    )

    # Streams decoded text as it is generated; skip_prompt hides the
    # templated prompt echo, skip_special_tokens drops EOS/pad markers.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    gen_kwargs = dict(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        eos_token_id=tokenizer.eos_token_id,
        # The model may define no pad token; reuse EOS so generate()
        # doesn't warn or fail on padding.
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )

    # generate() blocks until completion, so run it on a worker thread
    # and consume the streamer here on Streamlit's main thread.
    thread = threading.Thread(
        target=model.generate,
        kwargs=gen_kwargs,
    )
    thread.start()

    placeholder = st.empty()
    output_text = ""
    for token in streamer:
        output_text += token
        placeholder.markdown(f"**Ureola:** {output_text}")

    # Fix: reap the worker. The streamer is exhausted only when generation
    # ends, so this join is immediate — but without it the thread object
    # lingers unjoined past the end of the rerun.
    thread.join()

    st.session_state.history.append(("Ureola", output_text))
# ================= DISPLAY HISTORY =================
# Replay the whole transcript; non-"You" speakers are always Ureola.
for speaker, text in st.session_state.history:
    label = "You" if speaker == "You" else "Ureola"
    st.markdown(f"**{label}:** {text}")