# Source: TitleOS, commit ae0a63f (verified) - "Update app.py"
import os
import threading
import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
# Access token read from the environment (None when HF_TOKEN is unset) and
# the Hub repository the tokenizer and model are loaded from.
HF_TOKEN = os.getenv("HF_TOKEN")
REPO_ID = "TitleOS/GalacticReasoning-1.3B-Q8"

# Standard ChatML template for models missing their tokenizer configs.
# Each message renders as "<|im_start|>{role}\n{content}<|im_end|>\n"; when
# add_generation_prompt is set, an opening assistant header is appended.
FALLBACK_CHAT_TEMPLATE = "".join(
    [
        "{% for message in messages %}",
        "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}",
        "{% endfor %}",
        "{% if add_generation_prompt %}",
        "{{ '<|im_start|>assistant\n' }}",
        "{% endif %}",
    ]
)

# Process-wide cache filled in by load_model() on first use.
tokenizer = model = None
def load_model():
    """Return the shared ``(tokenizer, model)`` pair, loading it on first use.

    The pair is cached in the module-level ``tokenizer`` and ``model``
    globals. Loading only happens while ``model`` is still ``None``; every
    later call returns the cached objects unchanged.
    """
    global tokenizer, model

    # Fast path: a previous call already populated the cache.
    if model is not None:
        return tokenizer, model

    tokenizer = AutoTokenizer.from_pretrained(REPO_ID, token=HF_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(
        REPO_ID,
        token=HF_TOKEN,
        device_map="auto",
    )
    return tokenizer, model
def _read_attachment(filepath):
    """Return the file at *filepath* as a user chat message, or None if it cannot be read."""
    try:
        # errors="ignore" drops undecodable bytes instead of failing on
        # files that are not valid UTF-8.
        with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
            file_text = f.read()
        return {
            "role": "user",
            "content": f"--- Attachment: {os.path.basename(filepath)} ---\n{file_text}",
        }
    except Exception as e:
        # Best effort: an unreadable attachment is reported and skipped so
        # the rest of the conversation can still be answered.
        print(f"Error reading file: {e}")
        return None


def _build_messages(history):
    """Convert chat history into plain-text messages, merging consecutive same-role turns."""
    merged_messages = []
    for msg in history:
        role = msg["role"]
        content = msg["content"]

        if isinstance(content, str):
            # Blank or whitespace-only text carries no prompt information.
            entry = {"role": role, "content": content} if content.strip() else None
        elif isinstance(content, tuple) and content:
            # File messages carry the path as the first tuple element.
            entry = _read_attachment(content[0])
        else:
            entry = None

        if entry is None:
            continue

        # The ChatML prompt is built from alternating turns, so adjacent
        # messages from the same role are folded into one.
        if merged_messages and merged_messages[-1]["role"] == entry["role"]:
            merged_messages[-1]["content"] += "\n\n" + entry["content"]
        else:
            merged_messages.append(entry)
    return merged_messages


@spaces.GPU(duration=180)
def bot(history):
    """Stream the model's reply to the conversation in *history*.

    Args:
        history: List of message dicts with ``"role"`` and ``"content"``
            keys. String content is used as-is; tuple content is treated as
            ``(filepath, ...)`` and the file's text is inlined as a user
            message. Other content types are ignored.

    Yields:
        The same *history* list, with a trailing assistant message whose
        content grows as new text is generated. If the history contains
        nothing usable, it is yielded once unchanged and no generation runs.
    """
    tok, mod = load_model()

    merged_messages = _build_messages(history)
    if not merged_messages:
        # Nothing to answer: do not generate from a bare assistant header.
        yield history
        return

    # We inject the fallback template here to bypass the missing config error.
    # return_dict=True makes the return type explicit and provides the
    # attention mask alongside the token ids.
    encoded = tok.apply_chat_template(
        merged_messages,
        chat_template=FALLBACK_CHAT_TEMPLATE,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(mod.device)

    history.append({"role": "assistant", "content": ""})
    streamer = TextIteratorStreamer(tok, timeout=10.0, skip_prompt=True, skip_special_tokens=True)

    # Only the inputs generate() needs are forwarded; any extra tokenizer
    # outputs are deliberately left out.
    generate_kwargs = dict(
        input_ids=encoded["input_ids"],
        attention_mask=encoded.get("attention_mask"),
        streamer=streamer,
        max_new_tokens=4096,
    )

    # generate() blocks until completion, so it runs in a worker thread
    # while this generator relays text from the streamer.
    worker = threading.Thread(target=mod.generate, kwargs=generate_kwargs)
    worker.start()
    for new_text in streamer:
        history[-1]["content"] += new_text
        yield history

    # The streamer is exhausted, so generation is finishing; wait for the
    # worker so it does not outlive this call.
    worker.join()
def add_user_message(msg, hist):
    """Append the submitted files and text to the chat history.

    Args:
        msg: Submitted textbox value, a mapping with ``"files"`` (iterable
            of file paths) and ``"text"`` entries.
        hist: Chat history list; it is extended in place.

    Returns:
        A ``(hist, textbox)`` pair, where ``textbox`` is a cleared,
        non-interactive ``gr.MultimodalTextbox``.
    """
    # Each file becomes its own user message, wrapped in a 1-tuple.
    hist.extend({"role": "user", "content": (path,)} for path in msg["files"])

    text = msg["text"]
    if text:
        hist.append({"role": "user", "content": text})

    cleared_box = gr.MultimodalTextbox(value={"text": "", "files": []}, interactive=False)
    return hist, cleared_box
# UI layout: a chat panel above a multimodal input that accepts text files.
with gr.Blocks(fill_height=True) as demo:
    chatbot = gr.Chatbot(scale=1)
    chat_input = gr.MultimodalTextbox(
        interactive=True,
        file_types=["text"],
        placeholder="Write a prompt to test Galactic Reasoning's Chain of Thought, use <think> to encourage this behavior at the end of your prompt.",
        show_label=False,
    )

    # Step 1: record the user's message and lock the input box.
    submitted = chat_input.submit(
        add_user_message,
        inputs=[chat_input, chatbot],
        outputs=[chatbot, chat_input],
    )
    # Step 2: stream the model's reply into the chat panel.
    answered = submitted.then(
        bot,
        inputs=[chatbot],
        outputs=[chatbot],
    )
    # Step 3: make the input box interactive again.
    answered.then(
        lambda: gr.MultimodalTextbox(interactive=True),
        outputs=[chat_input],
    )

# Launch only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)