"""Gradio chat demo that streams completions from a Hugging Face causal LM."""

import os
import threading

import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

HF_TOKEN = os.environ.get("HF_TOKEN")
REPO_ID = "TitleOS/GalacticReasoning-1.3B-Q8"

# Standard ChatML template for models missing their tokenizer configs
FALLBACK_CHAT_TEMPLATE = (
    "{% for message in messages %}"
    "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
    "{% endfor %}"
    "{% if add_generation_prompt %}"
    "{{ '<|im_start|>assistant\n' }}"
    "{% endif %}"
)

# Lazily populated by load_model() so importing this module stays cheap.
tokenizer = None
model = None


def load_model():
    """Return the shared ``(tokenizer, model)`` pair, loading both on first use.

    The objects are cached in the module-level ``tokenizer`` / ``model``
    globals; later calls return the cached instances.
    """
    global tokenizer, model
    if model is None:
        tokenizer = AutoTokenizer.from_pretrained(REPO_ID, token=HF_TOKEN)
        model = AutoModelForCausalLM.from_pretrained(
            REPO_ID,
            token=HF_TOKEN,
            device_map="auto",
        )
    return tokenizer, model


def _attachment_message(filepath):
    """Read an attached file as text and wrap it in a user message.

    Returns ``None`` (after printing the error) when the file cannot be read,
    so one bad attachment does not abort the whole request.
    """
    try:
        # errors="ignore" drops undecodable bytes instead of raising.
        with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
            file_text = f.read()
    except (OSError, TypeError, ValueError) as e:
        print(f"Error reading file: {e}")
        return None
    return {
        "role": "user",
        "content": f"--- Attachment: {os.path.basename(filepath)} ---\n{file_text}",
    }


def _history_to_messages(history):
    """Convert chat history entries into plain ``{"role", "content"}`` messages.

    String contents are kept when non-blank; tuple contents are treated as
    ``(filepath, ...)`` attachments and inlined as user messages.
    NOTE(review): any other content shape is silently skipped -- confirm this
    matches the history format produced by the installed Gradio version.
    """
    messages = []
    for entry in history:
        role = entry["role"]
        content = entry["content"]
        if isinstance(content, str):
            if content.strip():
                messages.append({"role": role, "content": content})
        elif isinstance(content, tuple) and content:
            attachment = _attachment_message(content[0])
            if attachment is not None:
                messages.append(attachment)
    return messages


def _merge_consecutive_roles(messages):
    """Join adjacent messages that share a role, separated by a blank line."""
    merged = []
    for msg in messages:
        if merged and merged[-1]["role"] == msg["role"]:
            merged[-1]["content"] += "\n\n" + msg["content"]
        else:
            # Copy so the merge never mutates the caller's dicts.
            merged.append(dict(msg))
    return merged


@spaces.GPU(duration=180)
def bot(history):
    """Stream the assistant's reply for ``history``.

    Appends an empty assistant message to ``history`` and yields ``history``
    after each decoded text chunk has been added to that message.

    Raises:
        Exception: whatever ``model.generate`` raised in the worker thread is
            re-raised here once streaming has stopped.
    """
    tok, mod = load_model()

    merged_messages = _merge_consecutive_roles(_history_to_messages(history))

    # We inject the fallback template here to bypass the missing config error
    prompt_tensors = tok.apply_chat_template(
        merged_messages,
        chat_template=FALLBACK_CHAT_TEMPLATE,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(mod.device)

    history.append({"role": "assistant", "content": ""})

    streamer = TextIteratorStreamer(
        tok,
        timeout=10.0,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    generate_kwargs = dict(
        input_ids=prompt_tensors,
        streamer=streamer,
        max_new_tokens=4096,
    )

    failures = []

    def _generate():
        # Runs in the worker thread. On failure, remember the error and close
        # the stream so the consumer loop ends immediately instead of waiting
        # for the streamer timeout and failing with an unrelated queue error.
        try:
            mod.generate(**generate_kwargs)
        except Exception as exc:
            failures.append(exc)
            streamer.end()

    t = threading.Thread(target=_generate)
    t.start()

    for new_text in streamer:
        history[-1]["content"] += new_text
        yield history

    # The stream has ended, so the worker is finished (or about to be).
    t.join()
    if failures:
        raise failures[0]


def add_user_message(msg, hist):
    """Append the submitted files and text to ``hist`` and lock the input box.

    Each file becomes its own user message whose content is a 1-tuple holding
    the file entry; non-empty text becomes a regular user message. Returns the
    updated history and a cleared, non-interactive ``MultimodalTextbox``.
    """
    for f in msg.get("files") or []:
        hist.append({"role": "user", "content": (f,)})

    if msg.get("text"):
        hist.append({"role": "user", "content": msg["text"]})

    return hist, gr.MultimodalTextbox(value={"text": "", "files": []}, interactive=False)


with gr.Blocks(fill_height=True) as demo:
    chatbot = gr.Chatbot(scale=1)
    # NOTE(review): the placeholder sentence looks like it is missing a word
    # after "use" -- confirm the intended wording.
    chat_input = gr.MultimodalTextbox(
        interactive=True,
        file_types=["text"],
        placeholder="Write a prompt to test Galactic Reasoning's Chain of Thought, use to encourage this behavior at the end of your prompt.",
        show_label=False,
    )

    # Submit flow: record the user turn, stream the reply, then re-enable input.
    chat_input.submit(
        add_user_message,
        inputs=[chat_input, chatbot],
        outputs=[chatbot, chat_input],
    ).then(
        bot,
        inputs=[chatbot],
        outputs=[chatbot],
    ).then(
        lambda: gr.MultimodalTextbox(interactive=True),
        outputs=[chat_input],
    )

if __name__ == "__main__":
    demo.launch(ssr_mode=False)