| import os |
| import threading |
| import gradio as gr |
| import spaces |
| from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer |
|
|
# Hugging Face access token taken from the environment (None when unset).
HF_TOKEN = os.getenv("HF_TOKEN")

# Hub repository that both the tokenizer and the model are loaded from.
REPO_ID = "TitleOS/GalacticReasoning-1.3B-Q8"


# Jinja chat template that renders every message as
#   <|im_start|>{role}\n{content}<|im_end|>\n
# and, when add_generation_prompt is set, ends with an opening
# "<|im_start|>assistant\n" header for the model to continue from.
# Despite the name, bot() passes this template explicitly on every call.
FALLBACK_CHAT_TEMPLATE = (
    "{% for message in messages %}"
    "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
    "{% endfor %}"
    "{% if add_generation_prompt %}"
    "{{ '<|im_start|>assistant\n' }}"
    "{% endif %}"
)


# Process-wide cache filled in by load_model() on first use.
tokenizer = None
model = None
|
|
def load_model():
    """Return the ``(tokenizer, model)`` pair, loading both on first use.

    The pair is cached in the module-level ``tokenizer`` and ``model``
    globals; once ``model`` is set, later calls return the cached objects
    without touching the Hub again.
    """
    global tokenizer, model

    # Fast path: everything was already loaded by an earlier call.
    if model is not None:
        return tokenizer, model

    tokenizer = AutoTokenizer.from_pretrained(REPO_ID, token=HF_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(
        REPO_ID,
        token=HF_TOKEN,
        device_map="auto",
    )
    return tokenizer, model
|
|
@spaces.GPU(duration=180)
def bot(history):
    """Stream the model's reply to the conversation held in ``history``.

    Args:
        history: List of chat messages, each a dict with ``"role"`` and
            ``"content"`` keys. String content is used as-is (blank strings
            are skipped); tuple content is treated as ``(filepath, ...)`` and
            the file's text is inlined as a user message.

    Yields:
        The same ``history`` list after each streamed chunk, with the text
        generated so far accumulated in a trailing assistant message.
    """
    tok, mod = load_model()

    # Flatten the UI history into plain-text role/content messages.
    raw_llama_messages = []
    for msg in history:
        role = msg["role"]
        content = msg["content"]

        if isinstance(content, str):
            if content.strip():
                raw_llama_messages.append({"role": role, "content": content})
        elif isinstance(content, tuple):
            filepath = content[0]
            # Keep the try body to the read itself and catch only what
            # open() raises for a bad path; an unreadable attachment is
            # reported and skipped rather than aborting the whole reply.
            try:
                with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                    file_text = f.read()
            except (OSError, TypeError, ValueError) as e:
                print(f"Error reading file: {e}")
                continue
            raw_llama_messages.append({
                "role": "user",
                "content": f"--- Attachment: {os.path.basename(filepath)} ---\n{file_text}"
            })

    # Collapse consecutive messages from the same role into one turn
    # (e.g. an attachment followed by the user's prompt text).
    merged_messages = []
    for msg in raw_llama_messages:
        if merged_messages and merged_messages[-1]["role"] == msg["role"]:
            merged_messages[-1]["content"] += "\n\n" + msg["content"]
        else:
            merged_messages.append(msg)

    # Ask for a dict explicitly so the result shape does not depend on the
    # library's default, and so the attention mask is available.
    encoded = tok.apply_chat_template(
        merged_messages,
        chat_template=FALLBACK_CHAT_TEMPLATE,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(mod.device)

    input_ids = encoded["input_ids"]
    attention_mask = encoded.get("attention_mask")
    if attention_mask is None:
        # Single unpadded prompt: every position is a real token.
        attention_mask = input_ids.new_ones(input_ids.shape)

    # Placeholder assistant message that the streamed text is appended to.
    history.append({"role": "assistant", "content": ""})

    streamer = TextIteratorStreamer(tok, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    # Only input_ids and attention_mask are forwarded (not **encoded), so any
    # extra tokenizer outputs are never handed to generate().
    generate_kwargs = dict(
        input_ids=input_ids,
        attention_mask=attention_mask,
        streamer=streamer,
        max_new_tokens=4096,
    )

    # generate() blocks until done, so it runs on a worker thread while this
    # generator drains the streamer.
    t = threading.Thread(target=mod.generate, kwargs=generate_kwargs)
    t.start()

    for new_text in streamer:
        history[-1]["content"] += new_text
        yield history

    # The stream is exhausted; wait for the worker so it does not outlive
    # this call.
    t.join()
|
|
def add_user_message(msg, hist):
    """Append the submitted files and text to the chat history.

    Args:
        msg: Submitted textbox value, a dict with ``"files"`` (iterable of
            file entries) and ``"text"`` keys.
        hist: Chat history list; it is extended in place.

    Returns:
        A ``(hist, textbox)`` pair where ``textbox`` is a cleared,
        non-interactive ``gr.MultimodalTextbox``.
    """
    # Each attachment becomes its own user message with a 1-tuple as content.
    hist.extend({"role": "user", "content": (path,)} for path in msg["files"])

    text = msg["text"]
    if text:
        hist.append({"role": "user", "content": text})

    cleared_box = gr.MultimodalTextbox(value={"text": "", "files": []}, interactive=False)
    return hist, cleared_box
|
|
with gr.Blocks(fill_height=True) as demo:
    # NOTE(review): add_user_message() and bot() treat each history entry as a
    # dict with "role"/"content" keys -- confirm the installed Gradio version's
    # Chatbot uses that message format by default.
    chatbot = gr.Chatbot(scale=1)
    chat_input = gr.MultimodalTextbox(
        interactive=True,
        file_types=["text"],
        placeholder="Write a prompt to test Galactic Reasoning's Chain of Thought, use <think> to encourage this behavior at the end of your prompt.",
        show_label=False,
    )

    # Step 1: record the user's turn and swap in a cleared, disabled textbox.
    user_turn = chat_input.submit(
        add_user_message,
        inputs=[chat_input, chatbot],
        outputs=[chatbot, chat_input],
    )

    # Step 2: stream the model's reply into the chatbot.
    reply = user_turn.then(
        bot,
        inputs=[chatbot],
        outputs=[chatbot],
    )

    # Step 3: make the textbox interactive again for the next prompt.
    reply.then(
        lambda: gr.MultimodalTextbox(interactive=True),
        outputs=[chat_input],
    )


if __name__ == "__main__":
    demo.launch(ssr_mode=False)