Spaces:

TitleOS
/

GalacticReasoning-Q8-Playground

Paused

App Files Files Community

GalacticReasoning-Q8-Playground / app.py

TitleOS

Update app.py

ae0a63f verified about 2 months ago

raw

history blame contribute delete

3.85 kB

	import os
	import threading
	import gradio as gr
	import spaces
	from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

	# Access token read from the environment; None when HF_TOKEN is not set.
	HF_TOKEN = os.getenv("HF_TOKEN")
	# Hub repository id that the tokenizer and model are loaded from.
	REPO_ID = "TitleOS/GalacticReasoning-1.3B-Q8"

	# Standard ChatML template for models missing their tokenizer configs.
	#
	# Each message is rendered as "<|im_start|>{role}\n{content}<|im_end|>\n";
	# when add_generation_prompt is true, an opening "<|im_start|>assistant\n"
	# is appended so the model continues as the assistant.
	#
	# The markers must be written without backslashes: "\|" is not a Python
	# escape, so a stray backslash would be kept and end up in the prompt.
	# The "\n" escapes are resolved by Python, so the Jinja string literals
	# contain real newline characters.
	FALLBACK_CHAT_TEMPLATE = (
	    "{% for message in messages %}"
	    "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
	    "{% endfor %}"
	    "{% if add_generation_prompt %}"
	    "{{ '<|im_start|>assistant\n' }}"
	    "{% endif %}"
	)

	# Module-level cache; both stay None until load_model() first runs.
	tokenizer = None
	model = None


	def load_model():
	    """Return the shared ``(tokenizer, model)`` pair, loading it on first use.

	    The first call (while ``model`` is still ``None``) loads both objects
	    from ``REPO_ID`` using ``HF_TOKEN`` and stores them in the module-level
	    globals. Later calls return the cached objects without reloading.

	    Returns:
	        A ``(tokenizer, model)`` tuple.
	    """
	    global tokenizer, model

	    # Fast path: the model global is the single "already loaded" flag.
	    if model is not None:
	        return tokenizer, model

	    tokenizer = AutoTokenizer.from_pretrained(REPO_ID, token=HF_TOKEN)
	    model = AutoModelForCausalLM.from_pretrained(
	        REPO_ID,
	        token=HF_TOKEN,
	        device_map="auto",
	    )
	    return tokenizer, model

	@spaces.GPU(duration=180)
	def bot(history):
	    """Stream an assistant reply for the conversation held in ``history``.

	    This is a generator: it appends an empty assistant message to
	    ``history`` and yields ``history`` again after every streamed chunk of
	    text has been added to that message.

	    Args:
	        history: List of message dicts with ``"role"`` and ``"content"``
	            keys. String content is used as-is (whitespace-only strings
	            are skipped). Tuple content is treated as a file attachment
	            whose first element is the file path. Content of any other
	            type is ignored.

	    Yields:
	        The same ``history`` list, mutated in place with the reply so far.
	    """
	    tok, mod = load_model()

	    # Step 1: flatten the UI history into plain {"role", "content"} text
	    # messages, inlining attachments as user messages.
	    flattened = []
	    for entry in history:
	        role, content = entry["role"], entry["content"]

	        if isinstance(content, str):
	            if content.strip():
	                flattened.append({"role": role, "content": content})
	            continue

	        if not isinstance(content, tuple):
	            continue

	        filepath = content[0]
	        try:
	            # Undecodable bytes are dropped rather than failing the read.
	            with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
	                file_text = f.read()
	            flattened.append({
	                "role": "user",
	                "content": f"--- Attachment: {os.path.basename(filepath)} ---\n{file_text}",
	            })
	        except Exception as e:
	            # Best effort: an unreadable attachment is logged and skipped.
	            print(f"Error reading file: {e}")

	    # Step 2: collapse consecutive messages from the same role into one,
	    # separated by a blank line.
	    merged = []
	    for item in flattened:
	        previous = merged[-1] if merged else None
	        if previous is not None and previous["role"] == item["role"]:
	            previous["content"] += "\n\n" + item["content"]
	        else:
	            merged.append(item)

	    # We inject the fallback template here to bypass the missing config error
	    encoded_prompt = tok.apply_chat_template(
	        merged,
	        chat_template=FALLBACK_CHAT_TEMPLATE,
	        tokenize=True,
	        add_generation_prompt=True,
	        return_tensors="pt",
	    )
	    prompt_tensors = encoded_prompt.to(mod.device)

	    # Placeholder that the streamed text is accumulated into.
	    history.append({"role": "assistant", "content": ""})

	    streamer = TextIteratorStreamer(
	        tok,
	        timeout=10.0,
	        skip_prompt=True,
	        skip_special_tokens=True,
	    )

	    # Generation runs in a background thread so this generator can consume
	    # the streamer while tokens are still being produced.
	    worker = threading.Thread(
	        target=mod.generate,
	        kwargs={
	            "input_ids": prompt_tensors,
	            "streamer": streamer,
	            "max_new_tokens": 4096,
	        },
	    )
	    worker.start()

	    for chunk in streamer:
	        history[-1]["content"] += chunk
	        yield history

	def add_user_message(msg, hist):
	    """Append the submitted multimodal input to the chat history.

	    Args:
	        msg: Dict with a ``"files"`` iterable of file paths and a
	            ``"text"`` string.
	        hist: Chat history list; it is mutated in place.

	    Returns:
	        A tuple of the updated history and a cleared, non-interactive
	        ``gr.MultimodalTextbox``.
	    """
	    # Each file becomes its own user message whose content is a 1-tuple.
	    hist.extend({"role": "user", "content": (path,)} for path in msg["files"])

	    text = msg["text"]
	    if text:
	        hist.append({"role": "user", "content": text})

	    cleared_box = gr.MultimodalTextbox(
	        value={"text": "", "files": []},
	        interactive=False,
	    )
	    return hist, cleared_box

	# UI layout: a chat view above a text/file input box.
	with gr.Blocks(fill_height=True) as demo:
	    chatbot = gr.Chatbot(scale=1)
	    chat_input = gr.MultimodalTextbox(
	        interactive=True,
	        file_types=["text"],
	        placeholder="Write a prompt to test Galactic Reasoning's Chain of Thought, use <think> to encourage this behavior at the end of your prompt.",
	        show_label=False,
	    )

	    # Event chain on submit:
	    #   1. add_user_message records the input and returns a cleared,
	    #      non-interactive input box,
	    #   2. bot streams the assistant reply into the chat view,
	    #   3. the input box is made interactive again.
	    user_step = chat_input.submit(
	        add_user_message,
	        inputs=[chat_input, chatbot],
	        outputs=[chatbot, chat_input],
	    )
	    bot_step = user_step.then(
	        bot,
	        inputs=[chatbot],
	        outputs=[chatbot],
	    )
	    bot_step.then(
	        lambda: gr.MultimodalTextbox(interactive=True),
	        outputs=[chat_input],
	    )

	# Launch the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch(ssr_mode=False)