Spaces:

DmitryMalikov
/

RSA

Sleeping

RSA / app.py

Маликов Дмитрий Романович

Update Application

d08c34e 2 months ago

6.63 kB

	import re
	import torch
	import random
	import gradio as gr

	from collections import Counter
	from typing import List
	from transformers import AutoTokenizer, AutoModelForCausalLM


	# Model id pre-filled in the "Model name" textbox of the UI.
	DEFAULT_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"

	# Prompt used by rsa_for_question to produce the initial candidate solutions.
	# The only placeholder is {question}. The "#### <number>" / "#### NO ANSWER"
	# output convention requested here is what extract_answer_from_text parses.
	# NOTE(review): the numbered instructions jump from 1 to 3 and contain the
	# typo "ANITHING"; both are runtime prompt text and are left untouched here.
	COT_PROMPT_TEMPLATE = """You are a highly precise reasoning assistant. Solve the problem step by step.

	Problem: {question}

	INSTRUCTIONS (READ CAREFULLY):
	1. Show your step-by-step reasoning in numbered steps, e.g.:
	Step 1: ...
	Step 2: ...
	Final answer: ...
	3. THE FINAL ANSWER MUST BE STRICTLY AN INTEGER NUMBER. NO UNITS, NO WORDS, NO SYMBOLS, NO LATEX.
	4. THE FINAL ANSWER MUST BE OUTPUT IN THIS EXACT FORMAT:
	Final answer: #### <final_number>
	Replace <final_number> with only the INTEGER NUMERIC ANSWER WITHOUT ANITHING ELSE.
	5. DO NOT INCLUDE ANYTHING ELSE AFTER '####'.
	6. IF YOU CANNOT CALCULATE, OUTPUT ONLY '#### NO ANSWER' AS PLACEHOLDER.

	BEGIN STEP-BY-STEP REASONING:
	"""

	# Prompt used by rsa_for_question in every aggregation round.
	# Placeholders: {question} (the original problem) and {candidates} (the
	# sampled candidate solutions, each prefixed with a "CANDIDATE #i:" header).
	# Unlike COT_PROMPT_TEMPLATE it does not mention the "NO ANSWER" placeholder.
	AGGREGATION_PROMPT_TEMPLATE = """You are a highly precise reasoning assistant.

	You are given a math problem and several candidate solutions. Some candidates may be incorrect, incomplete, or contain reasoning errors.
	Your task is to aggregate the useful ideas from the candidates and produce a single, high-quality solution.

	Problem:
	{question}

	Candidate solutions:
	{candidates}

	INSTRUCTIONS (READ CAREFULLY):
	1. Analyze all candidate solutions step by step.
	2. Identify correct, useful, or partially correct reasoning steps.
	3. If candidates disagree, select the logically correct path and discard incorrect reasoning.
	4. Combine the selected reasoning into one coherent, numbered step-by-step solution, e.g.:
	Step 1: ...
	Step 2: ...
	Final answer: ...
	5. If all candidate solutions are incorrect or inconsistent, abandon them and solve the problem using a correct alternative strategy.
	6. THE FINAL ANSWER MUST BE STRICTLY AN INTEGER NUMBER.
	- NO UNITS
	- NO WORDS
	- NO SYMBOLS
	- NO LATEX
	7. THE FINAL ANSWER MUST BE OUTPUT IN THIS EXACT FORMAT:
	Final answer: #### <final_number>
	8. Replace <final_number> with ONLY the integer numeric answer.
	9. DO NOT INCLUDE ANYTHING ELSE AFTER '####'.

	BEGIN AGGREGATED STEP-BY-STEP REASONING:
	"""


	# Single-slot cache: maps the most recently requested model name to its
	# (tokenizer, model) pair so repeated runs do not reload the weights.
	_LOADED_MODEL = {}


	def load_model(model_name: str):
	    """Return the tokenizer and causal LM for *model_name*, loading them once.

	    The pair for the most recently requested model is kept in a module-level
	    cache, so calling this function again with the same name (as the UI
	    callback does on every click) reuses the loaded weights instead of
	    reloading them. Requesting a different name evicts the previous entry
	    before loading, so at most one model is referenced by the cache.

	    Args:
	        model_name: Model identifier or path accepted by ``from_pretrained``.

	    Returns:
	        A ``(tokenizer, model)`` tuple.
	    """
	    cached = _LOADED_MODEL.get(model_name)
	    if cached is not None:
	        return cached

	    # Drop the previous model first so its memory can be reclaimed before
	    # the new weights are allocated.
	    _LOADED_MODEL.clear()

	    # SECURITY: trust_remote_code=True lets the model repository run its own
	    # Python code on load, and model_name comes from a free-text UI field.
	    # Kept for compatibility; restrict the accepted names if this is exposed
	    # to untrusted users.
	    # padding_side='left' keeps every prompt right-aligned in a batch, which
	    # generate_responses relies on when it slices off the prompt tokens.
	    tokenizer = AutoTokenizer.from_pretrained(
	        model_name,
	        trust_remote_code=True,
	        padding_side='left',
	    )
	    # NOTE(review): float16 is requested unconditionally; confirm this is
	    # acceptable when device_map="auto" places the model on CPU.
	    model = AutoModelForCausalLM.from_pretrained(
	        model_name,
	        device_map="auto",
	        torch_dtype=torch.float16,
	        trust_remote_code=True,
	    )

	    _LOADED_MODEL[model_name] = (tokenizer, model)
	    return tokenizer, model


	def generate_responses(model, tokenizer, prompts: List[str], max_new_tokens: int = 512):
	    """Generate one completion per prompt in a single batched call.

	    Each prompt is wrapped as a single user turn and rendered with the
	    tokenizer's chat template when the tokenizer has one; otherwise the raw
	    prompt text is used as-is.

	    Args:
	        model: Object exposing ``parameters()`` and ``generate(...)``.
	        tokenizer: Callable tokenizer exposing ``batch_decode``.
	        prompts: Prompt strings, one per desired completion.
	        max_new_tokens: Upper bound on generated tokens per prompt.

	    Returns:
	        A list of decoded completions (prompt tokens removed), in the same
	        order as ``prompts``.
	    """
	    if hasattr(tokenizer, "apply_chat_template"):
	        rendered = [
	            tokenizer.apply_chat_template(
	                [{"role": "user", "content": prompt}],
	                tokenize=False,
	                add_generation_prompt=True,
	            )
	            for prompt in prompts
	        ]
	    else:
	        rendered = list(prompts)

	    encoded = tokenizer(rendered, return_tensors="pt", padding=True, truncation=True)

	    # Move every input tensor to wherever the model's first parameter lives.
	    target_device = next(model.parameters()).device
	    encoded = {name: tensor.to(target_device) for name, tensor in encoded.items()}

	    output_ids = model.generate(**encoded, max_new_tokens=max_new_tokens)

	    # The output starts with the (padded) prompt; keep only what follows it.
	    prompt_width = encoded["input_ids"].shape[1]
	    completion_ids = output_ids[:, prompt_width:]
	    return tokenizer.batch_decode(completion_ids, skip_special_tokens=True)


	def extract_answer_from_text(text: str) -> str:
	    """Extract the final answer from a model response.

	    The answer tagged with ``####`` is preferred: either an integer or the
	    ``NO ANSWER`` placeholder (matched case-insensitively and returned in
	    upper case). If no tagged answer is present, the last integer that
	    appears anywhere in the text is returned instead.

	    Args:
	        text: Raw model output.

	    Returns:
	        The answer as a string (e.g. ``"16"``, ``"-7"``, ``"NO ANSWER"``),
	        or ``""`` when the text contains neither a tagged answer nor a number.
	    """
	    # The alternation must be a bare "|"; an escaped "\|" would only match a
	    # literal pipe character and the tagged branch would never fire.
	    tagged = re.search(r"####\s*(NO ANSWER|-?\d+)", text, flags=re.IGNORECASE)
	    if tagged:
	        return tagged.group(1).strip().upper()

	    # Fallback: take the last integer in the whole text. Using findall avoids
	    # a look-ahead that stops at line breaks and would otherwise pick up
	    # a number from an early line such as "Step 1".
	    numbers = re.findall(r"-?\d+", text)
	    return numbers[-1] if numbers else ""


	def majority_vote(population: List[str]) -> str:
	    """Return the most frequent extracted answer across all responses.

	    Each response is passed through ``extract_answer_from_text``; responses
	    that yield an empty answer do not vote.

	    Args:
	        population: Model responses to vote over.

	    Returns:
	        The answer with the highest count, or ``""`` if no response yielded
	        an answer.
	    """
	    extracted = (extract_answer_from_text(response) for response in population)
	    tally = Counter(answer for answer in extracted if answer)
	    if not tally:
	        return ""

	    (winner, _votes), = tally.most_common(1)
	    return winner


	def rsa_for_question(model, tokenizer, question: str, N=4, K=2, T=2):
	    """Run recursive self-aggregation on a single question.

	    First, ``N`` independent step-by-step solutions are generated. Then, for
	    ``T`` rounds, ``N`` aggregation prompts are built, each showing ``K``
	    randomly chosen solutions from the current population, and the model's
	    responses to those prompts become the new population.

	    Args:
	        model: Model passed through to ``generate_responses``.
	        tokenizer: Tokenizer passed through to ``generate_responses``.
	        question: Problem statement inserted into the prompts.
	        N: Number of responses kept in the population.
	        K: Number of candidates shown in each aggregation prompt.
	        T: Number of aggregation rounds (0 returns the initial population).

	    Returns:
	        A tuple ``(population_text, final_answer)``: the final responses
	        joined by a ``---`` separator, and their majority-vote answer.
	    """
	    seed_prompt = COT_PROMPT_TEMPLATE.format(question=question)
	    population = generate_responses(model, tokenizer, [seed_prompt for _ in range(N)])

	    for _ in range(T):
	        round_prompts = []
	        for _ in range(N):
	            # Sample without replacement when possible; otherwise allow
	            # repeats so that exactly K candidates are always shown.
	            if len(population) < K:
	                picked = random.choices(population, k=K)
	            else:
	                picked = random.sample(population, K)

	            sections = [
	                f"CANDIDATE #{pos+1}:\n{candidate}"
	                for pos, candidate in enumerate(picked)
	            ]
	            round_prompts.append(
	                AGGREGATION_PROMPT_TEMPLATE.format(
	                    question=question,
	                    candidates="\n\n".join(sections),
	                )
	            )

	        population = generate_responses(model, tokenizer, round_prompts)

	    voted_answer = majority_vote(population)
	    return "\n\n---\n\n".join(population), voted_answer


	def run_and_format(model_name, question, N, K, T, compare_baseline):
	    """UI callback: run RSA on a question and optionally a baseline.

	    Args:
	        model_name: Model identifier handed to ``load_model``.
	        question: Problem statement.
	        N: Population size (converted to ``int``).
	        K: Candidates per aggregation prompt (converted to ``int``).
	        T: Number of aggregation rounds (converted to ``int``).
	        compare_baseline: When truthy, also run a single-response, zero-round
	            pass (N=1, K=1, T=0) for comparison.

	    Returns:
	        A 4-tuple ``(final_population, final_answer, baseline_output,
	        baseline_answer)``. Both baseline fields are ``"(skipped)"`` when the
	        baseline is not requested.
	    """
	    tokenizer, model = load_model(model_name)
	    population_size, subset_size, rounds = int(N), int(K), int(T)

	    rsa_text, rsa_answer = rsa_for_question(
	        model, tokenizer, question, population_size, subset_size, rounds
	    )

	    if not compare_baseline:
	        return rsa_text, rsa_answer, "(skipped)", "(skipped)"

	    plain_text, plain_answer = rsa_for_question(model, tokenizer, question, 1, 1, 0)
	    return rsa_text, rsa_answer, plain_text, plain_answer


	# Gradio UI definition. Components are created in the order they should appear.
	# NOTE(review): indentation was lost in this copy of the file, so which of the
	# components below sit inside gr.Row() cannot be determined from here — confirm
	# against the original layout before re-indenting.
	with gr.Blocks(title="RSA") as demo:
	gr.Markdown("### Recursive Self-Aggregation for Mathematical Reasoning\nEnter a problem whose answer is a single integer.")

	# Model id forwarded to load_model by run_and_format.
	model_name = gr.Textbox(label="Model name", value=DEFAULT_MODEL, lines=1)
	with gr.Row():
	# N: population size, K: candidates per aggregation prompt, T: aggregation
	# rounds (see rsa_for_question). T may be 0, which skips aggregation.
	N = gr.Slider(1, 16, value=4, step=1, label="N")
	K = gr.Slider(1, 16, value=2, step=1, label="K")
	T = gr.Slider(0, 10, value=2, step=1, label="T")
	# When checked, run_and_format also runs a single-response pass with no aggregation.
	compare_baseline = gr.Checkbox(label="Compare with baseline", value=True)
	question = gr.Textbox(label="Question", lines=2, value="How many positive divisors does number 120 have?")

	run_btn = gr.Button("Run")

	# Output fields, filled in the order run_and_format returns its four values.
	final_population = gr.Textbox(label="Final Population", lines=10)
	final_answer = gr.Textbox(label="Final answer")
	baseline_output = gr.Textbox(label="Baseline output", lines=5)
	baseline_answer = gr.Textbox(label="Baseline answer")

	# The inputs list order must match run_and_format's parameter order.
	run_btn.click(run_and_format, inputs=[model_name, question, N, K, T, compare_baseline],
	outputs=[final_population, final_answer, baseline_output, baseline_answer])


	# Start the Gradio server only when the file is executed as a script.
	if __name__ == "__main__":
	demo.launch()