haifei committed on
Commit
1482463
·
1 Parent(s): 5168b2e

code and checkpoint

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoints/MATH/pangu/best_checkpoint.pt filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ processed_data/
2
+ checkpoints/
3
+ results/
4
+ automr/__pycache__/
AMC.SH ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
# Evaluate the MATH-trained AutoMR checkpoint on the AMC test split.
# NPU device 4 is selected via ASCEND_RT_VISIBLE_DEVICES; adjust for your host.
# Fix: removed the dangling trailing backslash after the last flag — a bare
# line continuation at end of file silently swallows any line appended later.
ASCEND_RT_VISIBLE_DEVICES=4 python main.py --mode eval \
    --device npu \
    --model_name FreedomIntelligence/openPangu-Embedded-7B \
    --test_data processed_data/AMC/test.jsonl \
    --load_checkpoint checkpoints/MATH/pangu/best_checkpoint.pt \
    --task_type math \
    --results_dir results/AMC/ \
    --token_budget 4096
automr/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
"""AutoMR package public API.

Re-exports the main entry points so callers can write
``from automr import AutoMR, AutoMRTrainer, AutoMREvaluator, AutoMRConfig``.
"""
from .model import AutoMR
from .trainer import AutoMRTrainer
from .evaluator import AutoMREvaluator
from .config import AutoMRConfig

# Package version; bump when the public interface changes.
__version__ = "1.0.0"
__all__ = ["AutoMR", "AutoMRTrainer", "AutoMREvaluator", "AutoMRConfig"]
automr/config.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from typing import Optional
3
+
4
+
5
@dataclass
class AutoMRConfig:
    """Configuration for AutoMR framework.

    Flat dataclass so every knob can be overridden individually; defaults
    target the openPangu-Embedded-7B setup.
    """

    # Model settings
    model_name: str = "FreedomIntelligence/openPangu-Embedded-7B"
    device: str = "npu"  # torch device string for the trainable strategy modules
    token_budget: int = 4096  # hard cap on generated tokens per reasoning trajectory
    hidden_size: int = 4096  # To be set according to the model used

    # Training settings
    learning_rate: float = 5e-4
    num_epochs: int = 5
    batch_size: int = 8
    num_samples_per_query: int = 4  # M in paper
    gradient_clip: float = 1.0  # max grad norm for strategy MLP + embeddings

    # Validation settings
    val_every_n_steps: int = 100  # Alpha in the requirement - validate every N steps
    val_batch_size: int = 10  # Number of validation samples to evaluate
    early_stopping_patience: int = 5  # Stop if no improvement for N validations

    # Generation settings
    max_new_tokens: int = 1024  # per-node generation cap (further bounded by remaining budget)
    temperature: float = 0.01  # near-greedy decoding
    top_p: float = 0.9

    # Paths
    train_data_path: str = "data/train.json"
    val_data_path: str = "data/val.json"
    test_data_path: str = "data/test.json"
    checkpoint_dir: str = "checkpoints"
    results_dir: str = "results"

    # Evaluation settings
    save_predictions: bool = True
    save_skeletons: bool = True
    save_best_only: bool = True

    # Task type
    task_type: str = "math"  # "math" or "multiple_choice"
automr/dag.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from typing import List
3
+ import torch
4
+
5
+
6
@dataclass
class ReasoningNode:
    """Represents a node in the meta-reasoning DAG"""
    index: int  # position in the DAG's node list (0 is the query node)
    content: str  # reasoning text (the query itself for node 0)
    num_tokens: int  # completion tokens spent producing `content` (0 for the query)
    content_repr: torch.Tensor  # pooled embedding of `content`, shape [hidden_size]


@dataclass
class ReasoningEdge:
    """Represents a directed edge (from_node -> to_node) in the meta-reasoning DAG"""
    from_node: int
    to_node: int
    strategy: str  # meta-strategy name attached to the edge


class MetaReasoningDAG:
    """Represents the meta-reasoning skeleton as a DAG.

    Node 0 always holds the original query; reasoning nodes are appended
    sequentially, so node indices double as a topological order.
    """

    def __init__(self, query: str, query_repr: torch.Tensor, query_num_tokens: int):
        self.nodes: List[ReasoningNode] = [ReasoningNode(0, query, query_num_tokens, query_repr)]
        self.edges: List[ReasoningEdge] = []
        self.current_index = 0  # index of the most recently added node

    def add_node(self, content: str, num_tokens: int, content_repr: torch.Tensor) -> int:
        """Append a new node to the DAG and return its index."""
        self.current_index += 1
        node = ReasoningNode(self.current_index, content, num_tokens, content_repr)
        self.nodes.append(node)
        return self.current_index

    def add_edge(self, from_idx: int, to_idx: int, strategy: str):
        """Add an edge between two existing nodes."""
        self.edges.append(ReasoningEdge(from_idx, to_idx, strategy))

    def get_node_content_repr(self, idx: int) -> torch.Tensor:
        """Get content representation of a specific node."""
        return self.nodes[idx].content_repr

    def get_node_content(self, idx: int) -> str:
        """Get content of a specific node."""
        return self.nodes[idx].content

    def get_context_repr_up_to(self, idx: int) -> torch.Tensor:
        """Mean of the content representations of nodes 0..idx (inclusive).

        Bug fix: the previous implementation aliased node 0's stored tensor
        (`context_repr = self.nodes[0].content_repr`) and then applied
        in-place `+=` / `/=`, silently mutating the query representation and
        corrupting every subsequent context computation. Stacking into a
        fresh tensor leaves the stored representations untouched.
        """
        return torch.stack(
            [node.content_repr for node in self.nodes[:idx + 1]], dim=0
        ).mean(dim=0)

    def get_context_up_to(self, idx: int) -> str:
        """Get all node contents up to index idx, newline-joined."""
        return "\n".join([node.content for node in self.nodes[:idx + 1]])

    def total_tokens(self) -> int:
        """Total tokens generated (excluding source node)"""
        return sum(node.num_tokens for node in self.nodes[1:])

    def to_dict(self) -> dict:
        """Convert DAG to dictionary for serialization (tensors are dropped)."""
        return {
            "nodes": [
                {"index": n.index, "content": n.content, "num_tokens": n.num_tokens}
                for n in self.nodes
            ],
            "edges": [
                {"from": e.from_node, "to": e.to_node, "strategy": e.strategy}
                for e in self.edges
            ]
        }
automr/data_loader.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import List, Dict
3
+
4
+
5
class DataLoader:
    """Load and validate datasets from .json / .jsonl files."""

    @staticmethod
    def load_data(file_path: str) -> List[Dict[str, str]]:
        """
        Load data from a JSON (.json) or JSON-lines (.jsonl) file.

        Expected format: [{"query": "...", "answer": "..."}, ...]

        Raises:
            ValueError: if any record lacks a 'query' or 'answer' field.
        """
        # Explicit UTF-8: dataset files are shared across machines, so don't
        # depend on the platform's default locale encoding.
        with open(file_path, 'r', encoding='utf-8') as f:
            if file_path.endswith('.jsonl'):
                # Skip blank lines (editor-added trailing newlines, padding);
                # json.loads("") would otherwise raise JSONDecodeError.
                data = [json.loads(line) for line in f if line.strip()]
            else:
                data = json.load(f)

        # Validate data format
        for item in data:
            if 'query' not in item or 'answer' not in item:
                raise ValueError("Each data item must have 'query' and 'answer' fields")

        return data

    @staticmethod
    def load_math_dataset(file_path: str) -> List[Dict[str, str]]:
        """Load MATH or GSM8K format dataset"""
        return DataLoader.load_data(file_path)

    @staticmethod
    def load_mmlu_dataset(file_path: str) -> List[Dict[str, str]]:
        """Load MMLU-Pro format dataset"""
        return DataLoader.load_data(file_path)
automr/evaluator.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Tuple
2
+ from tqdm import tqdm
3
+ import os
4
+
5
+ from .model import AutoMR
6
+ from .config import AutoMRConfig
7
+ from .utils import check_answer_match, save_json, ensure_dir
8
+
9
+
10
class AutoMREvaluator:
    """Evaluator for AutoMR: batched accuracy measurement over a test set."""

    def __init__(self, model: AutoMR, config: AutoMRConfig):
        # model: fully constructed AutoMR (API clients + strategy modules ready).
        self.model = model
        self.config = config
        # Create the results directory up front so evaluate() can write to it.
        ensure_dir(config.results_dir)

    def evaluate(self, test_data: List[Dict[str, str]]) -> Tuple[float, List[Dict]]:
        """
        Evaluate model on test data.

        Args:
            test_data: list of {"query": ..., "answer": ...} records.
        Returns: (accuracy, detailed_results)

        NOTE(review): detailed_results is currently always empty — the code
        that populated it is commented out below — so both the returned list
        and the saved JSON's 'detailed_results' field are empty lists.
        """
        print(f"\nEvaluating on {len(test_data)} samples...")

        # Inference only: disable dropout in the trainable strategy modules.
        self.model.strategy_mlp.eval()
        self.model.strategy_embeddings.eval()

        correct = 0
        total = 0
        detailed_results = []
        batch_size = self.config.batch_size
        pbar = tqdm(
            range(0, len(test_data), batch_size), desc="Evaluating"
        )

        # for item in tqdm(test_data, desc="Evaluating"):
        for i in pbar:
            batch = test_data[i:i + batch_size]
            queries = [item['query'] for item in batch]
            ground_truths = [item['answer'] for item in batch]

            # Run inference
            # One sampled trajectory per query at eval time (M=1).
            pred_answers, dags = self.model.inference(queries,M=1)

            for query, ground_truth, pred_answer, dag in zip(queries, ground_truths, pred_answers, dags):
                # Check correctness
                is_correct = check_answer_match(
                    pred_answer,
                    ground_truth,
                    self.config.task_type
                )

                if is_correct:
                    correct += 1
                total += 1

                # Show running accuracy in the progress bar.
                pbar.set_postfix({
                    'Acc': f'{correct} / {total}',
                })

                # Store detailed result
                # result = {
                #     'query': query,
                #     'ground_truth': ground_truth,
                #     'prediction': pred_answer,
                #     'correct': is_correct
                # }

                # if self.config.save_skeletons:
                #     result['skeleton'] = dag.to_dict()

                # detailed_results.append(result)

        accuracy = correct / total if total > 0 else 0.0

        print(f"\nEvaluation Results:")
        print(f"Accuracy: {accuracy:.4f} ({correct}/{total})")

        # Save results
        if self.config.save_predictions:
            results_path = os.path.join(
                self.config.results_dir,
                'evaluation_results.json'
            )
            save_json({
                'accuracy': accuracy,
                'correct': correct,
                'total': total,
                'detailed_results': detailed_results
            }, results_path)
            print(f"Results saved to {results_path}")

        return accuracy, detailed_results
automr/model.py ADDED
@@ -0,0 +1,404 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from concurrent.futures import ThreadPoolExecutor
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ from typing import List, Tuple
6
+ import random
7
+ from transformers import AutoTokenizer, AutoModelForCausalLM
8
+ from vllm import LLM
9
+ from vllm import SamplingParams
10
+ from .config import AutoMRConfig
11
+ from .strategies import META_STRATEGIES, STRATEGY_LIST
12
+ from .dag import MetaReasoningDAG
13
+ from .utils import extract_answer
14
+ from typing import Dict
15
+ from openai import OpenAI
16
+
17
class StrategyMLP(nn.Module):
    """Three-layer MLP that scores meta-reasoning strategies for an edge.

    The input is the concatenation of a node representation, the mean of the
    already-sampled strategy embeddings, and a context representation.
    Attribute names fc1/fc2/fc3/dropout are kept stable because they form the
    state_dict keys stored in checkpoints.
    """

    def __init__(self, hidden_size: int, num_strategies: int):
        super().__init__()
        # Input: [node_repr, strategy_repr, context_repr]
        self.fc1 = nn.Linear(hidden_size * 3, hidden_size * 2)
        self.fc2 = nn.Linear(hidden_size * 2, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_strategies)
        self.dropout = nn.Dropout(0.1)

    def forward(self, node_repr, strategy_repr, context_repr):
        """
        Args:
            node_repr: [batch, hidden_size]
            strategy_repr: [batch, hidden_size]
            context_repr: [batch, hidden_size]
        Returns:
            logits: [batch, num_strategies]
        """
        hidden = torch.cat((node_repr, strategy_repr, context_repr), dim=-1)
        # Two ReLU+dropout hidden layers, then a plain linear readout.
        for layer in (self.fc1, self.fc2):
            hidden = self.dropout(F.relu(layer(hidden)))
        return self.fc3(hidden)
44
+
45
+
46
class AutoMR:
    """AutoMR Framework for Meta-Reasoning Skeleton Search.

    The base LLM is reached over HTTP (two OpenAI-compatible vLLM servers:
    one for generation, one for embeddings); the only locally trainable
    parameters are the strategy embedding table and the StrategyMLP.
    """

    def __init__(self, config: AutoMRConfig):
        self.config = config
        self.device = config.device
        self.token_budget = config.token_budget
        # Model id sent with every OpenAI-compatible API request.
        self.model_name_for_api = config.model_name

        # --- earlier in-process loading paths, kept for reference ---
        # # Load LLM
        # print(f"Loading Tokenizer and Config: {config.model_name}")
        # self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)

        # print(f"Loading vLLM generator: {config.model_name}")
        # self.llm = LLM(
        #     config.model_name,
        #     dtype=torch.float16,
        #     trust_remote_code=True,
        #     tensor_parallel_size=config.tensor_parallel_size,
        #     gpu_memory_utilization="0.8"
        # )

        # print(f"Loading LLM Embbedder: {config.model_name}")
        # self.llm_embedder = AutoModelForCausalLM.from_pretrained(
        #     config.model_name,
        #     torch_dtype=torch.float16,
        #     trust_remote_code=True,
        #     device_map=None
        # ).to(self.device)
        # self.llm_embedder.eval()

        # print(f"Loading vLLM Embbedder: {config.model_name}")
        # self.llm_embedder = LLM(
        #     config.model_name,
        #     dtype=torch.float16,
        #     trust_remote_code=True,
        #     tensor_parallel_size=config.tensor_parallel_size,
        #     gpu_memory_utilization="0.8"
        #     task="embed",
        # )

        # NOTE(review): server host/ports are hardcoded — consider lifting
        # them into AutoMRConfig so multi-host setups don't need code edits.
        print("Connecting to vLLM Generator Server (port 8000)...")
        self.generator_client = OpenAI(
            api_key="vllm",
            base_url="http://localhost:8000/v1"
        )

        print("Connecting to Custom Embedder Server (port 8001)...")
        self.embed_client = OpenAI(
            api_key="vllm",
            base_url="http://localhost:8001/v1"
        )


        # Strategy components (the trainable part of AutoMR).
        self.num_strategies = len(STRATEGY_LIST)
        hidden_size = config.hidden_size

        self.strategy_embeddings = nn.Embedding(self.num_strategies, hidden_size).to(self.device)
        self.strategy_mlp = StrategyMLP(hidden_size, self.num_strategies).to(self.device)

        # Strategy mappings (index order follows STRATEGY_LIST).
        self.strategy_to_idx = {s: i for i, s in enumerate(STRATEGY_LIST)}
        self.idx_to_strategy = {i: s for i, s in enumerate(STRATEGY_LIST)}

        # Optimizer over embeddings + MLP only; the LLM itself is frozen/remote.
        self.optimizer = torch.optim.Adam(
            list(self.strategy_embeddings.parameters()) +
            list(self.strategy_mlp.parameters()),
            lr=config.learning_rate
        )

        print("AutoMR initialized successfully")

    def get_text_representation(self, texts: List[str]) -> torch.Tensor:
        """
        Get pooled hidden state representations for texts in batch using the
        remote LLM embedder.

        Args:
            texts: List of input texts
        Returns:
            pooled: Tensor of shape [batch_size, hidden_size]

        NOTE(review): embeddings come back as plain floats over HTTP, so no
        autograd graph flows through the LLM — gradients only reach the
        strategy embeddings/MLP.
        """

        # --- earlier in-process embedding paths, kept for reference ---
        # self.tokenizer.padding_side = "left"
        # inputs = self.tokenizer(
        #     texts,
        #     return_tensors="pt",
        #     padding=True,
        #     truncation=True,
        # ).to(self.device)

        # with torch.no_grad():
        #     outputs = self.llm(**inputs, output_hidden_states=True)
        #     hidden_states = outputs.hidden_states[-1]  # [bsz, len, dim]
        #     pooled = hidden_states[:, -1, :]

        # batch_outputs = self.llm_embedder.encode(texts)
        # pooled = []
        # for outputs in batch_outputs:
        #     last_hidden_state = outputs.outputs.data[-1,:]  # [seq_len, hidden_size]
        #     pooled.append(last_hidden_state)
        # pooled = torch.stack(pooled, dim=0).to(self.device)  # [batch_size, hidden_size]

        batch_outputs = self.embed_client.embeddings.create(
            input=texts,
            model=self.model_name_for_api
        )

        batch_reprs = [
            torch.tensor(data.embedding, device=self.device, dtype=torch.float16)
            for data in batch_outputs.data
        ]

        pooled = torch.stack(batch_reprs, dim=0)  # [batch_size, hidden_size]
        return pooled

    def sample_strategy(
        self,
        batch_node_content_repr: torch.Tensor,
        batch_sampled_strategies: Dict[int, List[int]],
        batch_context_repr: torch.Tensor
    ) -> Tuple[List[int], torch.Tensor]:
        """
        Sample a strategy for each edge (j, i) in batch.

        Args:
            batch_node_content_repr: Tensor of shape [batch_size, hidden_size]
            batch_sampled_strategies: Dict of lists of sampled strategy indices
            batch_context_repr: Tensor of shape [batch_size, hidden_size]
        Returns:
            batch_strategy_idx: List of sampled strategy indices
            batch_log_prob: Tensor of log probabilities, shape [batch_size]

        NOTE(review): relies on dict insertion order of
        batch_sampled_strategies matching the row order of the two tensors
        (the caller builds both from active_indices) — confirm if callers
        change.
        """


        batch_strategy_repr = []

        for sampled_strategies in batch_sampled_strategies.values():
            if sampled_strategies:
                # Mean of the embeddings of strategies already sampled for
                # this node in the current step.
                sampled_strategies = torch.tensor(sampled_strategies).to(self.device)
                strategy_repr = self.strategy_embeddings(sampled_strategies).mean(dim=0, keepdim=True)
            else:
                # No strategies sampled yet: zero vector placeholder.
                strategy_repr = torch.zeros(1, self.config.hidden_size).to(self.device)
            batch_strategy_repr.append(strategy_repr)

        batch_strategy_repr = torch.cat(batch_strategy_repr, dim=0)  # Combine all batch representations
        batch_logits = self.strategy_mlp(batch_node_content_repr, batch_strategy_repr, batch_context_repr)
        batch_probs = F.softmax(batch_logits, dim=-1)

        # Categorical sampling keeps log-probs differentiable for REINFORCE.
        dist = torch.distributions.Categorical(batch_probs)
        batch_strategy_idx = dist.sample()
        batch_log_prob = dist.log_prob(batch_strategy_idx).to(self.device)

        return batch_strategy_idx.cpu().tolist(), batch_log_prob

    def generate_content(
        self,
        batch_query: List[str],
        batch_context: List[str],
        batch_strategies: List[List[str]],
        batch_remaining_budget: List[int]
    ) -> Tuple[List[str], List[int], torch.Tensor]:
        """
        Generate reasoning content based on selected strategies.

        Args:
            batch_query: List of query strings
            batch_context: List of context strings
            batch_strategies: List of lists of strategy names
            batch_remaining_budget: List of remaining token budgets
        Returns:
            batch_generated_texts: List of generated content strings
            batch_num_tokens: List of number of tokens generated
            batch_content_reprs: Tensor of content representations, shape [batch_size, hidden_size]

        NOTE(review): batch_query is only used for its length here — the
        prompt is context + strategy phrases (the query is node 0 of the
        context).
        """
        batch_strategy_prompts = [[] for _ in batch_query]
        batch_full_prompt: List[str] = []
        for i, strategies in enumerate(batch_strategies):
            # One phrasing is sampled per strategy to add prompt diversity.
            for s in strategies:
                prompt = random.choice(META_STRATEGIES[s])
                batch_strategy_prompts[i].append(prompt)

            batch_full_prompt.append(f"{batch_context[i]}\n{' '.join(batch_strategy_prompts[i])}\n")
        params_list = []

        for i in range(len(batch_query)):
            # Never request more tokens than the trajectory has budget left.
            remaining_budget = batch_remaining_budget[i]
            current_max_tokens = min(self.config.max_new_tokens, remaining_budget)
            params_list.append({
                "prompt": batch_full_prompt[i],
                "max_tokens": current_max_tokens,
            })

        # Fan out the per-trajectory completions; the server batches them.
        with ThreadPoolExecutor(max_workers=None) as executor:
            batch_outputs = list(executor.map(self.make_api_call, params_list))

        batch_generated_texts = [output.choices[0].text.strip() for output in batch_outputs]
        batch_num_tokens = [output.usage.completion_tokens for output in batch_outputs]
        batch_content_reprs = self.get_text_representation(batch_generated_texts)

        return batch_generated_texts, batch_num_tokens, batch_content_reprs

    def dynamic_skeleton_sampling(self, queries: List[str], M: int) -> Tuple[List[MetaReasoningDAG], torch.Tensor]:
        """
        Algorithm 1: Dynamic Skeleton Sampling at inference time.

        Args:
            queries: List of input query strings
            M: Number of trajectories per query
        Returns:
            batch_dags: List of generated MetaReasoningDAGs
                (query-major order: M consecutive DAGs per query)
            total_log_probs: Tensor of total log probabilities for each trajectory
        """
        # === 1. Initialize M*N DAGs ===
        N = len(queries)
        batch_size = N * M
        batch_dags: List[MetaReasoningDAG] = []
        query_reprs = self.get_text_representation(queries)
        for i in range(N):
            for _ in range(M):
                batch_dags.append(
                    MetaReasoningDAG(queries[i], query_reprs[i], 0)  # we don't count query tokens, set 0
                )

        total_log_probs = torch.zeros(batch_size).to(self.device)
        # the idx of trajectories that are still active
        active_indices = list(range(batch_size))
        i = 0  # Current topology step (i=1 is the first new node)
        while active_indices:
            i += 1
            # Per-trajectory state for this step: strategies sampled so far
            # (conditioning for later samples) and the resulting edges.
            sampled_strategies = {dag_idx: [] for dag_idx in active_indices}
            incoming_edges = {dag_idx: [] for dag_idx in active_indices}

            # Step 1: Determine incoming edges (traverse in reverse order)
            for j in range(i-1, -1, -1):
                node_j_content_reprs = torch.stack([batch_dags[idx].get_node_content_repr(j) for idx in active_indices], dim=0)
                context_reprs = torch.stack([batch_dags[idx].get_context_repr_up_to(i-1) for idx in active_indices], dim=0)

                strategy_idx, log_prob = self.sample_strategy(
                    node_j_content_reprs,
                    sampled_strategies,
                    context_reprs
                )

                for k, dag_idx in enumerate(active_indices):
                    sampled_strategies[dag_idx].append(strategy_idx[k])

                # Accumulate trajectory log-probs for REINFORCE.
                total_log_probs[active_indices] += log_prob

                # "zero" means no edge from node j to the new node i.
                for dag_idx in active_indices:
                    strategy_idx = sampled_strategies[dag_idx][-1]
                    strategy_name = self.idx_to_strategy[strategy_idx]
                    if strategy_name != "zero":
                        incoming_edges[dag_idx].append((j, strategy_name))

            # Step 2: Check which DAGs are still active
            # (all-"zero" edges == the trajectory chose to terminate).
            for dag_idx in active_indices.copy():
                if not incoming_edges[dag_idx]:
                    active_indices.remove(dag_idx)

            if not active_indices:
                break

            # Step 3: Generate base reasoning content
            batch_strategies = []
            batch_context = []
            batch_query = []
            batch_remaining_budget = []
            for dag_idx in active_indices:
                dag = batch_dags[dag_idx]
                strategies = [edge[1] for edge in incoming_edges[dag_idx]]
                batch_strategies.append(strategies)
                context = dag.get_context_up_to(i-1)
                batch_context.append(context)
                batch_query.append(dag.nodes[0].content)
                batch_remaining_budget.append(self.token_budget - dag.total_tokens())

            batch_content, batch_num_tokens, batch_content_repr = self.generate_content(
                batch_query,
                batch_context,
                batch_strategies,
                batch_remaining_budget
            )

            # Step 4: Update DAGs with new nodes and edges
            for k, dag_idx in enumerate(active_indices):
                dag = batch_dags[dag_idx]
                content = batch_content[k]
                num_tokens = batch_num_tokens[k]
                content_repr = batch_content_repr[k]
                dag.add_node(content, num_tokens, content_repr)

            # Trajectories dropped in Step 2 have empty edge lists, so this
            # loop only adds edges for DAGs that actually got node i.
            for dag_idx in incoming_edges:
                dag = batch_dags[dag_idx]
                for from_j, strategy in incoming_edges[dag_idx]:
                    dag.add_edge(from_j, i, strategy)
            # Step 5: Check stopping criteria
            # Stop on empty generation, a final boxed answer, or exhausted budget.
            for dag_idx in active_indices.copy():
                dag = batch_dags[dag_idx]
                content = dag.get_node_content(i)
                if not content or "boxed" in content.lower() or dag.total_tokens() >= self.token_budget:
                    active_indices.remove(dag_idx)

        return batch_dags, total_log_probs

    def extract_answer(self, batch_dags: List[MetaReasoningDAG]) -> List[str]:
        """Extract final answer from the reasoning DAG via an 'Answer' prompt."""
        batch_answer_prompts = []  # NOTE(review): unused leftover variable
        params_list = []
        for dag in batch_dags:
            # Full context = every node generated for this trajectory.
            full_context = dag.get_context_up_to(len(dag.nodes) - 1)

            answer_prompt = f"{full_context}\n{random.choice(META_STRATEGIES['Answer'])}\n"
            params_list.append( {
                "prompt": answer_prompt,
                "max_tokens": self.config.max_new_tokens,
            })

        with ThreadPoolExecutor(max_workers=None) as executor:
            batch_outputs = list(executor.map(self.make_api_call, params_list))

        batch_answers = [output.choices[0].text.strip() for output in batch_outputs]

        return batch_answers

    def inference(self, batch_queries: List[str], M: int) -> Tuple[List[str], List[MetaReasoningDAG]]:
        """Run no-grad inference: sample skeletons, then extract answers.

        Returns one answer and one DAG per trajectory (len == len(batch_queries) * M).
        """
        self.strategy_mlp.eval()
        self.strategy_embeddings.eval()

        with torch.no_grad():
            batch_dags, _ = self.dynamic_skeleton_sampling(batch_queries, M)
            batch_answers = self.extract_answer(batch_dags)

        return batch_answers, batch_dags

    def save_checkpoint(self, path: str):
        """Save model checkpoint (trainable modules + optimizer state only)."""
        torch.save({
            'strategy_embeddings': self.strategy_embeddings.state_dict(),
            'strategy_mlp': self.strategy_mlp.state_dict(),
            'optimizer': self.optimizer.state_dict()
        }, path)
        print(f"Checkpoint saved to {path}")

    def load_checkpoint(self, path: str):
        """Load model checkpoint.

        NOTE(review): torch.load without weights_only=True unpickles
        arbitrary objects — only load checkpoints from trusted sources.
        """
        checkpoint = torch.load(path, map_location=self.device)
        self.strategy_embeddings.load_state_dict(checkpoint['strategy_embeddings'])
        self.strategy_mlp.load_state_dict(checkpoint['strategy_mlp'])
        self.optimizer.load_state_dict(checkpoint['optimizer'])
        print(f"Checkpoint loaded from {path}")

    def make_api_call(self,params):
        """Make a single completion API call to the vLLM generator server.

        Args:
            params: dict with 'prompt' and 'max_tokens' keys.
        """
        return self.generator_client.completions.create(
            model=self.model_name_for_api,
            prompt=params["prompt"],
            max_tokens=params["max_tokens"],
            temperature=self.config.temperature,
            top_p=self.config.top_p,
        )
automr/strategies.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Meta Reasoning Strategy Prompts (from Table 2 in paper)
# Each strategy maps to several interchangeable phrasings; generate_content()
# picks one at random per sampled strategy.
# NOTE(review): several phrasings contain grammar slips ("if there anything
# missing", "can captures the essence", "knowledge is related"). They are
# left verbatim: a trained checkpoint's behavior may depend on exact wording,
# so edits here should be coordinated with retraining.
META_STRATEGIES = {
    "Next": [
        "Next,",
        "Then,",
        "Now, let me move on to the next step."
    ],
    "Reflect": [
        "Let me consider what part of the reasoning feels least certain, and how can it be examined.",
        "Wait, let me think if there anything missing in the current reasoning.",
        "Let me think does the current line of thought have any error."
    ],
    "Explore": [
        "Let me consider which direction of thinking I should explore.",
        "Let me think what potential strategy has not yet been considered that could be the next solution path.",
        "Let me think what possible solution could be tried next."
    ],
    "Decompose": [
        "This question is a bit complex, let me think how to decompose it into sub-questions that I can solve.",
        "The question feels too broad, let me think what smaller version could I tackle first.",
        "Let me think if I can express the problem in terms of simpler components or modules.",
        "Let me consider the options one by one."  # For multiple choice
    ],
    "Summarize": [
        "Let me summarize what have I established so far.",
        "Let me summarize the current state of reasoning process, what's known, unknown, and assumed?",
        "Let me consider if I can captures the essence of the reasoning so far with single sentence."
    ],
    "Recall": [
        "Let me think if I have encountered similar problems or if learned knowledge and previous intermediate step can be used here.",
        "Let me think what prior reasoning steps are directly relevant here or this question connect to earlier results.",
        "Let me recall which theorems, rules, or principles from earlier knowledge is related to this question."
    ],
    "Answer": [
        "Let me give the answer according to current reasoning context."
    ]
}

# All strategies including the special "zero" edge type
# ("zero" means "no edge" during skeleton sampling and is never prompted).
# Order matters: nn.Embedding rows and checkpoint indices follow this list.
STRATEGY_LIST = ["Next", "Reflect", "Explore", "Decompose", "Summarize", "Recall", "Answer", "zero"]
automr/trainer.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import torch
3
+ from typing import List, Dict, Tuple
4
+ from tqdm import tqdm
5
+
6
+ from .model import AutoMR
7
+ from .config import AutoMRConfig
8
+ from .utils import check_answer_match, ensure_dir, save_json
9
+ import os
10
+
11
+
12
+ class AutoMRTrainer:
13
+ """Trainer for AutoMR using REINFORCE (Algorithm 2 from paper)"""
14
+
15
    def __init__(self, model: AutoMR, config: AutoMRConfig):
        """Wire the trainer to a model/config pair and set up bookkeeping."""
        self.model = model
        self.config = config
        # Checkpoints (and training_history.json) are written here.
        ensure_dir(config.checkpoint_dir)

        # Track training progress
        self.global_step = 0  # optimizer updates performed so far
        self.best_val_reward = -float('inf')  # best validation reward observed
        self.patience_counter = 0  # validations since the last improvement
        self.training_history = {
            'train_loss': [],
            'train_reward': [],
            'val_reward': [],
            'val_accuracy': [],
            'steps': []
        }
31
+
32
    def compute_reward_batch(self, queries: List[str], answers: List[str]) -> Tuple[float, float]:
        """
        Compute average reward and accuracy on a batch.
        Returns: (avg_reward, accuracy)

        NOTE(review): leaves the strategy modules in eval mode; train_step()
        re-enables train mode itself, so the current call pattern is safe.
        """
        total_reward = 0.0
        correct = 0
        total = len(queries)

        # Reward computation is pure inference: no dropout, no gradients.
        self.model.strategy_mlp.eval()
        self.model.strategy_embeddings.eval()

        with torch.no_grad():
            dags, _ = self.model.dynamic_skeleton_sampling(queries, M=1)
            pred_answers = self.model.extract_answer(dags)

        is_correct = [check_answer_match(
            pred_answer, answer, self.config.task_type
        ) for pred_answer, answer in zip(pred_answers, answers)]

        # Binary reward: +1 for a correct final answer, -1 otherwise.
        rewards = [1.0 if correct else -1.0 for correct in is_correct]
        total_reward += sum(rewards)
        correct += sum(is_correct)

        avg_reward = total_reward / total if total > 0 else 0.0
        accuracy = correct / total if total > 0 else 0.0

        return avg_reward, accuracy
60
+
61
    def validate(self, val_data: List[Dict[str, str]]) -> Tuple[float, float]:
        """
        Run validation on validation set.
        Returns: (avg_reward, accuracy)

        Uses a random subsample of val_data to keep validation cheap, so
        repeated calls on the same data give different estimates.
        """
        # Sample validation batch
        val_batch_size = min(self.config.val_batch_size, len(val_data))
        val_batch = random.sample(val_data, val_batch_size)

        val_queries = [item['query'] for item in val_batch]
        val_answers = [item['answer'] for item in val_batch]

        avg_reward, accuracy = self.compute_reward_batch(val_queries, val_answers)

        return avg_reward, accuracy
76
+
77
    def train_step(self, batch_queries: List[str], batch_answers: List[str]) -> Tuple[float, float]:
        """
        Single training step using REINFORCE (Equation 4 from paper).
        Returns: (loss, avg_reward)
        """
        self.model.strategy_mlp.train()
        self.model.strategy_embeddings.train()

        M = self.config.num_samples_per_query
        loss = []
        rewards_list = []
        # expand answers for M samples per query
        # (dynamic_skeleton_sampling emits DAGs query-major, M per query,
        # so this repetition aligns answers with trajectories)
        batch_answers = [answer for answer in batch_answers for _ in range(M)]
        batch_dags, batch_log_probs = self.model.dynamic_skeleton_sampling(batch_queries, M)
        # Get prediction
        batch_pred_answers = self.model.extract_answer(batch_dags)
        # Compute reward
        for pred_answer, answer, log_prob in zip(batch_pred_answers, batch_answers, batch_log_probs):
            # Binary reward: +1 correct, -1 incorrect.
            reward = 1.0 if check_answer_match(
                pred_answer, answer, self.config.task_type
            ) else -1.0
            rewards_list.append(reward)

            # Accumulate gradient (REINFORCE): maximize reward-weighted
            # trajectory log-probability, i.e. minimize -reward * log_prob.
            loss.append(-reward * log_prob)

        # Compute average reward for this batch
        avg_reward = sum(rewards_list) / len(rewards_list) if rewards_list else 0.0

        # Update parameters
        self.model.optimizer.zero_grad()
        loss = torch.stack(loss).mean()

        loss.backward()
        # Clip gradients of all trainable parameters jointly.
        torch.nn.utils.clip_grad_norm_(
            list(self.model.strategy_embeddings.parameters()) +
            list(self.model.strategy_mlp.parameters()),
            max_norm=self.config.gradient_clip
        )
        self.model.optimizer.step()

        return loss.item(), avg_reward
119
+
120
+ def should_stop_early(self) -> bool:
121
+ """Check if training should stop early"""
122
+ return self.patience_counter >= self.config.early_stopping_patience
123
+
124
+ def save_history(self):
125
+ history_path = os.path.join(self.config.checkpoint_dir, "training_history.json")
126
+ save_json(self.training_history, history_path)
127
+
128
+ def save_checkpoint(self, epoch: int, is_best: bool = False):
129
+ """Save checkpoint"""
130
+ if self.config.save_best_only and not is_best:
131
+ return
132
+
133
+ checkpoint_name = f"checkpoint_epoch_{epoch}_step_{self.global_step}.pt"
134
+ if is_best:
135
+ checkpoint_name = "best_checkpoint.pt"
136
+
137
+ checkpoint_path = os.path.join(self.config.checkpoint_dir, checkpoint_name)
138
+
139
+ self.model.save_checkpoint(checkpoint_path)
140
+
141
+ # Also save training history
142
+ self.save_history()
143
+
144
+ if is_best:
145
+ print(f" 💾 Best checkpoint saved: {checkpoint_path}")
146
+
147
    def train(self, train_data: List[Dict[str, str]], val_data: List[Dict[str, str]]):
        """Training loop with validation (Algorithm 2 from paper with validation).

        Runs REINFORCE updates over shuffled mini-batches, validates every
        ``val_every_n_steps`` global steps, tracks the best validation reward,
        and returns early when patience is exhausted.
        NOTE(review): ``train_data`` is shuffled in place each epoch, so the
        caller's list order is mutated.
        """
        print(f"\nStarting AutoMR training for {self.config.num_epochs} epochs...")
        print(f"Training samples: {len(train_data)}")
        print(f"Validation samples: {len(val_data)}")
        print(f"Batch size: {self.config.batch_size}")
        print(f"Samples per query: {self.config.num_samples_per_query}")
        print(f"Validation every {self.config.val_every_n_steps} steps")
        print(f"Early stopping patience: {self.config.early_stopping_patience}\n")

        for epoch in range(self.config.num_epochs):
            random.shuffle(train_data)
            epoch_loss = 0.0
            epoch_reward = 0.0
            num_batches = 0

            pbar = tqdm(
                range(0, len(train_data), self.config.batch_size),
                desc=f"Epoch {epoch+1}/{self.config.num_epochs}"
            )

            for i in pbar:
                # The last batch may be shorter than batch_size.
                batch = train_data[i:i+self.config.batch_size]
                batch_queries = [item['query'] for item in batch]
                batch_answers = [item['answer'] for item in batch]

                # Training step
                loss, avg_reward = self.train_step(batch_queries, batch_answers)
                epoch_loss += loss
                epoch_reward += avg_reward
                num_batches += 1
                self.global_step += 1

                # History is flushed to disk after every step so a crash
                # loses at most one step of logging.
                # NOTE(review): 'train_reward' also receives a per-epoch
                # average below, so the list mixes two granularities.
                self.training_history['train_reward'].append(avg_reward)
                self.save_history()

                pbar.set_postfix({
                    'loss': f'{loss:.4f}',
                    'reward': f'{avg_reward:.3f}',
                    'step': self.global_step
                })

                # Validation
                if self.global_step % self.config.val_every_n_steps == 0:
                    print(f"\n{'='*80}")
                    print(f"Validation at Step {self.global_step}")
                    print(f"{'='*80}")

                    val_reward, val_accuracy = self.validate(val_data)

                    print(f"Validation Reward: {val_reward:.4f}")
                    print(f"Validation Accuracy: {val_accuracy:.2%}")

                    # Record history
                    self.training_history['val_reward'].append(val_reward)
                    self.training_history['val_accuracy'].append(val_accuracy)
                    self.training_history['steps'].append(self.global_step)

                    # Check if this is the best model (strict improvement only).
                    is_best = val_reward > self.best_val_reward
                    if is_best:
                        print(f"✨ New best validation reward: {val_reward:.4f} (previous: {self.best_val_reward:.4f})")
                        self.best_val_reward = val_reward
                        self.patience_counter = 0
                        self.save_checkpoint(epoch + 1, is_best=True)
                    else:
                        self.patience_counter += 1
                        print(f"No improvement. Patience: {self.patience_counter}/{self.config.early_stopping_patience}")

                    print(f"{'='*80}\n")

                # Check early stopping; returns without an end-of-epoch save
                # (the best checkpoint was already written above).
                if self.should_stop_early():
                    print(f"\n Early stopping triggered after {self.global_step} steps")
                    print(f"Best validation reward: {self.best_val_reward:.4f}")
                    return

            # End of epoch
            # NOTE(review): raises ZeroDivisionError if train_data is empty.
            avg_epoch_loss = epoch_loss / num_batches
            avg_epoch_reward = epoch_reward / num_batches

            self.training_history['train_loss'].append(avg_epoch_loss)
            self.training_history['train_reward'].append(avg_epoch_reward)

            print(f"\n{'='*80}")
            print(f"Epoch {epoch+1} Summary")
            print(f"{'='*80}")
            print(f"Average Loss: {avg_epoch_loss:.4f}")
            print(f"Average Reward: {avg_epoch_reward:.4f}")
            print(f"Best Val Reward: {self.best_val_reward:.4f}")
            print(f"{'='*80}\n")

            # Save checkpoint at end of epoch (if not save_best_only)
            if not self.config.save_best_only:
                self.save_checkpoint(epoch + 1)

        print("Training completed!")
        print(f"Best validation reward achieved: {self.best_val_reward:.4f}")
automr/utils.py ADDED
@@ -0,0 +1,693 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import os
3
+ import json
4
+ from typing import Any
5
+ import re
6
+ import regex
7
+ from latex2sympy2 import latex2sympy
8
+ from word2number import w2n
9
+
10
+
11
def extract_math_answer(text: str) -> str:
    """Extract the final answer from a math solution.

    Preference order:
      1. the last ``\\boxed{...}`` expression (nested braces supported),
      2. text following trigger phrases such as "answer is",
      3. the whole stripped text as a fallback.

    Note: the trigger-phrase fallback matches on ``text.lower()``, so those
    answers are returned lower-cased (original behavior, preserved).
    """
    boxed = _last_boxed_content(text)
    if boxed is not None:
        return boxed.strip()

    # Try to find answer after "answer is" or similar phrases
    answer_patterns = [
        r'answer is[:\s]+([^\n.]+)',
        r'final answer[:\s]+([^\n.]+)',
        r'therefore[,:\s]+([^\n.]+)'
    ]
    for pattern in answer_patterns:
        matches = re.findall(pattern, text.lower())
        if matches:
            return matches[-1].strip()

    return text.strip()


def _last_boxed_content(text: str):
    """Return the content of the last balanced ``\\boxed{...}`` or None.

    Fix: the previous regex ``\\boxed\\{([^}]*)\\}`` truncated nested
    expressions like ``\\boxed{\\frac{1}{2}}`` at the first ``}``.
    """
    start = text.rfind("\\boxed{")
    if start == -1:
        return None
    open_idx = start + len("\\boxed{") - 1  # index of the opening brace
    depth = 0
    for i in range(open_idx, len(text)):
        ch = text[i]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return text[open_idx + 1:i]
    # Unbalanced braces: treat as no boxed answer.
    return None
32
+
33
+
34
def extract_multiple_choice_answer(text: str) -> str:
    """Pull the last standalone option letter (A-D) out of *text*.

    Falls back to the stripped raw text when no option letter is found.
    """
    letters = re.findall(r'\b([A-D])\b', text.upper())
    return letters[-1] if letters else text.strip()
43
+
44
+
45
def normalize_answer(answer: str) -> str:
    """Lower-case, drop '$' and backslashes, and collapse whitespace for comparison."""
    cleaned = answer.strip().lower()
    # Remove common mathematical notation markers.
    for token in ('$', '\\'):
        cleaned = cleaned.replace(token, '')
    # Collapse any internal runs of whitespace to single spaces.
    return ' '.join(cleaned.split())
53
+
54
+
55
def check_answer_match(pred: str, ground_truth: str, task_type: str = "math") -> bool:
    """Loose answer comparison: exact match, or substring in either direction.

    ``task_type`` is kept for interface compatibility but the comparison is
    purely string-based.  Fix: an empty prediction previously matched every
    ground truth ('' is a substring of any string), silently inflating
    accuracy; empty strings now match only empty strings.
    """
    pred = str(pred).strip()
    ground_truth = str(ground_truth).strip()
    if not pred or not ground_truth:
        return pred == ground_truth
    return pred == ground_truth or pred in ground_truth or ground_truth in pred
71
+
72
+
73
def ensure_dir(directory: str):
    """Create *directory* (and any missing parents) if it does not exist.

    Fix: the exists()/makedirs() pair was race-prone and crashed on '' (the
    dirname of a bare filename); ``exist_ok=True`` plus the emptiness guard
    makes this safe and idempotent.
    """
    if directory:
        os.makedirs(directory, exist_ok=True)
77
+
78
+
79
def save_json(data: Any, path: str):
    """Serialize *data* as pretty-printed JSON at *path*, creating parent dirs.

    Fix: a bare filename (empty dirname) previously crashed inside the
    directory-creation step; parent creation is now skipped when the path
    has no directory component.
    """
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, 'w') as f:
        json.dump(data, f, indent=2)
84
+
85
+
86
def load_json(path: str) -> Any:
    """Read and deserialize the JSON file at *path*."""
    with open(path, 'r') as handle:
        raw = handle.read()
    return json.loads(raw)
90
+
91
def _fix_fracs(string):
    """Normalize shorthand LaTeX fractions: '\\frac12' -> '\\frac{1}{2}'.

    Only single-character numerator/denominator shorthands are rewritten;
    chunks whose numerator is already braced are kept as-is.  On malformed
    input the original string is returned unchanged.
    """
    substrs = string.split("\\frac")
    new_str = substrs[0]
    if len(substrs) > 1:
        substrs = substrs[1:]
        for substr in substrs:
            new_str += "\\frac"
            if len(substr) > 0 and substr[0] == "{":
                # Numerator already braced: keep the chunk untouched.
                new_str += substr
            else:
                try:
                    # Need at least numerator + start of denominator.
                    # NOTE(review): bare except around an assert — stripped
                    # under -O; flagged but left as-is here.
                    assert len(substr) >= 2
                except:
                    return string
                a = substr[0]
                b = substr[1]
                if b != "{":
                    # '\frac ab...' -> '\frac{a}{b}...'
                    if len(substr) > 2:
                        post_substr = substr[2:]
                        new_str += "{" + a + "}{" + b + "}" + post_substr
                    else:
                        new_str += "{" + a + "}{" + b + "}"
                else:
                    # '\frac a{...}' -> '\frac{a}{...}'
                    if len(substr) > 2:
                        post_substr = substr[2:]
                        new_str += "{" + a + "}" + b + post_substr
                    else:
                        new_str += "{" + a + "}" + b
        string = new_str
    return string
121
+
122
+
123
+ def _fix_a_slash_b(string):
124
+ if len(string.split("/")) != 2:
125
+ return string
126
+ a = string.split("/")[0]
127
+ b = string.split("/")[1]
128
+ try:
129
+ if "sqrt" not in a:
130
+ a = int(a)
131
+ if "sqrt" not in b:
132
+ b = int(b)
133
+ assert string == "{}/{}".format(a, b)
134
+ new_string = "\\frac{" + str(a) + "}{" + str(b) + "}"
135
+ return new_string
136
+ except:
137
+ return string
138
+
139
+
140
+ def _fix_sqrt(string):
141
+ _string = re.sub(r"\\sqrt(\w+)", r"\\sqrt{\1}", string)
142
+ return _string
143
+
144
+
145
def convert_word_number(text: str) -> str:
    """Convert an English number word ('twelve') to digits ('12').

    Non-number text is passed through unchanged.  Fix: the bare ``except:``
    also swallowed KeyboardInterrupt/SystemExit; narrowed to ``Exception``
    (w2n raises ValueError for unknown words, but other input-dependent
    errors are possible — hence Exception rather than ValueError alone).
    """
    try:
        return str(w2n.word_to_num(text))
    except Exception:
        return text
151
+
152
+
153
# units mainly from MathQA
# Unit tokens stripped from candidate answers by strip_string(); each entry
# is removed only when it occurs as a standalone token (bounded by start/end
# of string or a non-word character).
# NOTE(review): the list contains duplicates ("kmph", "sec") and aggressive
# single-letter entries ("a", "s", "t", "m", ...) — confirm these are
# intentional, as they can strip meaningful symbols from answers.
unit_texts = [
    "east",
    "degree",
    "mph",
    "kmph",
    "ft",
    "m sqaure",
    " m east",
    "sq m",
    "deg",
    "mile",
    "q .",
    "monkey",
    "prime",
    "ratio",
    "profit of rs",
    "rd",
    "o",
    "gm",
    "p . m",
    "lb",
    "tile",
    "per",
    "dm",
    "lt",
    "gain",
    "ab",
    "way",
    "west",
    "a .",
    "b .",
    "c .",
    "d .",
    "e .",
    "f .",
    "g .",
    "h .",
    "t",
    "a",
    "h",
    "no change",
    "men",
    "soldier",
    "pie",
    "bc",
    "excess",
    "st",
    "inches",
    "noon",
    "percent",
    "by",
    "gal",
    "kmh",
    "c",
    "acre",
    "rise",
    "a . m",
    "th",
    "π r 2",
    "sq",
    "mark",
    "l",
    "toy",
    "coin",
    "sq . m",
    "gallon",
    "° f",
    "profit",
    "minw",
    "yr",
    "women",
    "feet",
    "am",
    "pm",
    "hr",
    "cu cm",
    "square",
    "v â € ™",
    "are",
    "rupee",
    "rounds",
    "cubic",
    "cc",
    "mtr",
    "s",
    "ohm",
    "number",
    "kmph",
    "day",
    "hour",
    "minute",
    "min",
    "second",
    "man",
    "woman",
    "sec",
    "cube",
    "mt",
    "sq inch",
    "mp",
    "∏ cm ³",
    "hectare",
    "more",
    "sec",
    "unit",
    "cu . m",
    "cm 2",
    "rs .",
    "rs",
    "kg",
    "g",
    "month",
    "km",
    "m",
    "cm",
    "mm",
    "apple",
    "liter",
    "loss",
    "yard",
    "pure",
    "year",
    "increase",
    "decrease",
    "d",
    "less",
    "Surface",
    "litre",
    "pi sq m",
    "s .",
    "metre",
    "meter",
    "inch",
]

# Also strip the plural form of every unit.
unit_texts.extend([t + "s" for t in unit_texts])
290
+
291
+
292
def strip_string(string, skip_unit=False):
    """Canonicalize a LaTeX/plain-text math answer for string comparison.

    Applies a long pipeline of normalizations (fraction/sqrt rewriting, unit
    and currency removal, word-number conversion, whitespace stripping, ...).
    ``skip_unit`` disables the unit-token removal pass for datasets whose
    answers legitimately carry units.

    Fixes in this revision:
      * the quote-removal step previously discarded the ``str.replace``
        results (a no-op); the results are now assigned back,
      * a duplicate percent-removal line using the invalid escape ``"\\%"``
        (SyntaxWarning on recent Pythons) was removed — behavior unchanged.
    """
    string = str(string).strip()
    # linebreaks
    string = string.replace("\n", "")

    # right "."
    string = string.rstrip(".")

    # remove inverse spaces
    string = string.replace("\\!", "")

    # matrix: normalize array environments to pmatrix
    string = re.sub(r"\\begin\{array\}\{.*?\}", r"\\begin{pmatrix}", string)
    string = re.sub(r"\\end\{array\}", r"\\end{pmatrix}", string)
    string = string.replace("bmatrix", "pmatrix")

    # replace tfrac and dfrac with frac
    string = string.replace("tfrac", "frac")
    string = string.replace("dfrac", "frac")
    string = (string.replace("\\neq", "\\ne").replace("\\leq", "\\le").replace("\\geq", "\\ge"))

    # remove \left and \right
    string = string.replace("\\left", "")
    string = string.replace("\\right", "")
    string = string.replace("\\{", "{")
    string = string.replace("\\}", "}")

    # Remove a trailing \text{...} unit (e.g. miles, dollars) if present
    _string = re.sub(r"\\text{.*?}$", "", string).strip()
    if _string != "" and _string != string:
        string = _string

    if not skip_unit:
        # Remove unit tokens; run twice so units exposed by the first pass
        # (e.g. after a neighboring unit is removed) are caught as well.
        for _ in range(2):
            for unit_text in unit_texts:
                # The prefix must be the start of the string or a non-word
                # character, the suffix the end or a non-word character.
                _string = re.sub(r"(^|\W)" + unit_text + r"($|\W)", r"\1\2", string)
                if _string != "":
                    string = _string

    # Remove circ (degrees)
    string = string.replace("^{\\circ}", "")
    string = string.replace("^\\circ", "")

    # remove dollar signs
    string = string.replace("\\$", "")
    string = string.replace("$", "")
    string = string.replace("\\(", "").replace("\\)", "")

    # convert word number to digit
    string = convert_word_number(string)

    # replace "\\text{...}" with "..."
    string = re.sub(r"\\text\{(.*?)\}", r"\1", string)
    for key in ["x=", "y=", "z=", "x\\in", "y\\in", "z\\in", "x\\to", "y\\to", "z\\to"]:
        string = string.replace(key, "")
    string = string.replace("\\emptyset", r"{}")
    string = string.replace("(-\\infty,\\infty)", "\\mathbb{R}")

    # remove percentage (both escaped and bare forms)
    string = string.replace("\\%", "")
    string = string.replace("%", "")

    # " ." and "{." are shorthand for " 0." / "{0."
    string = string.replace(" .", " 0.")
    string = string.replace("{.", "{0.")

    # NOTE(review): isalnum() is False for any string containing braces or
    # parens, so this bracket-stripping condition can never fire; probably
    # string[1:-1].isalnum() was intended — left unchanged to preserve
    # behavior.
    if (string.startswith("{") and string.endswith("}") and string.isalnum() or
        string.startswith("(") and string.endswith(")") and string.isalnum() or
        string.startswith("[") and string.endswith("]") and string.isalnum()):
        string = string[1:-1]

    # inf
    string = string.replace("infinity", "\\infty")
    if "\\infty" not in string:
        string = string.replace("inf", "\\infty")
    string = string.replace("+\\inity", "\\infty")

    # and
    string = string.replace("and", "")
    string = string.replace("\\mathbf", "")

    # use regex to remove \mbox{...}
    string = re.sub(r"\\mbox{.*?}", "", string)

    # quotes (fix: results were previously discarded)
    string = string.replace("'", "")
    string = string.replace('"', "")

    # i, j: imaginary unit spelled 'j' when 'i' is absent
    if "j" in string and "i" not in string:
        string = string.replace("j", "i")

    # replace a.000b where b is not a digit (or end-of-string) with ab
    string = re.sub(r"(\d+)\.0*([^\d])", r"\1\2", string)
    string = re.sub(r"(\d+)\.0*$", r"\1", string)

    # if empty, return empty string
    if len(string) == 0:
        return string
    if string[0] == ".":
        string = "0" + string

    # drop short variable prefixes such as "k = " or "q = "
    if len(string.split("=")) == 2:
        if len(string.split("=")[0]) <= 2:
            string = string.split("=")[1]

    string = _fix_sqrt(string)
    string = string.replace(" ", "")

    # \frac1b or \frac12 --> \frac{1}{b} and \frac{1}{2}
    string = _fix_fracs(string)

    # simple X/Y --> \frac{X}{Y}
    string = _fix_a_slash_b(string)

    return string
419
+
420
+
421
# Phrases that introduce the final answer in few-shot prompts; used by
# choice_answer_clean() both to detect in-context exemplars and to locate
# the answer span.
direct_answer_trigger_for_fewshot = ("choice is", "answer is")
422
+
423
+
424
def choice_answer_clean(pred: str):
    """Extract a clean multiple-choice answer (letter A-E) from a prediction.

    If no standalone option letter is found, a stripped form of the text
    after the last trigger phrase is returned instead.
    """
    pred = pred.strip("\n")

    # Few-shot (ICL) transcripts repeat a trigger phrase once per exemplar;
    # in that case only the first chunk (before the first blank line) is the
    # model's own answer.
    is_icl = any(pred.count(trigger) > 1 for trigger in direct_answer_trigger_for_fewshot)
    if is_icl:
        pred = pred.split("\n\n")[0]

    # Keep only the text after the last trigger phrase, if one is present.
    pieces = re.split("|".join(direct_answer_trigger_for_fewshot), pred)
    answer_flag = len(pieces) > 1
    if answer_flag:
        pred = pieces[-1]

    pred = pred.strip("\n").rstrip(".").rstrip("/").strip(" ").lstrip(":")

    # Prefer explicit standalone option letters.
    letters = re.findall(r"\b(A|B|C|D|E)\b", pred.upper())
    candidates = letters if letters else [pred.strip().strip(".")]

    if len(candidates) == 0:
        pred = ""
    elif answer_flag:
        # Right after an explicit trigger, the first letter is the answer.
        pred = candidates[0]
    else:
        # Otherwise assume the last mention is the final answer.
        pred = candidates[-1]

    # Remove the period at the end, again!
    pred = pred.rstrip(".").rstrip("/")

    return pred
466
+
467
+
468
def find_box(pred_str: str):
    """Return the content of the last ``boxed{...}`` expression in *pred_str*.

    Nested braces are handled with a depth counter.  If 'boxed' is not
    followed by an opening brace, everything up to the next '$' is returned.
    """
    tail = pred_str.split("boxed")[-1]
    if not tail:
        return ""
    if tail[0] != "{":
        return tail.split("$")[0].strip()
    depth = 1
    content = ""
    for ch in tail[1:]:
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                break
        content += ch
    return content
489
+
490
+
491
def clean_units(pred_str: str):
    """Strip currency/degree symbols and rewrite pi as the numeral 3.14."""

    def _pi_to_number(expr):
        expr = expr.replace("\\pi", "π")
        # Standalone π (not preceded by a digit or '}') becomes 3.14.
        expr = re.sub(r"(?<![\d}])\\?π", "3.14", expr)
        # Implicit multiplication such as '3π' becomes explicit: '3*3.14'.
        expr = re.sub(r"(\d)(\\?π)", r"\1*3.14", expr)
        # Brace-wrapped '{π}' and explicitly multiplied '*π' forms.
        expr = re.sub(r"\{(\\?π)\}", "3.14", expr)
        expr = re.sub(r"\*(\\?π)", "*3.14", expr)
        return expr

    cleaned = _pi_to_number(pred_str)
    # Order matters: '°C' must be handled before the bare '°'.
    for old, new in (("%", "/100"), ("$", ""), ("¥", ""),
                     ("°C", ""), (" C", ""), ("°", "")):
        cleaned = cleaned.replace(old, new)
    return cleaned
514
+
515
+
516
def extract_theoremqa_answer(pred: str, answer_flag: bool = True):
    """Normalize a TheoremQA-style prediction.

    Boolean answers are mapped to 'True'/'False', lettered options are kept
    as-is, and (when ``answer_flag`` is set) numeric answers are extracted
    and evaluated; on failure the last number in the string is used.
    """
    if any([option in pred.lower() for option in ["yes", "true"]]):
        pred = "True"
    elif any([option in pred.lower() for option in ["no", "false"]]):
        pred = "False"
    elif any([option in pred.lower() for option in ["(a)", "(b)", "(c)", "(d)", "(e)", "(f)"]]):
        pass
    else:
        # Some of the models somehow get used to boxed output from pre-training
        if "boxed" in pred:
            pred = find_box(pred)

        if answer_flag:
            # Extract the numbers out of the string
            pred = pred.split("=")[-1].strip()
            pred = clean_units(pred)
            try:
                # HACK/security: eval() on latex2sympy output executes
                # arbitrary expressions derived from model text — do not use
                # on untrusted input without sandboxing.
                tmp = str(latex2sympy(pred))
                pred = str(eval(tmp))
            except Exception:
                # 'number unit' forms: keep the leading number only.
                if re.match(r"-?[\d\.]+\s\D+$", pred):
                    pred = pred.split(" ")[0]
                elif re.match(r"-?[\d\.]+\s[^\s]+$", pred):
                    pred = pred.split(" ")[0]
                else:
                    # desparate search over the last number
                    preds = re.findall(r"-?\d*\.?\d+", pred)
                    if len(preds) >= 1:
                        pred = preds[-1]
                    else:
                        pred = ""

    return pred
549
+
550
+
551
def extract_answer(pred_str, data_name, use_last_number=True):
    """Extract the final answer from a model prediction for dataset *data_name*.

    Tries, in order: dataset-specific formats (humaneval code block, mmlu
    '(X)'), a minerva-style "final answer is $...$" span, a balanced
    ``boxed{...}`` expression, common trigger phrases (incl. Chinese
    '答案是'), and finally — when ``use_last_number`` — the last number in
    the text.  The result is canonicalized with ``strip_string``.

    Fix: the last-number pattern was a non-raw string with invalid escapes
    (SyntaxWarning on recent Pythons); now a raw string, same regex.
    """
    if data_name.lower() == "humaneval":
        pattern = r"### Function Body:\s*\n```python\n(.*?)\n```"
        matches = re.findall(pattern, pred_str, re.DOTALL)
        try:
            return matches[0]
        except IndexError:
            return ""
    elif data_name.lower() == "mmlu":
        if len(pred_str) >= 3 and pred_str[0] == '(' and pred_str[2] == ')':
            return pred_str[1]
    # Strip a known garbled Cyrillic artifact.
    pred_str = pred_str.replace("\u043a\u0438", "")

    if "final answer is $" in pred_str and "$. I hope" in pred_str:
        # minerva_math
        tmp = pred_str.split("final answer is $", 1)[1]
        pred = tmp.split("$. I hope", 1)[0].strip()
    elif "boxed" in pred_str:
        ans = pred_str.split("boxed")[-1]
        if len(ans) == 0:
            return ""
        elif ans[0] == "{":
            # Balanced-brace scan so nested fractions survive.
            stack = 1
            a = ""
            for c in ans[1:]:
                if c == "{":
                    stack += 1
                    a += c
                elif c == "}":
                    stack -= 1
                    if stack == 0:
                        break
                    a += c
                else:
                    a += c
        else:
            a = ans.split("$")[0].strip()
        pred = a
    elif "he answer is" in pred_str:
        # Matches both 'The answer is' and 'the answer is'.
        pred = pred_str.split("he answer is")[-1].strip()
    elif "final answer is" in pred_str:
        pred = pred_str.split("final answer is")[-1].strip()
    elif "答案是" in pred_str:
        # Handle Chinese few-shot multiple choice problem answer extraction
        pred = pred_str.split("答案是")[1].strip().split("\n\n")[0].strip()
    else:  # use the last number
        if use_last_number:
            pattern = r"-?\d*\.?\d+"
            pred = re.findall(pattern, pred_str.replace(",", ""))
            if len(pred) >= 1:
                pred = pred[-1]
            else:
                pred = ""
        else:
            pred = ""

    # Collapse line breaks and trim decorative punctuation.
    pred = re.sub(r"\n\s*", "", pred)
    if pred != "" and pred[0] == ":":
        pred = pred[1:]
    if pred != "" and pred[-1] == ".":
        pred = pred[:-1]
    if pred != "" and pred[-1] == "/":
        pred = pred[:-1]
    pred = strip_string(pred, skip_unit=data_name in ["carp_en", "minerva"])

    if data_name == 'GPQA' or data_name == 'MMLU':
        if len(pred) >= 3 and pred[0] == '(' and pred[2] == ')':
            pred = pred[1]
    return pred
622
+
623
+
624
# Datasets whose answers must keep their units (skip_unit=True in strip_string).
# NOTE(review): extract_answer() hardcodes this same list instead of using
# the constant — keep the two in sync.
STRIP_EXCEPTIONS = ["carp_en", "minerva"]
625
+
626
+
627
def parse_ground_truth(groudtruth_solution: str, data_name):
    """Extract the final gold answer from a ground-truth solution string.

    Thin wrapper around extract_answer().  (The parameter keeps the original
    'groudtruth' spelling to stay call-compatible with keyword callers.)
    """
    gt_ans = extract_answer(groudtruth_solution, data_name)
    return gt_ans
630
+
631
+
632
def parse_question(example, data_name):
    """Build the question prompt string for record *example* of *data_name*.

    Handles dataset-specific field layouts (asdiv, svamp, tabmwp, carp_en,
    mmlu_stem, sat_math, aqua, gaokao_math_qa); any other dataset falls back
    to the first available generic field.  Yes/No and True/False questions
    get an answer-format hint appended.
    """
    question = ""
    if data_name == "asdiv":
        question = f"{example['body'].strip()} {example['question'].strip()}"
    elif data_name == "svamp":
        body = example["Body"].strip()
        if not body.endswith("."):
            body = body + "."
        question = f'{body} {example["Question"].strip()}'
    elif data_name == "tabmwp":
        title_str = (f'regarding "{example["table_title"]}" ' if example["table_title"] else "")
        question = f"Read the following table {title_str}and answer a question:\n"
        question += f'{example["table"]}\n{example["question"]}'
        if example["choices"]:
            question += (f' Please select from the following options: {example["choices"]}')
    elif data_name == "carp_en":
        question = example["content"]
    elif data_name == "mmlu_stem":
        options = example["choices"]
        assert len(options) == 4
        for i, (label, option) in enumerate(zip("ABCD", options)):
            options[i] = f"({label}) {str(option).strip()}"
        options = " ".join(options)
        question = f"{example['question'].strip()}\nAnswer Choices: {options}"
    elif data_name == "sat_math":
        options = example["options"].strip()
        assert "A" == options[0]
        options = "(" + options
        for ch in "BCD":
            if f" {ch}) " in options:
                # rf-string: the original f" {ch}\) " carried an invalid escape.
                options = regex.sub(rf" {ch}\) ", f" ({ch}) ", options)
        question = f"{example['question'].strip()}\nAnswer Choices: {options}"
    elif "aqua" in data_name:
        options = example["options"]
        choice = "(" + "(".join(options)
        choice = choice.replace("(", " (").replace(")", ") ").strip()
        choice = "\nAnswer Choices: " + choice
        question = example["question"].strip() + choice
    elif data_name == "gaokao_math_qa":
        options_dict = example["options"]
        options = []
        for key in options_dict:
            options.append(f"({key}) {options_dict[key]}")
        options = " ".join(options)
        question = f"{example['question'].strip()}\n选项: {options}"
    else:
        for key in ["question", "problem", "Question", "input"]:
            if key in example:
                question = example[key]
                break

    # Tag Yes/No and True/False questions so the model answers in kind.
    # Fix: parse_ground_truth() returns a single string (the old code
    # unpacked two values from it) and expects the gold solution *text*,
    # not the raw record dict.  Guarded so a missing/malformed answer field
    # cannot crash prompt construction.
    try:
        gt_ans = parse_ground_truth(str(example.get("answer", "")), data_name)
    except Exception:
        gt_ans = None
    if isinstance(gt_ans, str):
        gt_lower = gt_ans.lower()
        if gt_lower in ["true", "false"]:
            question += " (True or False)"
        if gt_lower in ["yes", "no"]:
            question += " (Yes or No)"
    return question.strip()
embedder_server.sh ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Launch a vLLM OpenAI-compatible server in embedding mode on Ascend NPU 3.
# Uses a small GPU-memory share (0.4) so it can co-exist with the generator
# server on the same host.
export ASCEND_RT_VISIBLE_DEVICES=3
export VLLM_USE_V1=1
python -m vllm.entrypoints.openai.api_server \
    --model "FreedomIntelligence/openPangu-Embedded-7B" \
    --tensor-parallel-size 1 \
    --port 8001 \
    --host localhost \
    --gpu-memory-utilization 0.4 \
    --trust-remote-code \
    --task embed \
    --dtype bfloat16
generator_server.sh ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# Launch a vLLM OpenAI-compatible generation server on Ascend NPU 1.
export ASCEND_RT_VISIBLE_DEVICES=1
export VLLM_USE_V1=1
python -m vllm.entrypoints.openai.api_server \
    --model "FreedomIntelligence/openPangu-Embedded-7B" \
    --tensor-parallel-size 1 \
    --port 8000 \
    --host localhost \
    --trust-remote-code \
    --dtype bfloat16 \
    --gpu-memory-utilization 0.90
main.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import torch_npu
4
+ from automr import AutoMR, AutoMRTrainer, AutoMREvaluator, AutoMRConfig
5
+ from automr.data_loader import DataLoader
6
+
7
+
8
def parse_args(argv=None):
    """Parse command-line arguments for AutoMR.

    Args:
        argv: Optional list of argument strings.  Defaults to None, which
            makes argparse read ``sys.argv[1:]`` — existing zero-argument
            callers are unaffected; tests can pass an explicit list.

    Fix: the --mode help text claimed a 'train_eval' mode that ``choices``
    does not accept; the help now matches the actual choices.
    """
    parser = argparse.ArgumentParser()

    # Mode
    parser.add_argument('--mode', type=str, default='train', choices=['train', 'eval'], help='Mode: train or eval')

    # Model settings
    parser.add_argument('--model_name', type=str, default='Qwen/Qwen2.5-3B-Instruct', help='Pretrained LLM model name')
    parser.add_argument('--device', type=str, default='cuda', choices=['cuda', 'cpu', 'npu'], help='Device to use')
    parser.add_argument('--token_budget', type=int, default=256, help='Token budget for reasoning')
    parser.add_argument('--hidden_size', type=int, default=4096, help='Hidden size of the model')

    # Training settings
    parser.add_argument('--learning_rate', type=float, default=5e-4, help='Learning rate')
    parser.add_argument('--num_epochs', type=int, default=5, help='Number of training epochs')
    parser.add_argument('--batch_size', type=int, default=8, help='Batch size')
    parser.add_argument('--num_samples', type=int, default=1, help='Number of skeletons to sample per query (M)')

    # Data paths
    parser.add_argument('--train_data', type=str, default='data/train.json', help='Path to training data')
    parser.add_argument('--val_data', type=str, default='data/val.json', help='Path to validation data')
    parser.add_argument('--test_data', type=str, default='data/test.json', help='Path to test data')
    parser.add_argument('--checkpoint_dir', type=str, default='checkpoints', help='Directory to save checkpoints')
    parser.add_argument('--results_dir', type=str, default='results', help='Directory to save results')

    # Checkpoint
    parser.add_argument('--load_checkpoint', type=str, default=None, help='Path to checkpoint to load')

    # Task type
    parser.add_argument('--task_type', type=str, default='math', choices=['math', 'multiple_choice'], help='Task type')

    return parser.parse_args(argv)
40
+
41
+
42
def main():
    """Entry point: build the config from CLI args, then train or evaluate."""
    args = parse_args()

    # Create configuration
    config = AutoMRConfig(
        model_name=args.model_name,
        device=args.device,
        token_budget=args.token_budget,
        learning_rate=args.learning_rate,
        num_epochs=args.num_epochs,
        batch_size=args.batch_size,
        num_samples_per_query=args.num_samples,
        train_data_path=args.train_data,
        val_data_path=args.val_data,
        test_data_path=args.test_data,
        checkpoint_dir=args.checkpoint_dir,
        results_dir=args.results_dir,
        task_type=args.task_type,
        hidden_size=args.hidden_size,
    )

    print("="*80)
    print("AutoMR: Automatic Meta-Reasoning Skeleton Search")
    print("="*80)
    print(f"\nConfiguration:")
    print(f"  Model: {config.model_name}")
    print(f"  Device: {config.device}")
    print(f"  Token Budget: {config.token_budget}")
    print(f"  Task Type: {config.task_type}")
    print(f"  Mode: {args.mode}")
    print("="*80)

    # Initialize model
    model = AutoMR(config)

    # Load checkpoint if specified
    # NOTE(review): a --load_checkpoint path that does not exist is silently
    # ignored here; consider warning so eval never runs with random weights.
    if args.load_checkpoint and os.path.exists(args.load_checkpoint):
        model.load_checkpoint(args.load_checkpoint)

    # Training mode
    if args.mode == 'train':
        print(f"\n{'='*80}")
        print("TRAINING")
        print("="*80)

        # Load training data
        train_data = DataLoader.load_data(config.train_data_path)
        val_data = DataLoader.load_data(config.val_data_path)
        print(f"Loaded {len(train_data)} training samples from {config.train_data_path}")

        # Train
        trainer = AutoMRTrainer(model, config)
        trainer.train(train_data, val_data)

    # Evaluation mode
    elif args.mode == 'eval':
        print(f"\n{'='*80}")
        print("EVALUATION")
        print("="*80)

        # Load test data
        test_data = DataLoader.load_data(config.test_data_path)
        print(f"Loaded {len(test_data)} test samples from {config.test_data_path}")

        # Evaluate
        evaluator = AutoMREvaluator(model, config)
        accuracy, results = evaluator.evaluate(test_data)

        print(f"\n{'='*80}")
        print(f"Final Accuracy: {accuracy:.2%}")
        print("="*80)

    else:
        # Unreachable in practice: argparse restricts --mode to train/eval.
        raise NotImplementedError
116
+
117
+
118
# Run only when executed as a script, not on import.
if __name__ == "__main__":
    main()
math_train.sh ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Train AutoMR on the MATH dataset using Ascend NPU 0.
VLLM_USE_V1=1 ASCEND_RT_VISIBLE_DEVICES=0 python main.py --mode train \
    --device npu \
    --model_name "FreedomIntelligence/openPangu-Embedded-7B" \
    --train_data processed_data/MATH/train.jsonl \
    --val_data processed_data/MATH/val.jsonl \
    --num_epochs 5 \
    --batch_size 8 \
    --num_samples 4 \
    --token_budget 4096 \
    --checkpoint_dir checkpoints/MATH/pangu \
    --task_type math