========== ORIGINAL PROMPT ========== def build_open_ended_prompt(sample): """Build an open-ended prompt (no MCQ options).""" desc = sample.get('description', '') question = sample.get('question', '') prompt = f"""Look at the image and answer the physics question. {desc} {question} ========== ORIGINAL GENERATE ========== output_ids = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS) generated = output_ids[0][inputs.input_ids.shape[1]:] response = processor.decode(generated, skip_special_tokens=True) except Exception as e: response = f"ERROR: {str(e)}" ========== MY PROMPT ========== def build_open_ended_prompt(sample): q = sample.get("question", "") return f"Look at this image and answer the physics question. Think step by step and put your final answer in \\boxed{{}}.\n\nQuestion: {q}" def run_inference_on_gpu(gpu_id, model_path, samples, output_file): """Run inference for a subset of samples on a specific GPU.""" ========== MY GENERATE ========== output_ids = model.generate(**inputs, max_new_tokens=2048, temperature=0.1, do_sample=False) output_text = processor.batch_decode(output_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0]