"""Gradio app exposing the LLM-Blender PairRM ranker as a pairwise judge.

Given a prompt and two candidate responses, the app reports which
candidate ("A" or "B") the ranker prefers.
"""

import gradio as gr
import llm_blender

# Load the model and explicitly force it to use the CPU.
blender = llm_blender.Blender()
blender.loadranker("llm-blender/PairRM", device="cpu")


def score_pair(prompt: str, cand_a: str, cand_b: str) -> str:
    """Return which of two candidate responses the ranker prefers.

    Args:
        prompt: The instruction or input both candidates respond to.
        cand_a: The first candidate response.
        cand_b: The second candidate response.

    Returns:
        "A" if the ranker judges ``cand_a`` better, otherwise "B".
    """
    # ``compare`` works on batches, so wrap each single value in a
    # one-element list and read back the single result.
    inputs = [prompt]
    candidates_a = [cand_a]
    candidates_b = [cand_b]

    # Compare returns a list of booleans (True if A is better).
    comparison_results = blender.compare(inputs, candidates_a, candidates_b)

    return "A" if comparison_results[0] else "B"


# Create the API endpoint: three text inputs (prompt, candidate A,
# candidate B) mapped to one text output (the winning label).
iface = gr.Interface(
    fn=score_pair,
    inputs=["text", "text", "text"],
    outputs="text",
)

iface.launch()