from typing import Optional, Tuple

from huggingface_hub import InferenceClient
from openai import OpenAI

# NOTE: the two prompts below are runtime text sent verbatim to the judge
# model. They are split across adjacent string literals purely so the source
# stays readable; the resulting string values are unchanged.

# Instructions for judging factual/semantic equivalence of general text.
TEXT_SIMILARITY_JUDGE_PROMPT = (
    " You are given two pieces of text. "
    "Your task is to determine whether they are semantically equivalent "
    "based solely on their factual content. "
    "Here are the specific guidelines: "
    "- Texts are equivalent if they convey the same core information or concept, "
    "regardless of wording or structure "
    "- If one text has information that is a subset of the other text, "
    "then the texts are equivalent "
    "- Focus ONLY on the essential claims, not on: "
    "* Stylistic differences or tone "
    "* Level of detail (if the core facts remain the same) "
    "* Connotative differences between words "
    "* Implied significance or emphasis "
    "* Presentation order (if all key information is present in both) "
    "- Minor additions of non-contradictory information should not make texts "
    "non-equivalent "
    "- For ambiguous cases, prioritize the central claim or purpose of the text "
    "Examples of equivalent pairs: "
    '- "The meeting starts at 3pm" and '
    '"The 3 o\'clock meeting will begin on time" '
    '- "Research indicates a 15% increase" and '
    '"Studies show a fifteen percent rise" '
    '- "was influential in the field" and '
    '"had a significant impact on the community" '
    "Examples of non-equivalent pairs: "
    '- "The project might be completed by Friday" and '
    '"The project will be finished by Friday" '
    '- "Most experts agree on the approach" and '
    '"All experts support the approach" '
    "Strictly follow these guidelines and return ONLY: "
    "- equivalent "
    "- not equivalent "
)

# Instructions for judging equivalence of two mathematical solution segments.
MATH_SIMILARITY_JUDGE_PROMPT = (
    " You are given two pieces of text from mathematical solutions. "
    "Your task is to determine whether the two solution segments are "
    "mathematically equivalent in their content, "
    "while allowing for stylistic variations. "
    "Here are some important guidelines: "
    "- Solutions should be considered equivalent if: "
    "1. They communicate the same mathematical content/approach, "
    "even if word choice or phrasing differs "
    "2. \n"
    "They contain the same key mathematical ideas, even if expressed differently "
    "3. The same mathematical steps are described, even if using different words "
    "4. They present the same final answer, "
    "regardless of wording style or formatting "
    "- Allow for these variations while still considering solutions equivalent: "
    '1. Stylistic differences ("we will" vs. "we\'ll" or "I\'ll") '
    "2. Different levels of formality in the explanation "
    "3. Minor rephrasing that preserves the core mathematical content "
    "4. Use of synonyms or alternative mathematical terminology "
    "for the same concept "
    "- Solutions are NOT equivalent if: "
    "1. They use fundamentally different mathematical approaches "
    "2. They work with different formulas or equations "
    "3. They present different mathematical steps or operations "
    "4. They reach different conclusions or answers "
    "5. One contains substantial mathematical content that the other lacks "
    "- When examining final answers, focus on mathematical equivalence "
    "rather than stylistic presentation "
    "- For solution steps, maintain the core mathematical approach "
    "while allowing for rephrasing "
    "Examples of solutions that SHOULD be considered equivalent: "
    '- "We will systematically evaluate each possible grouping" and '
    '"We\'ll evaluate each grouping" '
    '- "The answer is x = 5" and "Therefore, x equals 5" '
    '- "Using the quadratic formula" and "Applying the quadratic formula" '
    "Strictly follow the guidelines above. "
    "Return your judgment in the following format. \n"
    "Do not include any other text: "
    "- equivalent "
    "- not equivalent "
)


def _build_judge_prompt(
    path1_text: str,
    path2_text: str,
    domain: Optional[str],
    custom_similarity_judge_prompt: Optional[str],
) -> str:
    """Return the full user prompt: judge instructions followed by both texts."""
    if domain == "text":
        instructions = TEXT_SIMILARITY_JUDGE_PROMPT
    elif domain == "math":
        instructions = MATH_SIMILARITY_JUDGE_PROMPT
    elif not domain and custom_similarity_judge_prompt:
        # The custom prompt is only consulted when no built-in domain is set.
        instructions = custom_similarity_judge_prompt
    else:
        raise ValueError(
            f"Invalid domain: {domain} and no custom similarity judge prompt provided"
        )
    return f"{instructions}\n\nText 1: {path1_text}\nText 2: {path2_text}"


def _normalize_judgement(raw: Optional[str]) -> str:
    """Reduce a raw model reply to lowercase letters and spaces, trimmed."""
    # The API may hand back no content at all; treat that as an empty reply
    # instead of crashing on ``None.strip()``.
    text = raw or ""
    # Drop bullets, punctuation and newlines (e.g. "- equivalent.").
    kept = "".join(ch for ch in text if ch.isalpha() or ch == " ")
    # Lowercase so "Equivalent" / "Not Equivalent" are recognised as verdicts.
    return kept.strip().lower()


def path_sim_llm(
    path1_text: str,
    path2_text: str,
    api: str = "openai",
    model: str = "gpt-4.1-mini",
    verbose: bool = False,
    domain: Optional[str] = "text",
    custom_similarity_judge_prompt: Optional[str] = None,
) -> Tuple[int, int, int]:
    """Ask an LLM judge whether two texts are equivalent.

    Args:
        path1_text: First text to compare.
        path2_text: Second text to compare.
        api: Backend to use: ``"openai"`` (``OpenAI`` client) or ``"hf"``
            (``InferenceClient``).
        model: Model name passed to the chat-completions call.
        verbose: If true, print the compared texts and the parsed verdict,
            plus a notice when the verdict is not one of the expected values.
        domain: ``"text"`` or ``"math"`` selects the matching built-in judge
            prompt. A falsy value (``None`` or ``""``) selects
            ``custom_similarity_judge_prompt`` instead.
        custom_similarity_judge_prompt: Judge instructions used only when
            ``domain`` is falsy; ignored when ``domain`` is ``"text"`` or
            ``"math"``.

    Returns:
        A tuple ``(score, prompt_tokens, completion_tokens)`` where ``score``
        is 1 if the judge answered "equivalent" and 0 otherwise (including
        unrecognised answers). Token counts are read from the completion's
        ``usage`` field.

    Raises:
        ValueError: If ``api`` is not ``"openai"`` or ``"hf"``, or if
            ``domain`` is unrecognised / falsy without a custom prompt.
    """
    if api == "openai":
        client = OpenAI()
    elif api == "hf":
        client = InferenceClient()
    else:
        raise ValueError(f"Invalid API: {api}")

    similarity_judge_prompt = _build_judge_prompt(
        path1_text, path2_text, domain, custom_similarity_judge_prompt
    )

    completion = client.chat.completions.create(
        model=model,
        temperature=0,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": similarity_judge_prompt},
        ],
    )

    judgement = _normalize_judgement(completion.choices[0].message.content)

    if verbose:
        print(f"{path1_text} \nand \n{path2_text} \nare {judgement}")

    is_equivalent = judgement == "equivalent"
    if verbose and not is_equivalent and judgement != "not equivalent":
        # Unrecognised replies are scored as not equivalent.
        print(f"Invalid judgement: {judgement}")

    usage = completion.usage
    return int(is_equivalent), usage.prompt_tokens, usage.completion_tokens