Update pipeline/disagreement_resolution.py

pipeline/disagreement_resolution.py
CHANGED (+114 -95)
@@ -1,5 +1,6 @@
 import json
 import os
+import re
 from typing import List, Dict
 from openai import OpenAI
 from pydantic import BaseModel
@@ -14,6 +15,16 @@ client = OpenAI(
     api_key=os.getenv("OPENROUTER_API_KEY"),
 )
 
+# Priority list of models to try
+# 1. DeepSeek R1 (Best reasoning, most expensive)
+# 2. DeepSeek R1 Distill (Good reasoning, cheaper)
+# 3. Gemini 2.0 Flash (Free/Cheap, very fast fallback)
+MODELS = [
+    "deepseek/deepseek-r1",
+    "deepseek/deepseek-r1-distill-llama-70b",
+    "google/gemini-2.0-flash-exp:free"
+]
+
 class ResolutionDetails(BaseModel):
     accepted_critique_points: Dict[str, List[str]]
     rejected_critique_points: Dict[str, List[str]]
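For context, `client` is the OpenAI SDK pointed at OpenRouter; only its `api_key` line appears in this diff, but the constructor presumably looks along these lines (the `base_url` is OpenRouter's documented OpenAI-compatible endpoint, an assumption since it is not shown here):

    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",  # OpenRouter's OpenAI-compatible endpoint (assumed)
        api_key=os.getenv("OPENROUTER_API_KEY"),
    )

Every model on the priority list is then addressed through this one client by its `vendor/model` identifier.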
@@ -33,22 +44,13 @@ def construct_resolution_prompt(
 ) -> tuple:
     """
     Construct prompt for disagreement resolution
-
-    Args:
-        paper_title: Title of the paper
-        paper_abstract: Abstract of the paper
-        disagreement: Disagreement analysis results
-        combined_critiques: Combined critique points
-        sota_results: State-of-the-art findings
-        retrieved_evidence: Retrieved evidence per category
-
-    Returns:
-        Tuple of (system_prompt, user_prompt)
     """
     system_prompt = """
     You are an AI specialized in resolving academic peer review disagreements.
     Your task is to analyze critiques, verify evidence, and provide a structured resolution.
 
+    IMPORTANT: detailed reasoning is allowed, but the FINAL output must be valid JSON only.
+
     Respond in the following JSON format:
     {
         "accepted_critique_points": {"category": ["critique_1", "critique_2"]},
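Judging from the `required_keys` check added further down and the `ResolutionDetails` model (a `Dict[str, List[str]]` per side), the complete response shape the prompt asks for would look like this (illustrative values only):

    # Hypothetical example of a well-formed model response, as a Python dict:
    expected = {
        "accepted_critique_points": {"Methodology": ["critique_1"]},
        "rejected_critique_points": {"Novelty": ["critique_2"]},
        "final_resolution_summary": "The methodological concern stands; the novelty objection is contradicted by the retrieved evidence.",
    }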
@@ -73,14 +75,11 @@ def construct_resolution_prompt(
     - **Novelty:** {', '.join(disagreement_details.get('Novelty', ['N/A']))}
 
     ### **Supporting Information**
-    **Combined Critique Points from Reviews:**
-    {json.dumps(combined_critiques, indent=2)}
+    **Combined Critique Points from Reviews:** {json.dumps(combined_critiques, indent=2)}
 
-    **State-of-the-Art (SoTA) Findings:**
-    {sota_results[:2000]}
+    **State-of-the-Art (SoTA) Findings:** {sota_results[:2000]}
 
-    **Retrieved Evidence:**
-    {json.dumps(retrieved_evidence, indent=2)[:2000]}
+    **Retrieved Evidence:** {json.dumps(retrieved_evidence, indent=2)[:2000]}
 
     ### **Resolution Task**
     1. Validate critique points and categorize them into accepted or rejected.
@@ -92,6 +91,37 @@
 
     return system_prompt, user_prompt
 
+def extract_json_from_text(text: str) -> Dict:
+    """
+    Robustly extract JSON from text that might contain markdown or thinking traces.
+    """
+    try:
+        # 1. Try a straightforward parse
+        return json.loads(text)
+    except json.JSONDecodeError:
+        pass
+
+    # 2. Try removing markdown code blocks
+    if "```json" in text:
+        pattern = r"```json(.*?)```"
+        match = re.search(pattern, text, re.DOTALL)
+        if match:
+            try:
+                return json.loads(match.group(1).strip())
+            except json.JSONDecodeError:
+                pass
+
+    # 3. Regex search for the outermost curly braces
+    # This handles cases where DeepSeek outputs <think>...</think> before the JSON
+    try:
+        match = re.search(r"(\{.*\})", text, re.DOTALL)
+        if match:
+            return json.loads(match.group(1))
+    except json.JSONDecodeError:
+        pass
+
+    raise ValueError("Could not extract valid JSON from model response")
+
 async def resolve_single_disagreement(
     paper_title: str,
     paper_abstract: str,
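The three-stage fallback matters because `response_format={"type": "json_object"}` is not guaranteed to be honored by every model routed through OpenRouter, and DeepSeek R1 in particular may emit a reasoning trace before the JSON. A quick sanity check of the helper against the three response shapes it targets (the sample strings are made up):

    # Each sample should parse to the same dict via a different code path.
    samples = [
        '{"verdict": "accepted"}',                                          # 1: plain JSON
        'Sure:\n```json\n{"verdict": "accepted"}\n```',                     # 2: fenced markdown
        '<think>Weighing the critiques...</think>{"verdict": "accepted"}',  # 3: reasoning trace
    ]
    for s in samples:
        assert extract_json_from_text(s) == {"verdict": "accepted"}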
@@ -99,22 +129,10 @@ async def resolve_single_disagreement(
     combined_critiques: Dict,
     sota_results: str,
     retrieved_evidence: Dict,
-    retries: int =
+    retries: int = 3  # Reduced retries since we have model fallback
 ) -> Dict:
     """
-    Resolve a single disagreement
-
-    Args:
-        paper_title: Paper title
-        paper_abstract: Paper abstract
-        disagreement: Disagreement analysis
-        combined_critiques: Combined critique points
-        sota_results: SoTA findings
-        retrieved_evidence: Evidence per category
-        retries: Maximum retry attempts
-
-    Returns:
-        Resolution results
+    Resolve a single disagreement with Model Fallback and Token Limits
     """
     system_prompt, user_prompt = construct_resolution_prompt(
         paper_title,
@@ -130,61 +148,72 @@ async def resolve_single_disagreement(
         {"role": "user", "content": user_prompt},
     ]
 
-            raise ValueError(f"Missing keys. Present: {llm_output.keys()}")
-
-            # Validate structure
-            resolution = DisagreementResolutionResult(
-                review_pair=disagreement.get('review_pair', [0, 1]),
-                resolution_details=ResolutionDetails(**llm_output)
-            )
-
-            return resolution.model_dump()
-
-        except Exception as e:
-            wait_time = 2 ** attempt
-            print(f"Resolution attempt {attempt + 1} failed: {e}")
-
-            if attempt < retries - 1:
-                await asyncio.sleep(wait_time)
-            else:
-                return {
-                    "review_pair": disagreement.get('review_pair', [0, 1]),
-                    "resolution_details": {
-                        "accepted_critique_points": {},
-                        "rejected_critique_points": {},
-                        "final_resolution_summary": f"Error: {str(e)}"
-                    },
-                    "error": str(e)
-                }
+    last_exception = None
+
+    # Loop through available models in case of error (402 Payment, 429 Rate Limit)
+    for model in MODELS:
+        print(f"Attempting resolution with model: {model}")
+
+        for attempt in range(retries):
+            try:
+                response = await asyncio.to_thread(
+                    client.chat.completions.create,
+                    model=model,
+                    messages=messages,
+                    # CRITICAL FIX: Limit max_tokens to prevent "Insufficient Credits" error
+                    # OpenRouter reserves credits based on this number.
+                    max_tokens=4096,
+                    response_format={"type": "json_object"},
+                )
+
+                if not response.choices or not response.choices[0].message.content.strip():
+                    raise ValueError("Empty response from AI")
+
+                content = response.choices[0].message.content.strip()
+                llm_output = extract_json_from_text(content)
+
+                # Validate required keys
+                required_keys = {
+                    "accepted_critique_points",
+                    "rejected_critique_points",
+                    "final_resolution_summary"
+                }
+
+                if not required_keys.issubset(llm_output.keys()):
+                    raise ValueError(f"Missing keys. Present: {llm_output.keys()}")
+
+                # Validate structure
+                resolution = DisagreementResolutionResult(
+                    review_pair=disagreement.get('review_pair', [0, 1]),
+                    resolution_details=ResolutionDetails(**llm_output)
+                )
+
+                return resolution.model_dump()
+
+            except Exception as e:
+                last_exception = e
+                error_msg = str(e)
+                print(f"Model {model} - Attempt {attempt + 1} failed: {error_msg}")
+
+                # Immediate fallback on payment errors
+                if "402" in error_msg or "insufficient_quota" in error_msg:
+                    print("Insufficient credits detected. Switching to cheaper model...")
+                    break  # Break retry loop to go to next model
+
+                wait_time = 2 ** attempt
+                if attempt < retries - 1:
+                    await asyncio.sleep(wait_time)
+
+    # If all models and retries fail
+    return {
+        "review_pair": disagreement.get('review_pair', [0, 1]),
+        "resolution_details": {
+            "accepted_critique_points": {},
+            "rejected_critique_points": {},
+            "final_resolution_summary": f"Failed to resolve disagreement after trying multiple models. Final Error: {str(last_exception)}"
+        },
+        "error": str(last_exception)
+    }
 
 async def resolve_disagreements(
     paper_title: str,
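The rewritten loop combines two recovery policies: exponential backoff within a model (waits of `2 ** attempt` seconds between attempts) and an immediate jump to the next model on payment errors. The same policy, distilled into a standalone sketch where the hypothetical `call` stands in for the OpenRouter request:

    import asyncio

    async def with_fallback(models, call, retries=3):
        """Try each model up to `retries` times; jump to the next model on credit errors."""
        last_exc = None
        for model in models:
            for attempt in range(retries):
                try:
                    return await call(model)
                except Exception as e:
                    last_exc = e
                    if "402" in str(e):  # out of credits: next model immediately
                        break
                    if attempt < retries - 1:
                        await asyncio.sleep(2 ** attempt)  # 1s, 2s, 4s, ...
        raise last_exc

Unlike this sketch, the production code returns a structured error record instead of raising, presumably so one failed pair cannot abort the batch processed by `resolve_disagreements`.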
@@ -195,16 +224,6 @@ async def resolve_disagreements(
 ) -> List[Dict]:
     """
     Resolve all disagreements
-
-    Args:
-        paper_title: Paper title
-        paper_abstract: Paper abstract
-        disagreements: List of disagreement analyses
-        critique_points: List of critique points
-        search_results: Search and retrieval results
-
-    Returns:
-        List of resolution results
     """
     if not disagreements:
         return []