Spaces:
Sleeping
Sleeping
| """Multi-agent system for literature review using OpenAI-compatible API.""" | |
| import json | |
| import re | |
| import os | |
| import time | |
| from typing import Any, Optional, Dict, Tuple | |
| from openai import OpenAI | |
def extract_json_between_markers(llm_output: str) -> Optional[Dict[str, Any]]:
    """Extract the first parseable JSON object from an LLM response.

    Looks for a fenced ```json ... ``` block first; if none is present, falls
    back to scanning for brace-delimited spans in the raw text.

    Args:
        llm_output: Raw text returned by the model.

    Returns:
        The first candidate that parses as JSON, or None if nothing parses.
    """
    fenced = re.findall(r"```json(.*?)```", llm_output, re.DOTALL)
    if fenced:
        candidates = fenced
    else:
        # Fallback 1: greedy outermost-brace span, so nested objects like
        # {"a": {"b": 2}} are captured whole (the old shallow pattern would
        # truncate them to the innermost braces).
        # Fallback 2: shallow single-level objects, kept in case the greedy
        # span covers multiple unrelated objects and fails to parse.
        candidates = (
            re.findall(r"\{.*\}", llm_output, re.DOTALL)
            + re.findall(r"\{[^{}]*\}", llm_output, re.DOTALL)
        )
    for candidate in candidates:
        candidate = candidate.strip()
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            # Control characters are invalid in JSON; strip them and retry.
            cleaned = "".join(
                ch for ch in candidate if ord(ch) >= 32 and ord(ch) != 127
            )
            try:
                return json.loads(cleaned)
            except json.JSONDecodeError:
                continue
    return None
def query_model(
    system_prompt: str,
    prompt: str,
    client: "OpenAI",
    model: str,
    *,
    rate_limit_s: float = 1.0,
    error_backoff_s: float = 2.0,
) -> Optional[str]:
    """Send a system/user prompt pair to an OpenAI-compatible chat model.

    Args:
        system_prompt: Content of the "system" message.
        prompt: Content of the "user" message.
        client: OpenAI-compatible client used to issue the request.
        model: Model identifier passed to the API.
        rate_limit_s: Seconds to sleep before each request (crude rate
            limiting, 1 req/s by default to avoid concurrency issues).
        error_backoff_s: Seconds to sleep after a failure, so that callers
            retrying in a loop are throttled. (This function itself does
            not retry.)

    Returns:
        The model's reply text, or None if the request failed.
    """
    try:
        time.sleep(rate_limit_s)
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt},
            ],
            temperature=0.7,
            max_tokens=4000,
        )
        return response.choices[0].message.content
    except Exception as e:  # broad on purpose: any API failure maps to None
        print(f"Error querying model: {e}")
        # Back off before returning None so an external retry loop is throttled.
        time.sleep(error_backoff_s)
        return None
def _build_review_prompts(paper_content: str, reviewer_type: str) -> Tuple[str, str]:
    """Build the (system prompt, user prompt) pair for one review request.

    reviewer_type is inserted verbatim into the system prompt to give the
    reviewer a persona; pass "" for a neutral reviewer.
    """
    template_instructions = """
Respond in the following format:
THOUGHT:
<THOUGHT>
REVIEW JSON:
```json
<JSON>
```
In <THOUGHT>, first briefly discuss your intuitions and reasoning for the evaluation.
Detail your high-level arguments, necessary choices and desired outcomes of the review.
In <JSON>, provide the review in JSON format with the following fields:
- "Summary": A summary of the paper content and its contributions.
- "Strengths": A list of strengths of the paper.
- "Weaknesses": A list of weaknesses of the paper.
- "Originality": A rating from 1 to 4 (low, medium, high, very high).
- "Quality": A rating from 1 to 4 (low, medium, high, very high).
- "Clarity": A rating from 1 to 4 (low, medium, high, very high).
- "Significance": A rating from 1 to 4 (low, medium, high, very high).
- "Questions": A set of clarifying questions to be answered by the paper authors.
- "Limitations": A set of limitations and potential negative societal impacts.
- "Ethical Concerns": A boolean value indicating whether there are ethical concerns.
- "Soundness": A rating from 1 to 4 (poor, fair, good, excellent).
- "Presentation": A rating from 1 to 4 (poor, fair, good, excellent).
- "Contribution": A rating from 1 to 4 (poor, fair, good, excellent).
- "Overall": A rating from 1 to 10 (very strong reject to award quality).
- "Confidence": A rating from 1 to 5 (low, medium, high, very high, absolute).
- "Decision": A decision that has to be one of: Accept, Reject.
"""
    neurips_form = """
## Review Guidelines
Evaluate the paper across these dimensions:
1. **Originality**: Are the ideas novel? Is related work cited?
2. **Quality**: Is the work technically sound? Are claims well supported?
3. **Clarity**: Is the paper well-written and organized?
4. **Significance**: Are the results important? Will others build on this work?
5. **Soundness**: Rate the technical quality (1-4: poor, fair, good, excellent)
6. **Presentation**: Rate the writing quality (1-4: poor, fair, good, excellent)
7. **Contribution**: Rate the overall contribution (1-4: poor, fair, good, excellent)
8. **Overall Score**: Rate 1-10 where:
- 1-3: Reject
- 4-6: Borderline
- 7-8: Accept
- 9-10: Strong Accept
""" + template_instructions
    sys_prompt = (
        f"You are an AI researcher reviewing an academic paper. "
        f"Be critical and thorough in your assessment. {reviewer_type}\n"
    ) + neurips_form
    user_prompt = f"Review the following paper:\n\n{paper_content}\n\n"
    return sys_prompt, user_prompt


# (JSON key, rating scale maximum, weight) for the blended performance score.
_SCORE_CRITERIA = (
    ("Overall", 10.0, 1.0),
    ("Soundness", 4.0, 0.1),
    ("Confidence", 5.0, 0.1),
    ("Contribution", 4.0, 0.4),
    ("Presentation", 4.0, 0.2),
    ("Clarity", 4.0, 0.1),
    ("Originality", 4.0, 0.1),
    ("Quality", 4.0, 0.1),
    ("Significance", 4.0, 0.1),
)


def _compute_weighted_score(review_json: Dict[str, Any]) -> float:
    """Blend the individual review ratings into a single 0-10 score.

    Each rating is normalised to [0, 1] by its scale maximum, combined with
    the criterion weights, then rescaled to 0-10.

    Raises:
        KeyError: if a required rating is missing from review_json.
        ValueError: if a rating is not convertible to int.
    """
    for key, _, _ in _SCORE_CRITERIA:
        if key not in review_json:
            raise KeyError(f"Missing key '{key}' in review JSON.")
    total_weight = sum(weight for _, _, weight in _SCORE_CRITERIA)
    blended = sum(
        weight * int(review_json[key]) / scale
        for key, scale, weight in _SCORE_CRITERIA
    )
    return blended / total_weight * 10.0


def get_score(
    paper_content: str,
    reviewer_type: Optional[str] = None,
    attempts: int = 3,
    client: Optional["OpenAI"] = None,
    model: Optional[str] = None,
) -> Tuple[Optional[float], str, bool]:
    """Evaluate a research paper using an LLM reviewer.

    Queries the model for a structured review, extracts its JSON payload,
    and blends the ratings into one score. Retries on any failure.

    Args:
        paper_content: Full text of the paper to review.
        reviewer_type: Optional persona text appended to the system prompt.
        attempts: Maximum number of query attempts before giving up.
        client: OpenAI-compatible client forwarded to query_model.
        model: Model identifier forwarded to query_model.

    Returns:
        (score, message, success): score is the 0-10 weighted performance
        score (None on failure); message contains the score plus the raw
        review text, or the last error on failure; success indicates
        whether a review was obtained.
    """
    last_exception_message = ""
    for attempt in range(attempts):
        try:
            persona = "" if reviewer_type is None else reviewer_type
            sys_prompt, prompt = _build_review_prompts(paper_content, persona)
            review_output = query_model(
                system_prompt=sys_prompt,
                prompt=prompt,
                client=client,
                model=model,
            )
            if review_output is None:
                raise ValueError("LLM query returned None.")
            review_json = extract_json_between_markers(review_output)
            if review_json is None:
                raise ValueError("Could not extract JSON review from LLM output.")
            performance = _compute_weighted_score(review_json)
            return (
                performance,
                f"Performance Score: {performance:.2f}/10\n\n{review_output}",
                True,
            )
        except Exception as e:
            # Any failure (query, extraction, missing keys) counts as one
            # burned attempt; remember the message for the final report.
            print(f"Error in get_score (attempt {attempt + 1}/{attempts}): {e}")
            last_exception_message = str(e)
    return (
        None,
        f"Failed to get score after {attempts} attempts. Last error: {last_exception_message}",
        False,
    )
class ReviewerAgent:
    """A single LLM reviewer with a fixed persona.

    The client, model, persona text and display name are set once at
    construction and reused for every paper the agent reviews.
    """

    def __init__(self, client: OpenAI, model: str, persona: str, name: str):
        self.client = client
        self.model = model
        # Persona text that get_score inserts into the reviewer system prompt.
        self.persona = persona
        self.name = name

    def review_paper(self, paper_content: str) -> Dict[str, Any]:
        """Run one review of paper_content and package the outcome as a dict."""
        score, review_text, succeeded = get_score(
            paper_content=paper_content,
            reviewer_type=self.persona,
            client=self.client,
            model=self.model,
        )
        outcome = {
            "reviewer": self.name,
            "score": score,
            "review": review_text,
            "success": succeeded,
        }
        return outcome
class MultiReviewerSystem:
    """Coordinates a panel of persona-driven reviewer agents."""

    # (persona, display name) for each member of the review panel.
    _PANEL = (
        (
            "You focus on experimental rigor and expect well-designed experiments with clear insights.",
            "Reviewer 1: Experimentalist",
        ),
        (
            "You look for impactful ideas that would advance the field significantly.",
            "Reviewer 2: Impactist",
        ),
        (
            "You seek novel ideas that have not been proposed before and creative approaches.",
            "Reviewer 3: Novelty Seeker",
        ),
    )

    def __init__(self, api_key: str, base_url: str, model: str):
        """Create one shared API client and instantiate the reviewer panel."""
        self.client = OpenAI(api_key=api_key, base_url=base_url)
        self.model = model
        self.reviewers = [
            ReviewerAgent(client=self.client, model=self.model, persona=persona, name=name)
            for persona, name in self._PANEL
        ]

    def review_paper_sequential(self, paper_content: str, progress_callback=None) -> Dict[str, Any]:
        """Collect one review per agent, in order, and aggregate the scores.

        progress_callback, when given, is called with (fraction_done, message)
        before each review and once more at completion.
        """
        reviews = []
        scores = []
        panel_size = len(self.reviewers)
        for index, reviewer in enumerate(self.reviewers):
            if progress_callback:
                progress_callback(index / panel_size, f"Reviewing with {reviewer.name}...")
            outcome = reviewer.review_paper(paper_content)
            reviews.append(outcome)
            if outcome["success"] and outcome["score"] is not None:
                scores.append(outcome["score"])
        if progress_callback:
            progress_callback(1.0, "Review complete!")
        # Average over successful reviews only; 0 when none succeeded.
        average = sum(scores) / len(scores) if scores else 0
        return {
            "reviews": reviews,
            "average_score": average,
            "total_reviewers": panel_size,
            "successful_reviews": len(scores),
        }