Spaces:

eaglelandsonce
/

Research_Compare

Sleeping

App Files Files Community

Research_Compare / app.py

eaglelandsonce

Create app.py

09756c4 verified about 2 months ago

raw

history blame contribute delete

14.3 kB

	import os
	import json
	import tempfile
	from typing import Any, Dict, Tuple

	import gradio as gr
	from pypdf import PdfReader

	try:
	from openai import OpenAI
	except Exception:
	OpenAI = None # handled at runtime

	# Chat model name used when the user does not change the "Model" textbox.
	# Read once at import time from the OPENAI_MODEL environment variable and
	# falling back to "gpt-4o-mini"; the Clear button restores this value.
	DEFAULT_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")

	# Task 1 prompt: asks the model to reply with nothing but the primary funding
	# topic of a NOFO. run_pipeline appends the NOFO text after this prompt and
	# collapses whitespace in the reply before reusing it as the topic.
	PROMPT_TASK_1 = """You are an expert research analyst.

	You will be given the full text of a Notice of Funding Opportunity (NOFO).

	Your task is to identify and extract the PRIMARY topic or focus area for which funding is being provided.

	Instructions:
	- Read the NOFO carefully.
	- Determine the single, highest-level topic that best represents the purpose of the funding.
	- Respond with the topic name ONLY.
	- Do NOT include explanations, descriptions, bullet points, or additional text.
	- Do NOT include punctuation before or after the topic.

	Output Format:
	<topic name only>
	"""

	# Task 2 prompt: relevance gate. The model either summarises the paper
	# (under 300 words) or emits the marker "PAPER NOT RELATED TO TOPIC";
	# run_pipeline skips Tasks 3-5 when that marker appears anywhere in the reply.
	# NOTE(review): the "Output Format" heading appears twice, and the
	# `"summary": "<...>"` format contradicts the earlier "Respond with exactly"
	# instruction. The substring check in run_pipeline tolerates both reply
	# shapes, so the text is left untouched here - confirm before rewording.
	PROMPT_TASK_2 = """You are an expert research reviewer and grant analyst.

	You will be provided with:
	1. The extracted funding topic from a Notice of Funding Opportunity (NOFO)
	2. The full text of a research paper

	Your task is to determine whether the research paper is RELEVANT to the NOFO topic.

	Relevance Criteria:
	- Domain alignment (field, application area, or population)
	- Methodological alignment (approaches, models, techniques)
	- Theoretical or conceptual alignment
	- Practical applicability to the NOFO’s funding objectives

	Instructions:
	- Carefully analyze the research paper in relation to the NOFO topic.
	- If the paper does NOT meaningfully align by domain, method, theory, or application:
	→ Respond with exactly:
	PAPER NOT RELATED TO TOPIC
	- If the paper IS relevant:
	→ Provide a concise summary of the paper in under 300 words.
	→ Emphasize how the paper supports, informs, or could enable work aligned with the NOFO topic.

	Output Format:
	### Output Format:
	"summary": "<summary of the paper under 300 words OR PAPER NOT RELATED TO TOPIC>"
	"""

	# Task 3 prompt: requests exactly five project ideas, each with the fields
	# Title / Description / Citation / NOFO Alignment / Research Paper Path,
	# separated by lines of three dashes. run_pipeline relies on that "---"
	# separator to pick the first idea as the input for Task 4.
	PROMPT_TASK_3 = """You are an expert research strategist and grant proposal architect.

	You will be provided with:
	1. The funding topic extracted from a Notice of Funding Opportunity (NOFO)
	2. The full text of a research paper that has been determined to be RELEVANT to the NOFO

	Your task is to generate research project ideas that could form the basis of a competitive proposal under this NOFO.

	Instructions:
	- Generate EXACTLY five (5) distinct research project ideas.
	- Each idea must clearly align with the NOFO topic and objectives.
	- Each idea must explicitly leverage insights, methods, or findings from the provided research paper.
	- Each idea must be realistic, fundable, and suitable for an academic or applied research proposal.

	For EACH idea, include the following fields in the exact order shown:
	1. Title – a concise project title
	2. Description – a clear explanation of the project idea (3–5 sentences)
	3. Citation – author(s) and year of the research paper
	4. NOFO Alignment – at least two specific ways the project aligns with the NOFO goals or requirements
	5. Research Paper Path – the file path or identifier for the paper used

	Formatting Requirements:
	- Separate each project idea using exactly three dashes:
	---
	- Do NOT add numbering or bullet points.
	- Do NOT include any text before the first idea or after the last idea.
	- Maintain consistent formatting across all five ideas.

	Output Format (repeat exactly five times):

	Title:
	Description:
	Citation:
	NOFO Alignment:
	Research Paper Path:
	"""

	# Task 4 prompt: asks for a full proposal with twelve fixed section headings.
	# run_pipeline appends the topic, the selected Task 3 idea, the paper text
	# and the NOFO text after this prompt; the reply is shown in a Markdown pane.
	PROMPT_TASK_4 = """You are an expert principal investigator and grant proposal writer.

	You will be provided with:
	1. The Notice of Funding Opportunity (NOFO) topic and objectives
	2. ONE selected research project idea generated in Task 3
	3. The research paper that supports this idea

	Your task is to write a COMPLETE, professional research proposal that is fully aligned with the NOFO requirements.

	Instructions:
	- Write the proposal in a formal grant-writing style.
	- Explicitly align all sections to the NOFO goals, priorities, and evaluation criteria.
	- Clearly show how the proposal builds upon and extends the provided research paper.
	- Assume the audience is a technical and scientific review panel.

	Required Proposal Sections (use these exact headings):

	1. Project Title
	2. Abstract (250–300 words)
	3. Background and Significance
	- Problem statement
	- Prior work and gaps
	- Relevance to the NOFO topic
	4. Objectives and Specific Aims
	5. Research Methodology
	- Study design
	- Data sources
	- Models, tools, or interventions
	6. Innovation
	- What is novel compared to existing work
	7. Expected Outcomes and Impact
	- Scientific, societal, or clinical impact
	8. Evaluation and Validation Plan
	- Metrics, benchmarks, or success criteria
	9. Timeline and Milestones
	- Phased plan over the project duration
	10. Risk Management and Mitigation
	11. Alignment with NOFO Priorities
	- Explicit mapping to funding objectives
	12. References
	- Include the provided research paper

	Constraints:
	- Keep the proposal concise but complete.
	- Avoid generic filler language.
	- Ensure internal consistency across sections.
	- Do NOT include budget or personnel details unless explicitly stated in the NOFO.

	Output Requirements:
	- Use clear section headings exactly as listed.
	- Do NOT include commentary, analysis, or explanations outside the proposal text.
	- Return the proposal text ONLY.
	"""

	# Task 5 prompt: peer-review scoring. The model must return JSON only, with
	# one object per criterion (Innovation, Significance, Approach, Investigator
	# Expertise), each holding score / justification / strengths / weaknesses /
	# recommendations. run_pipeline parses the reply with json.loads.
	# NOTE(review): "Investigator Expertise" is scored here although
	# PROMPT_TASK_4 tells the model not to include personnel details unless the
	# NOFO asks for them - confirm this criterion can be scored meaningfully.
	PROMPT_TASK_5 = """You are an expert grant reviewer serving on a competitive scientific review panel.

	You will be provided with:
	1. The Notice of Funding Opportunity (NOFO)
	2. A complete research proposal written in response to the NOFO

	Your task is to critically evaluate the proposal using standard peer-review criteria.

	Evaluation Criteria:
	Evaluate the proposal on the following four dimensions:

	1. Innovation
	2. Significance
	3. Approach
	4. Investigator Expertise

	Scoring Instructions:
	- Assign a numeric score from 1 to 5 for EACH criterion:
	1 = Poor
	2 = Fair
	3 = Good
	4 = Very Good
	5 = Excellent
	- Scores must reflect how competitive the proposal would be in a real funding review.
	- Be objective, specific, and evidence-based.

	For EACH criterion, provide:
	- Score (integer 1–5)
	- Justification (3–5 sentences)
	- Key Strengths (bullet list)
	- Key Weaknesses (bullet list)
	- Actionable Recommendations for Improvement

	Output Format:
	Return VALID JSON only. Do NOT include markdown, explanations, or additional text.

	Use the following JSON structure exactly:

	{
	"Innovation": {
	"score": <1-5>,
	"justification": "<text>",
	"strengths": ["<item>", "<item>"],
	"weaknesses": ["<item>", "<item>"],
	"recommendations": ["<item>", "<item>"]
	},
	"Significance": {
	"score": <1-5>,
	"justification": "<text>",
	"strengths": ["<item>", "<item>"],
	"weaknesses": ["<item>", "<item>"],
	"recommendations": ["<item>", "<item>"]
	},
	"Approach": {
	"score": <1-5>,
	"justification": "<text>",
	"strengths": ["<item>", "<item>"],
	"weaknesses": ["<item>", "<item>"],
	"recommendations": ["<item>", "<item>"]
	},
	"Investigator Expertise": {
	"score": <1-5>,
	"justification": "<text>",
	"strengths": ["<item>", "<item>"],
	"weaknesses": ["<item>", "<item>"],
	"recommendations": ["<item>", "<item>"]
	}
	}
	"""

	# System message passed to openai_chat for every task in run_pipeline.
	SYSTEM_BASE = "You are precise, structured, and follow formatting rules exactly."


	def extract_text_from_pdf(pdf_path: str) -> str:
	    """Return the concatenated text of every page in the PDF at *pdf_path*.

	    Pages are joined with a blank line. A page whose extraction raises, or
	    that yields only whitespace, contributes nothing.

	    Raises:
	        ValueError: if fewer than 600 characters of text could be extracted.
	    """

	    def _safe_page_text(page) -> str:
	        # Best effort per page: a failing page is treated as empty.
	        try:
	            return page.extract_text() or ""
	        except Exception:
	            return ""

	    page_texts = (_safe_page_text(page) for page in PdfReader(pdf_path).pages)
	    combined = "\n\n".join(chunk for chunk in page_texts if chunk.strip()).strip()

	    if len(combined) >= 600:
	        return combined
	    raise ValueError(
	        "Insufficient extractable text. Please upload a text-based PDF (selectable text), not a scanned PDF."
	    )


	def openai_chat(api_key: str, system: str, user: str, model: str) -> str:
	    """Run one system + user exchange through chat completions and return the reply.

	    The reply text is stripped; a missing reply is returned as an empty string.

	    Raises:
	        RuntimeError: if the OpenAI SDK could not be imported.
	        ValueError: if *api_key* is empty or whitespace only.
	    """
	    if OpenAI is None:
	        raise RuntimeError("OpenAI SDK is not installed. Please install 'openai'.")

	    cleaned_key = (api_key or "").strip()
	    if not cleaned_key:
	        raise ValueError("Please provide an OpenAI API key.")

	    conversation = [
	        {"role": "system", "content": system},
	        {"role": "user", "content": user},
	    ]
	    completion = OpenAI(api_key=cleaned_key).chat.completions.create(
	        model=model,
	        messages=conversation,
	        temperature=0.2,
	    )
	    reply = completion.choices[0].message.content
	    return reply.strip() if reply else ""


	def _first_idea(ideas_text: str) -> str:
	    """Return the first non-empty project idea from the Task 3 output.

	    Ideas are expected to be separated by lines consisting of ``---``. Empty
	    chunks (for example when the model opens with a separator) are skipped.
	    """
	    lines = ideas_text.splitlines()
	    if not any(line.strip() == "---" for line in lines):
	        # No separator line at all: cut at the first inline "---" if there is one.
	        head = ideas_text.split("---")[0].strip()
	        return head or ideas_text.strip()

	    chunk = []
	    for line in lines:
	        if line.strip() != "---":
	            chunk.append(line)
	            continue
	        if any(part.strip() for part in chunk):
	            break  # first real idea is complete
	        chunk = []  # separator before any content: keep looking
	    return "\n".join(chunk).strip() or ideas_text.strip()


	def _parse_review_json(raw: str):
	    """Parse the Task 5 reply, returning ``(view, raw_text)``.

	    Tries the whole reply first, then the span from the first ``{`` to the
	    last ``}`` (models sometimes wrap JSON in extra text). When the span
	    parses, it also replaces the raw text. On failure the view is an error
	    dict carrying the unparsed reply.
	    """
	    try:
	        return json.loads(raw), raw
	    except (ValueError, RecursionError):
	        pass

	    start, end = raw.find("{"), raw.rfind("}")
	    if start != -1 and end > start:
	        candidate = raw[start : end + 1]
	        try:
	            return json.loads(candidate), candidate
	        except (ValueError, RecursionError):
	            pass
	    return {"error": "Invalid JSON returned by model", "raw": raw}, raw


	def run_pipeline(api_key: str, model: str, nofo_text: str, paper_text: str, paper_path: str):
	    """Run the five-task NOFO/paper pipeline and save the results to a JSON file.

	    Task 1 extracts the funding topic, Task 2 judges relevance. If the Task 2
	    reply contains "PAPER NOT RELATED TO TOPIC", Tasks 3-5 are skipped;
	    otherwise Task 3 generates ideas, Task 4 writes a proposal for the first
	    idea and Task 5 scores that proposal.

	    Args:
	        api_key: OpenAI API key forwarded to openai_chat.
	        model: chat model name forwarded to openai_chat.
	        nofo_text: extracted text of the NOFO.
	        paper_text: extracted text of the research paper.
	        paper_path: identifier of the paper given to Task 3; falls back to
	            "uploaded_paper.pdf" when empty.

	    Returns:
	        ``(topic, t2_out, t3_out, t4_out, t5_view, out_path)`` where
	        ``t5_view`` is always JSON-serialisable data (never a bare status
	        string) and ``out_path`` is the temp file holding all raw outputs.

	    Raises:
	        Whatever openai_chat raises (e.g. missing SDK or API key).
	    """
	    not_related_marker = "PAPER NOT RELATED TO TOPIC"

	    # Task 1: topic, normalised to a single line.
	    topic = openai_chat(api_key, SYSTEM_BASE, PROMPT_TASK_1 + "\n\nNOFO TEXT:\n\n" + nofo_text, model)
	    topic = " ".join(topic.split())

	    # Task 2: relevance gate.
	    t2_out = openai_chat(
	        api_key,
	        SYSTEM_BASE,
	        PROMPT_TASK_2 + "\n\nNOFO TOPIC:\n" + topic + "\n\nRESEARCH PAPER TEXT:\n\n" + paper_text,
	        model,
	    ).strip()

	    # Substring match: the prompt lets the model wrap the marker in a "summary" field.
	    if not_related_marker in t2_out:
	        skipped = "SKIPPED - PAPER NOT RELATED TO TOPIC"
	        t3_out = t4_out = t5_raw = skipped
	        # The JSON viewer parses string values as JSON, so a bare status
	        # string would fail to render; hand it a dict instead.
	        t5_view = {"status": "skipped", "reason": not_related_marker}
	    else:
	        # Task 3: five project ideas.
	        t3_out = openai_chat(
	            api_key,
	            SYSTEM_BASE,
	            PROMPT_TASK_3
	            + "\n\nNOFO TOPIC:\n"
	            + topic
	            + "\n\nRESEARCH PAPER TEXT:\n\n"
	            + paper_text
	            + "\n\nRESEARCH PAPER PATH:\n"
	            + (paper_path or "uploaded_paper.pdf"),
	            model,
	        ).strip()

	        # Task 4: full proposal for the first idea.
	        t4_out = openai_chat(
	            api_key,
	            SYSTEM_BASE,
	            PROMPT_TASK_4
	            + "\n\nNOFO TOPIC:\n"
	            + topic
	            + "\n\nSELECTED IDEA (FROM TASK 3):\n\n"
	            + _first_idea(t3_out)
	            + "\n\nRESEARCH PAPER TEXT:\n\n"
	            + paper_text
	            + "\n\nNOFO TEXT:\n\n"
	            + nofo_text,
	            model,
	        ).strip()

	        # Task 5: review scores as JSON.
	        t5_raw = openai_chat(
	            api_key,
	            SYSTEM_BASE,
	            PROMPT_TASK_5 + "\n\nNOFO TEXT:\n\n" + nofo_text + "\n\nPROPOSAL TEXT:\n\n" + t4_out,
	            model,
	        ).strip()
	        t5_view, t5_raw = _parse_review_json(t5_raw)

	    results: Dict[str, Any] = {
	        "model": model,
	        "task_1_topic": topic,
	        "task_2_relevance_summary": t2_out,
	        "task_3_project_ideas": t3_out,
	        "task_4_full_proposal": t4_out,
	        "task_5_review_scores_json": t5_raw,
	    }

	    # The file must outlive this call so the UI can offer it for download.
	    fd, out_path = tempfile.mkstemp(prefix="nofo_paper_results_", suffix=".json")
	    os.close(fd)
	    with open(out_path, "w", encoding="utf-8") as f:
	        json.dump(results, f, ensure_ascii=False, indent=2)

	    return topic, t2_out, t3_out, t4_out, t5_view, out_path


	def run_analysis(api_key, model, nofo_file, paper_file, nofo_state, paper_state, paper_path_state):
	    """Gradio callback: refresh cached PDF text, run the pipeline, return all outputs.

	    Args:
	        api_key: OpenAI API key from the UI.
	        model: chat model name from the UI.
	        nofo_file, paper_file: current uploads, or None when nothing is
	            uploaded (the cached text from a previous run is then reused).
	        nofo_state, paper_state: cached extracted text of each document.
	        paper_path_state: cached path of the paper upload.

	    Returns:
	        A 9-tuple: topic, Task 2 text, Task 3 text, Task 4 text, Task 5 JSON
	        view, download path, followed by the three updated state values.
	        Problems are reported as "ERROR: ..." in the Task 2 slot with an
	        ``{"error": ...}`` dict in the JSON slot.
	    """

	    def _upload_path(upload):
	        # The upload may arrive as a plain path string or as an object
	        # exposing the path via ``.name``, depending on how gr.File is
	        # configured; accept both.
	        return upload if isinstance(upload, str) else upload.name

	    def _failure(message, detail):
	        # Same shape as a successful return, with empty result slots.
	        return "", message, "", "", {"error": detail}, None, nofo_state, paper_state, paper_path_state

	    # A state value may be None; treat that as "nothing cached".
	    nofo_state = nofo_state or ""
	    paper_state = paper_state or ""
	    paper_path_state = paper_path_state or ""

	    # Update cached texts if new files are uploaded.
	    try:
	        if nofo_file is not None:
	            nofo_state = extract_text_from_pdf(_upload_path(nofo_file))
	        if paper_file is not None:
	            new_paper_path = _upload_path(paper_file)
	            paper_state = extract_text_from_pdf(new_paper_path)
	            paper_path_state = new_paper_path
	    except ValueError as exc:
	        # e.g. a scanned PDF with no extractable text.
	        return _failure(f"ERROR: {exc}", str(exc))

	    if not nofo_state.strip():
	        return _failure("ERROR: Upload a NOFO PDF.", "missing NOFO")
	    if not paper_state.strip():
	        return _failure("ERROR: Upload a Research Paper PDF.", "missing paper")

	    try:
	        topic, t2, t3, t4, t5, dl = run_pipeline(api_key, model, nofo_state, paper_state, paper_path_state)
	    except (ValueError, RuntimeError) as exc:
	        # Validation failures (missing API key, SDK not installed) are shown
	        # like the other user-facing errors instead of escaping the callback.
	        return _failure(f"ERROR: {exc}", str(exc))
	    return topic, t2, t3, t4, t5, dl, nofo_state, paper_state, paper_path_state


	_APP_TITLE = "NOFO ↔ Paper Comparator (5-Task Pipeline)"


	def _reset_ui():
	    """Return blank values for every component wired to the Clear button.

	    Order: API key, model, both uploads, the six result components, then
	    the three cached state values.
	    """
	    return ("", DEFAULT_MODEL, None, None, "", "", "", "", {}, None, "", "", "")


	# Build the UI. Components are created in display order.
	with gr.Blocks(title=_APP_TITLE) as demo:
	    gr.Markdown(f"# {_APP_TITLE}")

	    # Per-session caches so a re-run can reuse previously extracted text.
	    nofo_text_state = gr.State("")
	    paper_text_state = gr.State("")
	    paper_path_state = gr.State("")

	    with gr.Row():
	        api_key = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
	        model = gr.Textbox(label="Model", value=DEFAULT_MODEL)

	    with gr.Row():
	        nofo_pdf = gr.File(label="NOFO PDF (drag & drop)", file_types=[".pdf"])
	        paper_pdf = gr.File(label="Research Paper PDF (drag & drop)", file_types=[".pdf"])

	    run_btn = gr.Button("Run Analysis", variant="primary")
	    clear_btn = gr.Button("Clear")

	    gr.Markdown("## Results")
	    task1_out = gr.Textbox(label="Task 1: Topic", lines=2)
	    task2_out = gr.Textbox(label="Task 2: Relevance Summary", lines=8)
	    task3_out = gr.Textbox(label="Task 3: 5 Project Ideas", lines=14)
	    task4_out = gr.Markdown(label="Task 4: Full Proposal")
	    task5_out = gr.JSON(label="Task 5: Review Scores (JSON)")
	    download_out = gr.File(label="Download Results (JSON)")

	    # Component groups shared by both event handlers.
	    _user_inputs = [api_key, model, nofo_pdf, paper_pdf]
	    _result_views = [task1_out, task2_out, task3_out, task4_out, task5_out, download_out]
	    _cached_states = [nofo_text_state, paper_text_state, paper_path_state]

	    run_btn.click(
	        fn=run_analysis,
	        inputs=[*_user_inputs, *_cached_states],
	        outputs=[*_result_views, *_cached_states],
	    )

	    clear_btn.click(
	        fn=_reset_ui,
	        inputs=[],
	        outputs=[*_user_inputs, *_result_views, *_cached_states],
	    )

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()