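# NOTE(review): the three lines below look like repository-page residue
# (uploader avatar caption, commit message, commit id) rather than program
# text; they are kept verbatim as comments so the module can be parsed.
# eaglelandsonce's picture
# Create app.py
# 09756c4 verified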
import os
import json
import tempfile
from typing import Any, Dict, Tuple
import gradio as gr
from pypdf import PdfReader
# The OpenAI SDK import is allowed to fail so that importing this module does
# not raise when the package is missing; openai_chat() checks for None and
# reports the missing dependency when a request is actually made.
try:
    from openai import OpenAI
except Exception:
    OpenAI = None  # handled at runtime

# Model name pre-filled in the UI; read from the OPENAI_MODEL environment
# variable, with "gpt-4o-mini" used when that variable is not set.
DEFAULT_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
# Task 1 prompt: asks for the single primary funding topic of a NOFO, returned
# as a bare topic name. run_pipeline appends the NOFO text after this prompt.
PROMPT_TASK_1 = """You are an expert research analyst.
You will be given the full text of a Notice of Funding Opportunity (NOFO).
Your task is to identify and extract the PRIMARY topic or focus area for which funding is being provided.
Instructions:
- Read the NOFO carefully.
- Determine the single, highest-level topic that best represents the purpose of the funding.
- Respond with the topic name ONLY.
- Do NOT include explanations, descriptions, bullet points, or additional text.
- Do NOT include punctuation before or after the topic.
Output Format:
<topic name only>
"""
PROMPT_TASK_2 = """You are an expert research reviewer and grant analyst.
You will be provided with:
1. The extracted funding topic from a Notice of Funding Opportunity (NOFO)
2. The full text of a research paper
Your task is to determine whether the research paper is RELEVANT to the NOFO topic.
Relevance Criteria:
- Domain alignment (field, application area, or population)
- Methodological alignment (approaches, models, techniques)
- Theoretical or conceptual alignment
- Practical applicability to the NOFO’s funding objectives
Instructions:
- Carefully analyze the research paper in relation to the NOFO topic.
- If the paper does NOT meaningfully align by domain, method, theory, or application:
β†’ Respond with exactly:
PAPER NOT RELATED TO TOPIC
- If the paper IS relevant:
β†’ Provide a concise summary of the paper in under 300 words.
β†’ Emphasize how the paper supports, informs, or could enable work aligned with the NOFO topic.
Output Format:
### Output Format:
"summary": "<summary of the paper under 300 words OR PAPER NOT RELATED TO TOPIC>"
"""
# Task 3 prompt: asks for exactly five project ideas separated by a line of
# three dashes; run_pipeline splits the reply on that separator to pick the
# first idea. Mis-encoded en dashes in the field list were restored.
PROMPT_TASK_3 = """You are an expert research strategist and grant proposal architect.
You will be provided with:
1. The funding topic extracted from a Notice of Funding Opportunity (NOFO)
2. The full text of a research paper that has been determined to be RELEVANT to the NOFO
Your task is to generate research project ideas that could form the basis of a competitive proposal under this NOFO.
Instructions:
- Generate EXACTLY five (5) distinct research project ideas.
- Each idea must clearly align with the NOFO topic and objectives.
- Each idea must explicitly leverage insights, methods, or findings from the provided research paper.
- Each idea must be realistic, fundable, and suitable for an academic or applied research proposal.
For EACH idea, include the following fields in the exact order shown:
1. Title – a concise project title
2. Description – a clear explanation of the project idea (3–5 sentences)
3. Citation – author(s) and year of the research paper
4. NOFO Alignment – at least two specific ways the project aligns with the NOFO goals or requirements
5. Research Paper Path – the file path or identifier for the paper used
Formatting Requirements:
- Separate each project idea using exactly three dashes:
---
- Do NOT add numbering or bullet points.
- Do NOT include any text before the first idea or after the last idea.
- Maintain consistent formatting across all five ideas.
Output Format (repeat exactly five times):
Title:
Description:
Citation:
NOFO Alignment:
Research Paper Path:
"""
# Task 4 prompt: asks for a complete proposal with twelve fixed section
# headings, based on one selected Task 3 idea. The mis-encoded en dash in the
# abstract word range was restored.
PROMPT_TASK_4 = """You are an expert principal investigator and grant proposal writer.
You will be provided with:
1. The Notice of Funding Opportunity (NOFO) topic and objectives
2. ONE selected research project idea generated in Task 3
3. The research paper that supports this idea
Your task is to write a COMPLETE, professional research proposal that is fully aligned with the NOFO requirements.
Instructions:
- Write the proposal in a formal grant-writing style.
- Explicitly align all sections to the NOFO goals, priorities, and evaluation criteria.
- Clearly show how the proposal builds upon and extends the provided research paper.
- Assume the audience is a technical and scientific review panel.
Required Proposal Sections (use these exact headings):
1. Project Title
2. Abstract (250–300 words)
3. Background and Significance
- Problem statement
- Prior work and gaps
- Relevance to the NOFO topic
4. Objectives and Specific Aims
5. Research Methodology
- Study design
- Data sources
- Models, tools, or interventions
6. Innovation
- What is novel compared to existing work
7. Expected Outcomes and Impact
- Scientific, societal, or clinical impact
8. Evaluation and Validation Plan
- Metrics, benchmarks, or success criteria
9. Timeline and Milestones
- Phased plan over the project duration
10. Risk Management and Mitigation
11. Alignment with NOFO Priorities
- Explicit mapping to funding objectives
12. References
- Include the provided research paper
Constraints:
- Keep the proposal concise but complete.
- Avoid generic filler language.
- Ensure internal consistency across sections.
- Do NOT include budget or personnel details unless explicitly stated in the NOFO.
Output Requirements:
- Use clear section headings exactly as listed.
- Do NOT include commentary, analysis, or explanations outside the proposal text.
- Return the proposal text ONLY.
"""
# Task 5 prompt: asks for a peer-review style evaluation returned as JSON only,
# with one object per criterion. run_pipeline tries to parse the reply with
# json.loads. Mis-encoded en dashes in the two numeric ranges were restored.
PROMPT_TASK_5 = """You are an expert grant reviewer serving on a competitive scientific review panel.
You will be provided with:
1. The Notice of Funding Opportunity (NOFO)
2. A complete research proposal written in response to the NOFO
Your task is to critically evaluate the proposal using standard peer-review criteria.
Evaluation Criteria:
Evaluate the proposal on the following four dimensions:
1. Innovation
2. Significance
3. Approach
4. Investigator Expertise
Scoring Instructions:
- Assign a numeric score from 1 to 5 for EACH criterion:
1 = Poor
2 = Fair
3 = Good
4 = Very Good
5 = Excellent
- Scores must reflect how competitive the proposal would be in a real funding review.
- Be objective, specific, and evidence-based.
For EACH criterion, provide:
- Score (integer 1–5)
- Justification (3–5 sentences)
- Key Strengths (bullet list)
- Key Weaknesses (bullet list)
- Actionable Recommendations for Improvement
Output Format:
Return VALID JSON only. Do NOT include markdown, explanations, or additional text.
Use the following JSON structure exactly:
{
"Innovation": {
"score": <1-5>,
"justification": "<text>",
"strengths": ["<item>", "<item>"],
"weaknesses": ["<item>", "<item>"],
"recommendations": ["<item>", "<item>"]
},
"Significance": {
"score": <1-5>,
"justification": "<text>",
"strengths": ["<item>", "<item>"],
"weaknesses": ["<item>", "<item>"],
"recommendations": ["<item>", "<item>"]
},
"Approach": {
"score": <1-5>,
"justification": "<text>",
"strengths": ["<item>", "<item>"],
"weaknesses": ["<item>", "<item>"],
"recommendations": ["<item>", "<item>"]
},
"Investigator Expertise": {
"score": <1-5>,
"justification": "<text>",
"strengths": ["<item>", "<item>"],
"weaknesses": ["<item>", "<item>"],
"recommendations": ["<item>", "<item>"]
}
}
"""
# System message that run_pipeline passes to openai_chat for every task.
SYSTEM_BASE = "You are precise, structured, and follow formatting rules exactly."
def extract_text_from_pdf(pdf_path: str, min_chars: int = 600) -> str:
    """Return the concatenated text of every page of a PDF.

    Each page is read with ``page.extract_text()``. A page whose extraction
    raises, or that yields only whitespace, is skipped so a single bad page
    does not discard the rest of the document. The remaining page texts are
    joined with a blank line between them.

    Args:
        pdf_path: Path of the PDF file to read.
        min_chars: Minimum number of characters the joined text must contain.
            Defaults to 600, the threshold this function previously
            hard-coded.

    Returns:
        The joined page text with leading and trailing whitespace removed.

    Raises:
        ValueError: If fewer than ``min_chars`` characters were extracted.
    """
    reader = PdfReader(pdf_path)
    parts = []
    for page in reader.pages:
        try:
            txt = page.extract_text() or ""
        except Exception:
            # Deliberate best effort: treat an unreadable page as empty and
            # let the length check below decide whether enough text remains.
            txt = ""
        if txt.strip():
            parts.append(txt)

    text = "\n\n".join(parts).strip()
    if len(text) < min_chars:
        raise ValueError(
            "Insufficient extractable text. Please upload a text-based PDF (selectable text), not a scanned PDF."
        )
    return text
def openai_chat(api_key: str, system: str, user: str, model: str, temperature: float = 0.2) -> str:
    """Send one system + user message pair to the chat completions endpoint.

    Args:
        api_key: OpenAI API key; surrounding whitespace is ignored.
        system: Content of the system message.
        user: Content of the user message.
        model: Name of the chat model to call.
        temperature: Sampling temperature forwarded to the API. Defaults to
            0.2, the value this function previously hard-coded.

    Returns:
        The content of the first returned choice with surrounding whitespace
        removed, or an empty string when that message has no content.

    Raises:
        RuntimeError: If the ``openai`` package could not be imported, or the
            response contains no choices.
        ValueError: If ``api_key`` is empty or only whitespace.
    """
    if OpenAI is None:
        raise RuntimeError("OpenAI SDK is not installed. Please install 'openai'.")
    if not api_key or not api_key.strip():
        raise ValueError("Please provide an OpenAI API key.")

    client = OpenAI(api_key=api_key.strip())
    resp = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        temperature=temperature,
    )
    # Fail with a readable message instead of an IndexError on choices[0].
    if not resp.choices:
        raise RuntimeError("OpenAI returned no completion choices.")
    return (resp.choices[0].message.content or "").strip()
def _first_idea(ideas_text: str) -> str:
    """Return the first non-empty idea from the Task 3 reply."""
    separator = "\n---\n" if "\n---\n" in ideas_text else "---"
    for chunk in ideas_text.split(separator):
        # Drop stray separator dashes so a reply that opens with "---" does
        # not produce an empty or dash-only idea.
        cleaned = chunk.strip().strip("-").strip()
        if cleaned:
            return cleaned
    return ideas_text.strip()


def _parse_review_json(raw: str) -> Tuple[Any, str]:
    """Parse the Task 5 reply, tolerating extra text around the JSON object.

    Returns ``(view, raw_json)``. ``view`` is the parsed value, or an error
    dict when nothing parses. ``raw_json`` is the text that was successfully
    parsed, or the unchanged reply when parsing failed.
    """
    try:
        return json.loads(raw), raw
    except json.JSONDecodeError:
        pass

    # Second attempt: keep only the span from the first "{" to the last "}".
    start, end = raw.find("{"), raw.rfind("}")
    if start != -1 and end > start:
        candidate = raw[start : end + 1]
        try:
            return json.loads(candidate), candidate
        except json.JSONDecodeError:
            pass
    return {"error": "Invalid JSON returned by model", "raw": raw}, raw


def run_pipeline(api_key: str, model: str, nofo_text: str, paper_text: str, paper_path: str):
    """Run the five prompt tasks in order and save the results to a JSON file.

    Task 1 extracts the NOFO topic and Task 2 judges the paper's relevance.
    When the Task 2 reply contains "PAPER NOT RELATED TO TOPIC", Tasks 3-5 are
    skipped and their outputs carry a "SKIPPED" marker. Otherwise Task 3
    generates ideas, Task 4 writes a proposal from the first idea, and Task 5
    reviews that proposal.

    Args:
        api_key: OpenAI API key passed to ``openai_chat``.
        model: Chat model name passed to ``openai_chat``.
        nofo_text: Extracted text of the NOFO.
        paper_text: Extracted text of the research paper.
        paper_path: Path or identifier of the paper, included in the Task 3
            prompt; "uploaded_paper.pdf" is used when it is empty.

    Returns:
        A 6-tuple ``(topic, task2_text, task3_text, task4_text, task5_view,
        out_path)``. ``task5_view`` is the parsed Task 5 JSON, or a dict with
        an ``"error"`` key when parsing failed, or a dict with a ``"status"``
        key when the task was skipped. ``out_path`` is the path of a
        temporary JSON file holding the text outputs.
    """
    not_related_marker = "PAPER NOT RELATED TO TOPIC"
    skipped = "SKIPPED - PAPER NOT RELATED TO TOPIC"

    # Task 1: topic, with internal whitespace collapsed to single spaces.
    topic = openai_chat(api_key, SYSTEM_BASE, PROMPT_TASK_1 + "\n\nNOFO TEXT:\n\n" + nofo_text, model)
    topic = " ".join(topic.split())

    # Task 2: relevance verdict or summary.
    t2_out = openai_chat(
        api_key,
        SYSTEM_BASE,
        PROMPT_TASK_2 + "\n\nNOFO TOPIC:\n" + topic + "\n\nRESEARCH PAPER TEXT:\n\n" + paper_text,
        model,
    ).strip()

    # A substring test also covers the exact-match case.
    if not_related_marker in t2_out:
        t3_out = skipped
        t4_out = skipped
        t5_raw = skipped
        # The viewer value is a dict because the JSON output component
        # expects a dict, a list, or a string that is itself valid JSON.
        t5_view = {"status": skipped}
    else:
        # Task 3: project ideas.
        t3_out = openai_chat(
            api_key,
            SYSTEM_BASE,
            PROMPT_TASK_3
            + "\n\nNOFO TOPIC:\n"
            + topic
            + "\n\nRESEARCH PAPER TEXT:\n\n"
            + paper_text
            + "\n\nRESEARCH PAPER PATH:\n"
            + (paper_path or "uploaded_paper.pdf"),
            model,
        ).strip()

        # Task 4: full proposal built from the first idea.
        first_idea = _first_idea(t3_out)
        t4_out = openai_chat(
            api_key,
            SYSTEM_BASE,
            PROMPT_TASK_4
            + "\n\nNOFO TOPIC:\n"
            + topic
            + "\n\nSELECTED IDEA (FROM TASK 3):\n\n"
            + first_idea
            + "\n\nRESEARCH PAPER TEXT:\n\n"
            + paper_text
            + "\n\nNOFO TEXT:\n\n"
            + nofo_text,
            model,
        ).strip()

        # Task 5: review of the proposal, expected to be JSON.
        t5_reply = openai_chat(
            api_key,
            SYSTEM_BASE,
            PROMPT_TASK_5 + "\n\nNOFO TEXT:\n\n" + nofo_text + "\n\nPROPOSAL TEXT:\n\n" + t4_out,
            model,
        ).strip()
        t5_view, t5_raw = _parse_review_json(t5_reply)

    results: Dict[str, Any] = {
        "model": model,
        "task_1_topic": topic,
        "task_2_relevance_summary": t2_out,
        "task_3_project_ideas": t3_out,
        "task_4_full_proposal": t4_out,
        "task_5_review_scores_json": t5_raw,
    }

    # Write through the descriptor mkstemp returned rather than closing it
    # and reopening the path.
    fd, out_path = tempfile.mkstemp(prefix="nofo_paper_results_", suffix=".json")
    with os.fdopen(fd, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    return topic, t2_out, t3_out, t4_out, t5_view, out_path
def run_analysis(api_key, model, nofo_file, paper_file, nofo_state, paper_state, paper_path_state):
    """Handle a "Run Analysis" click: refresh cached PDF text, then run the pipeline.

    Args:
        api_key: OpenAI API key entered in the UI.
        model: Model name entered in the UI; a blank value falls back to
            ``DEFAULT_MODEL``.
        nofo_file: Uploaded NOFO file, or None to reuse ``nofo_state``. Either
            an object with a ``name`` attribute holding the path, or a plain
            path string.
        paper_file: Uploaded paper file, or None to reuse ``paper_state``;
            same accepted forms as ``nofo_file``.
        nofo_state: NOFO text cached from an earlier call.
        paper_state: Paper text cached from an earlier call.
        paper_path_state: Paper path cached from an earlier call.

    Returns:
        A 9-tuple: the five task outputs, the results file path (or None),
        then the three updated cache values. On any failure the second
        element is an "ERROR: ..." message, the fifth is a dict with an
        ``"error"`` key, and the file path is None.
    """
    import logging  # local import: the module's import block is left untouched

    log = logging.getLogger(__name__)

    # Treat a missing cache value as empty so .strip() below cannot fail.
    nofo_state = nofo_state or ""
    paper_state = paper_state or ""
    paper_path_state = paper_path_state or ""

    def failure(message, detail):
        # Reads the cache variables at call time, so it returns whatever was
        # successfully refreshed before the failure.
        return "", f"ERROR: {message}", "", "", {"error": detail}, None, nofo_state, paper_state, paper_path_state

    # Update cached texts if new files are uploaded.
    try:
        if nofo_file is not None:
            nofo_state = extract_text_from_pdf(getattr(nofo_file, "name", nofo_file))
        if paper_file is not None:
            new_paper_path = getattr(paper_file, "name", paper_file)
            paper_state = extract_text_from_pdf(new_paper_path)
            paper_path_state = new_paper_path
    except Exception as exc:
        # Handler boundary: report the problem in the UI like the
        # missing-file cases below, and keep the traceback in the log.
        log.exception("PDF text extraction failed")
        return failure(str(exc), str(exc))

    if not nofo_state.strip():
        return failure("Upload a NOFO PDF.", "missing NOFO")
    if not paper_state.strip():
        return failure("Upload a Research Paper PDF.", "missing paper")

    model = (model or "").strip() or DEFAULT_MODEL
    try:
        topic, t2, t3, t4, t5, dl = run_pipeline(api_key, model, nofo_state, paper_state, paper_path_state)
    except Exception as exc:
        log.exception("Analysis pipeline failed")
        return failure(str(exc), str(exc))

    return topic, t2, t3, t4, t5, dl, nofo_state, paper_state, paper_path_state
with gr.Blocks(title="NOFO ↔ Paper Comparator (5-Task Pipeline)") as demo:
    gr.Markdown("# NOFO ↔ Paper Comparator (5-Task Pipeline)")

    # State slots that run_analysis reads and writes back, carrying the
    # extracted texts and the paper path from one click to the next.
    nofo_text_state = gr.State("")
    paper_text_state = gr.State("")
    paper_path_state = gr.State("")

    with gr.Row():
        api_key = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
        model = gr.Textbox(label="Model", value=DEFAULT_MODEL)

    with gr.Row():
        nofo_pdf = gr.File(label="NOFO PDF (drag & drop)", file_types=[".pdf"])
        paper_pdf = gr.File(label="Research Paper PDF (drag & drop)", file_types=[".pdf"])

    # NOTE(review): the buttons are placed below the upload row; the original
    # nesting is not recoverable from the source, so confirm the layout.
    run_btn = gr.Button("Run Analysis", variant="primary")
    clear_btn = gr.Button("Clear")

    gr.Markdown("## Results")
    task1_out = gr.Textbox(label="Task 1: Topic", lines=2)
    task2_out = gr.Textbox(label="Task 2: Relevance Summary", lines=8)
    task3_out = gr.Textbox(label="Task 3: 5 Project Ideas", lines=14)
    task4_out = gr.Markdown(label="Task 4: Full Proposal")
    task5_out = gr.JSON(label="Task 5: Review Scores (JSON)")
    download_out = gr.File(label="Download Results (JSON)")

    # Component groups shared by both click handlers; list order must match
    # the order of the values each callback returns.
    form_inputs = [api_key, model, nofo_pdf, paper_pdf]
    result_widgets = [task1_out, task2_out, task3_out, task4_out, task5_out, download_out]
    cache_slots = [nofo_text_state, paper_text_state, paper_path_state]

    def _reset_everything():
        """Return blank values for the form, the results and the cached state."""
        return ("", DEFAULT_MODEL, None, None, "", "", "", "", {}, None, "", "", "")

    run_btn.click(
        fn=run_analysis,
        inputs=form_inputs + cache_slots,
        outputs=result_widgets + cache_slots,
    )
    clear_btn.click(
        fn=_reset_everything,
        inputs=[],
        outputs=form_inputs + result_widgets + cache_slots,
    )

if __name__ == "__main__":
    demo.launch()