|
|
import gradio as gr |
|
|
import pdfplumber |
|
|
import re |
|
|
from transformers import pipeline |
|
|
|
|
|
|
|
|
# Both pipelines are built once at import time and shared by every request.

# Summarization pipeline that produces the whole-paper summary in analyze().
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")

# Text-to-text pipeline prompted for critiques, research gaps and the
# optional custom prompt in analyze().
critic = pipeline(task="text2text-generation", model="google/flan-t5-large")
|
|
|
|
|
|
|
|
def extract_text(file):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        file: Whatever ``pdfplumber.open`` accepts to locate the PDF
            (``analyze`` passes the uploaded file's path).

    Returns:
        One string with the per-page text separated by ``"\\n"``. Pages whose
        extraction yields a falsy value contribute an empty string.
    """
    page_texts = []
    with pdfplumber.open(file) as document:
        for page in document.pages:
            # Fall back to "" so a page without text cannot break the join.
            page_texts.append(page.extract_text() or "")
    return "\n".join(page_texts)
|
|
|
|
|
|
|
|
def extract_sections(text):
    """Split paper text into sections keyed by lower-cased header.

    A header is a line of 4-41 characters that starts with an upper-case
    ASCII letter, contains only ASCII letters and spaces, and is followed by
    a newline. The text between one header and the next (or the end of the
    text) becomes that section's content, stripped of surrounding whitespace.

    Args:
        text: The full paper text.

    Returns:
        A dict mapping ``header.strip().lower()`` to the section content.
        Content of repeated headers is merged (joined by a blank line) instead
        of being overwritten. When no header is found the result is
        ``{"full text": text}``.
    """
    # ``(?:^|\n)`` lets a header on the very first line match as well; callers
    # commonly strip the text, which removes the newline a plain ``\n`` prefix
    # would need and silently drops the first section.
    pattern = r'(?:^|\n)(?P<header>[A-Z][A-Za-z ]{3,40})\n'
    matches = list(re.finditer(pattern, text))

    sections = {}
    for i, match in enumerate(matches):
        start = match.end()
        end = matches[i + 1].start() if i + 1 < len(matches) else len(text)
        name = match.group("header").strip().lower()
        body = text[start:end].strip()

        previous = sections.get(name)
        if previous and body:
            # Same header seen again (e.g. a running page header): keep both
            # pieces rather than discarding the earlier one.
            sections[name] = f"{previous}\n\n{body}"
        elif not previous:
            sections[name] = body

    return sections or {"full text": text}
|
|
|
|
|
|
|
|
def truncate(text, max_tokens=1000):
    """Clip *text* to a character budget derived from a token budget.

    The cut is a plain character slice at ``max_tokens * 5`` characters; no
    tokenizer is consulted, so the result only approximates a token count.

    Args:
        text: The string to clip. Empty or ``None`` yields ``""``.
        max_tokens: Approximate token budget. Non-positive values yield ``""``.

    Returns:
        The leading ``max_tokens * 5`` characters of *text* (or all of it when
        it is shorter).
    """
    # A negative budget would turn the slice into ``text[:-k]`` and return
    # almost the whole text instead of nothing.
    if not text or max_tokens <= 0:
        return ""
    return text[:max_tokens * 5]
|
|
|
|
|
|
|
|
def analyze(file, pasted_text, custom_prompt):
    """Summarize a paper and generate per-section critiques and research gaps.

    The paper text is read from the uploaded PDF when one is given. If no PDF
    is given, or the PDF yields only whitespace, the pasted text is used.

    Args:
        file: The uploaded PDF, either a path string or an object exposing the
            path as ``.name``; ``None`` when nothing was uploaded.
        pasted_text: Paper text entered by the user; may be empty or ``None``.
        custom_prompt: Optional prompt sent to the critic model; may be empty
            or ``None``.

    Returns:
        A 4-tuple ``(summary, critique_markdown, gaps_markdown,
        custom_response)``. When no text is available the tuple is
        ``("No text provided.", "", "", "")``.
    """
    pasted_text = (pasted_text or "").strip()
    custom_prompt = (custom_prompt or "").strip()

    # Accept both a plain path and a file-like wrapper carrying ``.name``.
    raw_text = extract_text(getattr(file, "name", file)) if file else ""
    if not raw_text.strip():
        # No upload, or a PDF without extractable text: use the pasted text.
        raw_text = pasted_text
    if not raw_text:
        return "No text provided.", "", "", ""

    # truncate() only approximates the token count by characters, so let the
    # tokenizer enforce the summarization model's real input limit as well.
    summary = summarizer(
        truncate(raw_text),
        max_length=250,
        min_length=100,
        do_sample=False,
        truncation=True,
    )[0]['summary_text']

    sections = extract_sections(raw_text)

    critiques = []
    gaps = []
    for sec, content in sections.items():
        snippet = truncate(content)
        if not snippet.strip():
            # Nothing to critique under this header; skip the model calls.
            continue
        heading = f"### {sec.title()}"
        critique_out = critic(f"Critique this section:\n{snippet}", max_length=256)[0]['generated_text']
        gaps_out = critic(f"Identify research gaps in this section:\n{snippet}", max_length=256)[0]['generated_text']
        critiques.append(f"{heading}\n{critique_out}")
        gaps.append(f"{heading}\n{gaps_out}")

    critique_output = "\n\n".join(critiques).strip()
    gap_output = "\n\n".join(gaps).strip()

    custom_out = ""
    if custom_prompt:
        # NOTE(review): the prompt is sent on its own, without any paper text;
        # confirm whether the paper should be supplied as context.
        custom_out = critic(custom_prompt, max_length=256)[0]['generated_text']

    return summary, critique_output, gap_output, custom_out
|
|
|
|
|
|
|
|
# Gradio UI: three inputs (PDF upload, pasted text, optional prompt) feed
# analyze(), whose four return values fill the four Markdown outputs in order.
# NOTE(review): the label emoji were mis-decoded in the source; the glyphs for
# "Upload PDF", "Summary" and "Research Gaps" are best-effort replacements.
demo = gr.Interface(
    fn=analyze,
    inputs=[
        gr.File(label="📄 Upload PDF", file_types=[".pdf"]),
        gr.Textbox(lines=10, label="✍️ Or paste paper content"),
        gr.Textbox(lines=2, label="💬 Optional: Custom prompt"),
    ],
    outputs=[
        gr.Markdown(label="📝 Summary"),
        gr.Markdown(label="🧠 Critique"),
        gr.Markdown(label="🔍 Research Gaps"),
        gr.Markdown(label="💡 Custom Prompt Response"),
    ],
    title="Research Paper Critique Generator",
    description="Upload a PDF or paste text to receive an AI-generated summary, critique, and identification of research gaps.",
    theme="default",
)
|
|
|
|
|
# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|
|
|
|
|
|