# example / app.py
# Last commit: "Update app.py" (d3b3dd1, verified) by tejovanth
import gradio as gr
import pdfplumber
import re
from transformers import pipeline
# Build both Hugging Face pipelines a single time at import, so each call to
# analyze() reuses the already-loaded models instead of reloading them.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
critic = pipeline(task="text2text-generation", model="google/flan-t5-large")
# Extract and clean PDF text
def extract_text(file):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        file: Whatever ``pdfplumber.open`` accepts (the caller in this file
            passes a path to the uploaded PDF).

    Returns:
        One string containing each page's text separated by ``"\\n"``. A page
        for which ``extract_text()`` returns nothing contributes an empty
        string, so the number of separators still reflects the page count.
    """
    page_texts = []
    with pdfplumber.open(file) as document:
        for page in document.pages:
            extracted = page.extract_text()
            # extract_text() may return None/empty; normalise that to "".
            page_texts.append(extracted if extracted else "")
    return "\n".join(page_texts)
# Split text into sections based on common headings
def extract_sections(text):
    """Split *text* into sections keyed by lower-cased heading.

    A heading is a whole line that starts with an uppercase ASCII letter and
    is followed by 3-40 further ASCII letters or spaces (for example
    ``Abstract`` or ``Related Work``). The body of a section is everything
    between its heading and the next heading (or the end of the text), with
    surrounding whitespace stripped.

    Args:
        text: The full document text.

    Returns:
        A dict mapping ``heading.strip().lower()`` to the section body, in
        order of first appearance. Headings with an empty body are skipped,
        and bodies of repeated headings are joined with a blank line instead
        of overwriting each other. If no non-empty section is found, returns
        ``{"full text": text}`` so callers always get at least one entry.
    """
    # Anchor on line starts (MULTILINE) and only *look ahead* for the newline.
    # Consuming the newline on both sides would hide a heading on the very
    # first line and any heading that directly follows another heading.
    pattern = r'^(?P<header>[A-Z][A-Za-z ]{3,40})(?=\n)'
    matches = list(re.finditer(pattern, text, flags=re.MULTILINE))

    sections = {}
    for i, match in enumerate(matches):
        start = match.end()
        end = matches[i + 1].start() if i + 1 < len(matches) else len(text)
        body = text[start:end].strip()
        if not body:
            # A heading with nothing under it (e.g. immediately followed by a
            # sub-heading) gives downstream consumers nothing to work with.
            continue
        name = match.group("header").strip().lower()
        if name in sections:
            # Keep earlier content when the same heading appears again.
            sections[name] = f"{sections[name]}\n\n{body}"
        else:
            sections[name] = body
    return sections or {"full text": text}
# Limit content for faster inference
def truncate(text, max_tokens=1000):
    """Return a prefix of *text* sized for roughly *max_tokens* tokens.

    The cut is made on characters, not real tokens: the budget is
    ``max_tokens * 5`` characters, a rough chars-per-token estimate.

    Args:
        text: The string to shorten.
        max_tokens: Approximate token budget (default 1000, i.e. 5000 chars).

    Returns:
        ``text`` unchanged when it already fits, otherwise its leading slice.
    """
    char_budget = 5 * max_tokens  # approx. chars per token
    return text[0:char_budget]
# Run analysis
def analyze(file, pasted_text, custom_prompt):
    """Summarise a paper, critique each section, and list research gaps.

    Args:
        file: The uploaded PDF, or a falsy value when nothing was uploaded.
            May be an object exposing the path as ``.name`` or a plain path
            string (which one Gradio passes depends on its version/config).
        pasted_text: Paper text typed into the UI; used only when *file* is
            falsy. ``None`` is treated as empty.
        custom_prompt: Optional free-form prompt sent to the critic model
            as-is. ``None`` or blank means "skip".

    Returns:
        A 4-tuple of strings ``(summary, critique, gaps, custom_response)``.
        The critique and gaps are Markdown with one ``###`` heading per
        section. When there is no usable text the tuple is
        ``("No text provided.", "", "", "")``.
    """
    if file:
        # Accept both a tempfile-like object and a bare path string.
        pdf_path = getattr(file, "name", file)
        raw_text = extract_text(pdf_path)
    else:
        raw_text = pasted_text or ""

    # Strip in both branches: a PDF with no extractable text yields only the
    # newline separators, which must count as "no text".
    raw_text = raw_text.strip()
    if not raw_text:
        return "No text provided.", "", "", ""

    # truncate() limits by characters only; truncation=True additionally lets
    # the tokenizer clip the input to the model's maximum length, so a
    # token-dense input cannot overflow the summarizer.
    summary = summarizer(
        truncate(raw_text),
        max_length=250,
        min_length=100,
        do_sample=False,
        truncation=True,
    )[0]['summary_text']

    critique_parts = []
    gap_parts = []
    for sec, content in extract_sections(raw_text).items():
        snippet = truncate(content)
        heading = sec.title()
        critique_out = critic(f"Critique this section:\n{snippet}", max_length=256)[0]['generated_text']
        gaps_out = critic(f"Identify research gaps in this section:\n{snippet}", max_length=256)[0]['generated_text']
        critique_parts.append(f"### {heading}\n{critique_out}")
        gap_parts.append(f"### {heading}\n{gaps_out}")

    custom_out = ""
    prompt = (custom_prompt or "").strip()
    if prompt:
        # NOTE(review): the prompt is sent without any paper text attached,
        # so the model answers it with no document context; confirm intended.
        custom_out = critic(prompt, max_length=256)[0]['generated_text']

    return (
        summary,
        "\n\n".join(critique_parts).strip(),
        "\n\n".join(gap_parts).strip(),
        custom_out,
    )
# Gradio interface
# Wire analyze() to a simple form: three inputs (PDF upload, pasted text,
# optional custom prompt) mapped positionally onto analyze's parameters, and
# four Markdown outputs matching the 4-tuple it returns.
demo = gr.Interface(
    fn=analyze,
    inputs=[
        gr.File(label="📄 Upload PDF", file_types=[".pdf"]),
        gr.Textbox(lines=10, label="✍️ Or paste paper content"),
        gr.Textbox(lines=2, label="💬 Optional: Custom prompt"),
    ],
    outputs=[
        gr.Markdown(label="📋 Summary"),
        gr.Markdown(label="🧠 Critique"),
        gr.Markdown(label="🔎 Research Gaps"),
        gr.Markdown(label="💡 Custom Prompt Response"),
    ],
    title="Research Paper Critique Generator",
    description="Upload a PDF or paste text to receive an AI-generated summary, critique, and identification of research gaps.",
    theme="default",
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()