"""Gradio app that summarizes a research paper and critiques it section by section."""

import re

import gradio as gr
import pdfplumber
from transformers import pipeline

# Load models once at import time so every request reuses the same pipelines.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
critic = pipeline("text2text-generation", model="google/flan-t5-large")

# A heading is a line of its own: a capital letter followed by 3-40 letters or
# spaces. The named group is what extract_sections() reads back via
# match.group("header").
_HEADING_RE = re.compile(r'\n(?P<header>[A-Z][A-Za-z ]{3,40})\n')


# Extract and clean PDF text
def extract_text(file):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        file: Anything ``pdfplumber.open`` accepts (``analyze`` passes a path).

    Returns:
        The concatenated page text. Pages for which ``extract_text()`` returns
        a falsy value contribute an empty string.
    """
    with pdfplumber.open(file) as pdf:
        return "\n".join(page.extract_text() or "" for page in pdf.pages)


# Split text into sections based on common headings
def extract_sections(text):
    """Split *text* into sections keyed by lower-cased heading.

    Args:
        text: The full document text.

    Returns:
        A dict mapping each heading (stripped, lower-cased) to the stripped
        text between that heading and the next one (or the end of the text).
        If no heading is found, ``{"full text": text}`` is returned. A heading
        that occurs more than once keeps only its last occurrence.
    """
    matches = list(_HEADING_RE.finditer(text))
    sections = {}
    for i, match in enumerate(matches):
        start = match.end()
        # A section runs up to the next heading, or to the end of the text.
        end = matches[i + 1].start() if i + 1 < len(matches) else len(text)
        sections[match.group("header").strip().lower()] = text[start:end].strip()
    return sections or {"full text": text}


# Limit content for faster inference
def truncate(text, max_tokens=1000):
    """Return at most ``max_tokens * 5`` leading characters of *text*.

    The factor 5 is a rough characters-per-token estimate, not an exact
    token count.
    """
    return text[:max_tokens * 5]  # approx. chars per token


# Run analysis
def analyze(file, pasted_text, custom_prompt):
    """Summarize the paper, critique each section and answer a custom prompt.

    Args:
        file: The uploaded PDF from the ``gr.File`` input, or a falsy value
            when nothing was uploaded. Either a path string or an object
            exposing the path as ``.name`` is accepted.
        pasted_text: Paper text from the textbox; used only when no file is
            given. ``None`` is treated as empty.
        custom_prompt: Optional free-form prompt sent to the critic model
            as-is. ``None`` is treated as empty.

    Returns:
        A 4-tuple of strings ``(summary, critique, gaps, custom_response)``.
        When there is no usable text the tuple is
        ``("No text provided.", "", "", "")``.
    """
    if file:
        # The upload may arrive as a plain path string or as a file-like
        # wrapper whose path is in `.name`; accept both.
        raw_text = extract_text(getattr(file, "name", file))
    else:
        raw_text = (pasted_text or "").strip()

    # A PDF without extractable text yields only newlines; treat as empty.
    if not raw_text.strip():
        return "No text provided.", "", "", ""

    # truncate() is only a character-based estimate, so also let the
    # tokenizer clip the input to the model's real limit.
    summary = summarizer(
        truncate(raw_text),
        max_length=250,
        min_length=100,
        do_sample=False,
        truncation=True,
    )[0]['summary_text']

    sections = extract_sections(raw_text)

    critique_parts = []
    gap_parts = []
    for sec, content in sections.items():
        snippet = truncate(content)
        if not snippet:
            # A heading with no body gives the model nothing to critique.
            continue
        critique_out = critic(
            f"Critique this section:\n{snippet}",
            max_length=256,
            truncation=True,
        )[0]['generated_text']
        gaps_out = critic(
            f"Identify research gaps in this section:\n{snippet}",
            max_length=256,
            truncation=True,
        )[0]['generated_text']
        critique_parts.append(f"### {sec.title()}\n{critique_out}")
        gap_parts.append(f"### {sec.title()}\n{gaps_out}")

    critique_output = "\n\n".join(critique_parts).strip()
    gap_output = "\n\n".join(gap_parts).strip()

    custom_out = ""
    prompt = (custom_prompt or "").strip()
    if prompt:
        custom_out = critic(prompt, max_length=256, truncation=True)[0]['generated_text']

    return summary, critique_output, gap_output, custom_out


# Gradio interface
demo = gr.Interface(
    fn=analyze,
    inputs=[
        gr.File(label="📄 Upload PDF", file_types=[".pdf"]),
        gr.Textbox(lines=10, label="✍️ Or paste paper content"),
        gr.Textbox(lines=2, label="💬 Optional: Custom prompt"),
    ],
    outputs=[
        gr.Markdown(label="📋 Summary"),
        gr.Markdown(label="🧠 Critique"),
        gr.Markdown(label="🔎 Research Gaps"),
        gr.Markdown(label="💡 Custom Prompt Response"),
    ],
    title="Research Paper Critique Generator",
    description="Upload a PDF or paste text to receive an AI-generated summary, critique, and identification of research gaps.",
    theme="default",
)

if __name__ == "__main__":
    demo.launch()