Spaces:

tejovanth
/

example

Sleeping

App Files Files Community

tejovanth committed on Apr 15, 2025

Commit

d3b3dd1

verified ·

1 Parent(s): 28f2da4

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -51

app.py CHANGED Viewed

@@ -1,85 +1,76 @@
 import gradio as gr
 import pdfplumber
 from transformers import pipeline
-# Load models
 summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 critic = pipeline("text2text-generation", model="google/flan-t5-large")
-# PDF text extraction
-def extract_text_from_pdf(pdf_file):
-    text = ""
-    with pdfplumber.open(pdf_file) as pdf:
-        for page in pdf.pages:
-            content = page.extract_text()
-            if content:
-                text += content + "\n"
-    return text
-# Split text into sections
-def split_into_sections(text):
     sections = {}
-    headings = ["abstract", "introduction", "methodology", "methods", "results", "discussion", "conclusion", "references"]
-    current = "others"
-    sections[current] = ""
-    for line in text.splitlines():
-        if any(h in line.lower() for h in headings):
-            current = next((h for h in headings if h in line.lower()), "others")
-            sections[current] = ""
-        sections[current] += line + "\n"
-    return sections
-# LLM-based functions
-def summarize(text): return summarizer(text[:3000])[0]['summary_text']
-def critique(text): return critic(f"Critique this section:\n{text}", max_length=512)[0]['generated_text']
-def find_gaps(text): return critic(f"What research gaps are present?\n{text}", max_length=512)[0]['generated_text']
-def custom_query(text): return critic(text, max_length=512)[0]['generated_text']
-# Main handler
 def analyze(file, pasted_text, custom_prompt):
-    if file is not None:
-        text = extract_text_from_pdf(file.name)
-    elif pasted_text.strip():
-        text = pasted_text
-    else:
-        return "Please upload a file or enter some text.", "", "", ""
-    # Generate outputs
-    summary = summarize(text)
-    sections = split_into_sections(text)
     critique_output = ""
     gap_output = ""
     for sec, content in sections.items():
-        snippet = content[:1000]
-        critique_output += f"### {sec.capitalize()}\n**Critique:**\n{critique(snippet)}\n\n"
-        gap_output += f"### {sec.capitalize()}\n**Research Gaps:**\n{find_gaps(snippet)}\n\n"
-    custom_response = ""
     if custom_prompt.strip():
-        custom_response = custom_query(custom_prompt)
-    return summary, critique_output, gap_output, custom_response
-# Gradio UI
 demo = gr.Interface(
     fn=analyze,
     inputs=[
-        gr.File(label="Upload PDF", file_types=[".pdf"]),
-        gr.Textbox(lines=8, placeholder="Or paste your research text here...", label="Pasted Text"),
-        gr.Textbox(lines=2, placeholder="Optional: Ask a custom question", label="Custom Prompt")
     ],
     outputs=[
         gr.Markdown(label="📋 Summary"),
-        gr.Markdown(label="🧠 Critique by Section"),
         gr.Markdown(label="🔎 Research Gaps"),
-        gr.Markdown(label="💬 Custom Prompt Response")
     ],
-    title="📄 Research Paper Critique Generator",
-    description="Upload a research paper (PDF) or paste its content. Get section-wise critiques, summaries, and gap analysis using LLMs.",
     theme="default"
 )
-demo.launch()

 import gradio as gr
 import pdfplumber
+import re
 from transformers import pipeline
# Load both pipelines once, at import time, so every call to analyze()
# reuses the same objects:
#   summarizer - "summarization" pipeline backed by facebook/bart-large-cnn
#   critic     - "text2text-generation" pipeline backed by google/flan-t5-large
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
critic = pipeline("text2text-generation", model="google/flan-t5-large")
# Extract PDF text
def extract_text(file):
    """Return the text of every page of a PDF, joined with newlines.

    ``file`` is passed straight to ``pdfplumber.open``. A page whose
    ``extract_text()`` result is falsy (``None`` or an empty string)
    contributes an empty string, so it still occupies a slot between the
    newline separators.
    """
    page_texts = []
    with pdfplumber.open(file) as document:
        for page in document.pages:
            extracted = page.extract_text()
            page_texts.append(extracted if extracted else "")
    return "\n".join(page_texts)
# Split text into sections based on heading-like lines
# A "heading" is a whole line of 4-41 characters that starts with an
# uppercase ASCII letter and otherwise contains only ASCII letters and
# spaces. Anchoring with ^/$ (MULTILINE) instead of consuming the
# surrounding newlines lets a heading match on the very first line and
# directly after another heading.
_HEADER_RE = re.compile(r'^(?P<header>[A-Z][A-Za-z ]{3,40})$', re.MULTILINE)


def extract_sections(text):
    """Split ``text`` into a dict mapping lower-cased heading -> section body.

    Args:
        text: The full document text.

    Returns:
        A dict in document order. Keys are heading lines, stripped and
        lower-cased; values are the stripped text between that heading and
        the next one. Non-empty text that precedes the first heading is
        kept under the key ``"preamble"``. Sections whose body is empty are
        omitted, and bodies of repeated headings are joined with a blank
        line rather than overwriting each other. When nothing usable is
        found the result is ``{"full text": text}``.
    """
    # Normalise CRLF / CR line endings so "$" can match at the end of a heading.
    normalized = text.replace("\r\n", "\n").replace("\r", "\n")
    matches = list(_HEADER_RE.finditer(normalized))
    sections = {}

    def _add(name, body):
        body = body.strip()
        if not body:
            return
        if name in sections:
            # Same heading seen again: append instead of discarding earlier text.
            sections[name] += "\n\n" + body
        else:
            sections[name] = body

    if matches:
        # Keep whatever comes before the first heading (title, authors, ...).
        _add("preamble", normalized[:matches[0].start()])

    for i, match in enumerate(matches):
        end = matches[i + 1].start() if i + 1 < len(matches) else len(normalized)
        _add(match.group("header").strip().lower(), normalized[match.end():end])

    return sections or {"full text": text}
# Limit content for faster inference
def truncate(text, max_tokens=1000, chars_per_token=5):
    """Return at most ``max_tokens * chars_per_token`` leading characters.

    This is a character-based approximation of a token budget; no tokenizer
    is involved.

    Args:
        text: String to shorten. ``None`` or an empty string yields ``""``.
        max_tokens: Approximate token budget. Zero or negative values yield
            ``""`` (a negative slice bound would otherwise cut characters
            off the end instead of limiting the length).
        chars_per_token: Assumed average number of characters per token.

    Returns:
        The (possibly shortened) prefix of ``text``.
    """
    if not text:
        return ""
    limit = max(0, int(max_tokens * chars_per_token))
    return text[:limit]
# Run analysis
def analyze(file, pasted_text, custom_prompt):
    """Summarise a paper and produce per-section critiques and research gaps.

    Args:
        file: Uploaded PDF, or a falsy value when nothing was uploaded. Either
            a path string or an object exposing the path as ``.name`` is
            accepted.
        pasted_text: Paper text typed or pasted by the user; may be ``None``.
            Used when no file is given or the file yields no text.
        custom_prompt: Optional free-form prompt sent to the critic model on
            its own; may be ``None``.

    Returns:
        A 4-tuple of strings ``(summary, critique_markdown, gaps_markdown,
        custom_response)``. When no text is available the tuple is
        ``("No text provided.", "", "", "")``.
    """
    pasted_text = (pasted_text or "").strip()
    custom_prompt = (custom_prompt or "").strip()

    raw_text = ""
    if file:
        # Works for both a plain path string and a tempfile-like object.
        raw_text = extract_text(getattr(file, "name", file)).strip()
    if not raw_text:
        # No upload, or a PDF with no extractable text: use the pasted text.
        raw_text = pasted_text
    if not raw_text:
        return "No text provided.", "", "", ""

    # truncation=True makes the tokenizer clip inputs that are still too long
    # for the model after the character-based truncate() approximation.
    summary = summarizer(
        truncate(raw_text),
        max_length=250,
        min_length=100,
        do_sample=False,
        truncation=True,
    )[0]['summary_text']

    critique_parts = []
    gap_parts = []
    for sec, content in extract_sections(raw_text).items():
        snippet = truncate(content)
        if not snippet.strip():
            # Nothing to analyse under this heading; skip the model calls.
            continue
        heading = f"### {sec.title()}"
        critique_out = critic(
            f"Critique this section:\n{snippet}",
            max_length=256,
            truncation=True,
        )[0]['generated_text']
        gaps_out = critic(
            f"Identify research gaps in this section:\n{snippet}",
            max_length=256,
            truncation=True,
        )[0]['generated_text']
        critique_parts.append(f"{heading}\n{critique_out}")
        gap_parts.append(f"{heading}\n{gaps_out}")

    custom_out = ""
    if custom_prompt:
        custom_out = critic(custom_prompt, max_length=256, truncation=True)[0]['generated_text']

    return (
        summary,
        "\n\n".join(critique_parts).strip(),
        "\n\n".join(gap_parts).strip(),
        custom_out,
    )
# Gradio interface: three inputs (PDF upload, pasted text, custom prompt)
# feed analyze(), whose four returned strings fill the four Markdown panels.
demo = gr.Interface(
    fn=analyze,
    inputs=[
        gr.File(label="📄 Upload PDF", file_types=[".pdf"]),
        gr.Textbox(lines=10, label="✍️ Or paste paper content"),
        gr.Textbox(lines=2, label="💬 Optional: Custom prompt"),
    ],
    outputs=[
        gr.Markdown(label=panel_label)
        for panel_label in (
            "📋 Summary",
            "🧠 Critique",
            "🔎 Research Gaps",
            "💡 Custom Prompt Response",
        )
    ],
    title="Research Paper Critique Generator",
    description="Upload a PDF or paste text to receive an AI-generated summary, critique, and identification of research gaps.",
    theme="default",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()