tejovanth committed on
Commit
d3b3dd1
·
verified ·
1 Parent(s): 28f2da4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -51
app.py CHANGED
@@ -1,85 +1,76 @@
1
  import gradio as gr
2
  import pdfplumber
 
3
  from transformers import pipeline
4
 
5
# Load models
# Both pipelines are created once at import time and reused by every request.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
critic = pipeline("text2text-generation", model="google/flan-t5-large")
8
 
9
# PDF text extraction
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, each page followed by a newline.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts; the caller in this
            file passes a filesystem path.

    Returns:
        The concatenated page texts. Pages whose ``extract_text()`` result is
        falsy (``None`` or ``""``) are skipped, so the result is ``""`` when
        no page yields any text.
    """
    with pdfplumber.open(pdf_file) as pdf:
        page_texts = (page.extract_text() for page in pdf.pages)
        # Build the result with one join instead of growing a string per page.
        return "".join(f"{content}\n" for content in page_texts if content)
18
 
19
# Split text into sections
def split_into_sections(text, max_heading_len=60):
    """Group the lines of ``text`` under the section heading they follow.

    A line counts as a heading when it is at most ``max_heading_len``
    characters long (ignoring surrounding whitespace) and contains one of the
    known heading keywords, compared case-insensitively. The heading line
    itself is kept as the first line of its section.

    Args:
        text: Full document text.
        max_heading_len: Longest stripped line still treated as a heading.
            Longer lines are body text even if they mention a keyword, so a
            sentence such as "as the results show ..." no longer starts a
            new section.

    Returns:
        A dict mapping section name to its text (each line terminated by
        ``"\n"``). The key ``"others"`` is always present and holds every
        line that precedes the first heading.
    """
    headings = (
        "abstract", "introduction", "methodology", "methods",
        "results", "discussion", "conclusion", "references",
    )
    sections = {"others": ""}
    current = "others"
    for line in text.splitlines():
        if len(line.strip()) <= max_heading_len:
            lowered = line.lower()
            matched = next((h for h in headings if h in lowered), None)
            if matched is not None:
                current = matched
                # setdefault keeps text already collected for a section whose
                # keyword shows up again later; resetting it here lost content.
                sections.setdefault(current, "")
        sections[current] += line + "\n"
    return sections
31
 
32
# LLM-based functions
def summarize(text):
    """Summarize the first 3000 characters of ``text`` with ``summarizer``.

    ``truncation=True`` lets the tokenizer clip input that is still too long
    for the model after the character slice, instead of passing it through
    unbounded.
    """
    return summarizer(text[:3000], truncation=True)[0]["summary_text"]
34
def critique(text):
    """Return the ``critic`` pipeline's critique of ``text``.

    The prompt is passed with ``max_length=512``; ``truncation=True`` lets
    the tokenizer clip an over-long prompt.
    """
    prompt = f"Critique this section:\n{text}"
    return critic(prompt, max_length=512, truncation=True)[0]["generated_text"]
35
def find_gaps(text):
    """Return the ``critic`` pipeline's answer about research gaps in ``text``.

    The prompt is passed with ``max_length=512``; ``truncation=True`` lets
    the tokenizer clip an over-long prompt.
    """
    prompt = f"What research gaps are present?\n{text}"
    return critic(prompt, max_length=512, truncation=True)[0]["generated_text"]
36
def custom_query(text):
    """Send ``text`` to the ``critic`` pipeline unchanged and return its reply.

    ``truncation=True`` lets the tokenizer clip an over-long user prompt.
    """
    return critic(text, max_length=512, truncation=True)[0]["generated_text"]
37
 
38
# Main handler
def analyze(file, pasted_text, custom_prompt):
    """Summarize a paper and critique it section by section.

    Args:
        file: Upload from the ``gr.File`` input, or ``None``. Either an
            object exposing ``.name`` or a plain path string is accepted.
        pasted_text: Text from the "Pasted Text" box; used only when no file
            is given. ``None`` is treated as empty.
        custom_prompt: Optional free-form prompt sent to the critic model on
            its own. ``None`` is treated as empty.

    Returns:
        A 4-tuple ``(summary, critique_markdown, gaps_markdown,
        custom_response)``. When there is nothing to analyze, the first item
        is a message for the user and the other three are empty strings.
    """
    pasted_text = pasted_text or ""
    custom_prompt = custom_prompt or ""

    if file is not None:
        # The upload may arrive as a path string or as an object with .name,
        # depending on how the File component is configured.
        text = extract_text_from_pdf(getattr(file, "name", file))
    elif pasted_text.strip():
        text = pasted_text
    else:
        return "Please upload a file or enter some text.", "", "", ""

    if not text.strip():
        # Nothing came out of the PDF, so there is nothing to send to the models.
        return "No text could be extracted from the uploaded file.", "", "", ""

    # Generate outputs
    summary = summarize(text)

    critique_parts = []
    gap_parts = []
    for sec, content in split_into_sections(text).items():
        snippet = content[:1000]
        if not snippet.strip():
            # Skip empty buckets (e.g. "others" when the text opens with a
            # heading) rather than asking the model to critique nothing.
            continue
        heading = f"### {sec.capitalize()}"
        critique_parts.append(f"{heading}\n**Critique:**\n{critique(snippet)}\n\n")
        gap_parts.append(f"{heading}\n**Research Gaps:**\n{find_gaps(snippet)}\n\n")

    custom_response = custom_query(custom_prompt) if custom_prompt.strip() else ""

    return summary, "".join(critique_parts), "".join(gap_parts), custom_response
64
 
65
# Gradio UI
# Three inputs (PDF upload, pasted text, optional prompt) feed ``analyze``;
# its four return values fill the four Markdown outputs in order.
demo = gr.Interface(
    fn=analyze,
    inputs=[
        gr.File(label="Upload PDF", file_types=[".pdf"]),
        gr.Textbox(lines=8, placeholder="Or paste your research text here...", label="Pasted Text"),
        gr.Textbox(lines=2, placeholder="Optional: Ask a custom question", label="Custom Prompt"),
    ],
    outputs=[
        gr.Markdown(label="📋 Summary"),
        gr.Markdown(label="🧠 Critique by Section"),
        gr.Markdown(label="🔎 Research Gaps"),
        gr.Markdown(label="💬 Custom Prompt Response"),
    ],
    title="📄 Research Paper Critique Generator",
    description="Upload a research paper (PDF) or paste its content. Get section-wise critiques, summaries, and gap analysis using LLMs.",
    theme="default",
)

demo.launch()
 
85
 
 
1
  import gradio as gr
2
  import pdfplumber
3
+ import re
4
  from transformers import pipeline
5
 
6
# Load models once
# Both pipelines are created at import time and reused by every request.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
critic = pipeline("text2text-generation", model="google/flan-t5-large")
9
 
10
# Extract PDF text
def extract_text(file):
    """Return the text of all pages of a PDF joined with newlines.

    Args:
        file: Whatever ``pdfplumber.open`` accepts; the caller in this file
            passes a filesystem path.

    Returns:
        One string with the pages separated by ``"\n"``. A page whose
        ``extract_text()`` result is falsy contributes an empty string, so
        the result may consist only of newlines. No other cleaning is done.
    """
    with pdfplumber.open(file) as pdf:
        return "\n".join(page.extract_text() or "" for page in pdf.pages)
 
 
 
 
 
14
 
15
# Split text into sections based on common headings
# A header is a whole line of 4-41 letters/spaces starting with a capital.
# Line anchors (rather than literal "\n" on both sides) let a header on the
# very first line match and keep adjacent header lines from hiding each other.
_SECTION_HEADER = re.compile(r"^(?P<header>[A-Z][A-Za-z ]{3,40})$", re.MULTILINE)


def extract_sections(text):
    """Split ``text`` into sections keyed by lower-cased header line.

    Args:
        text: Full document text.

    Returns:
        A dict mapping each header (stripped, lower-cased) to the stripped
        text between it and the next header. Sections with no body are left
        out, and a header that occurs more than once has its bodies joined
        with a blank line instead of the later one replacing the earlier.
        If no non-empty section is found, ``{"full text": text}`` is
        returned so callers always get at least one entry.
    """
    matches = list(_SECTION_HEADER.finditer(text))
    sections = {}
    for i, match in enumerate(matches):
        body_end = matches[i + 1].start() if i + 1 < len(matches) else len(text)
        body = text[match.end():body_end].strip()
        if not body:
            continue
        key = match.group("header").strip().lower()
        sections[key] = f"{sections[key]}\n\n{body}" if key in sections else body
    return sections or {"full text": text}
 
 
 
 
25
 
26
# Limit content for faster inference
def truncate(text, max_tokens=1000, chars_per_token=5):
    """Return at most ``max_tokens * chars_per_token`` leading characters of ``text``.

    This is a character-count approximation of a token budget; no tokenizer
    is involved.

    Args:
        text: String to shorten. A falsy value (``""`` or ``None``) yields ``""``.
        max_tokens: Approximate token budget.
        chars_per_token: Assumed average number of characters per token.

    Returns:
        The leading slice of ``text``. A non-positive budget yields ``""``
        rather than slicing characters off the end of the string.
    """
    if not text:
        return ""
    budget = max(0, max_tokens * chars_per_token)
    return text[:budget]
 
 
29
 
30
# Run analysis
def analyze(file, pasted_text, custom_prompt):
    """Summarize a paper, then critique it and list research gaps per section.

    Args:
        file: Upload from the ``gr.File`` input, or a falsy value when
            nothing was uploaded. Either an object exposing ``.name`` or a
            plain path string is accepted.
        pasted_text: Text pasted by the user; used when no file is given or
            the file yields no text. ``None`` is treated as empty.
        custom_prompt: Optional free-form prompt sent to the critic model on
            its own. ``None`` is treated as empty.

    Returns:
        A 4-tuple ``(summary, critique_markdown, gaps_markdown, custom_out)``.
        With no usable text the result is ``("No text provided.", "", "", "")``.
    """
    pasted_text = (pasted_text or "").strip()
    custom_prompt = (custom_prompt or "").strip()

    raw_text = ""
    if file:
        # The upload may arrive as a path string or as an object with .name,
        # depending on how the File component is configured.
        raw_text = (extract_text(getattr(file, "name", file)) or "").strip()
    if not raw_text:
        # Also covers a PDF with no extractable text: fall back to the paste.
        raw_text = pasted_text
    if not raw_text:
        return "No text provided.", "", "", ""

    # truncation=True lets the tokenizer clip whatever still exceeds the
    # model's input limit after the character-based truncate().
    summary = summarizer(
        truncate(raw_text),
        max_length=250,
        min_length=100,
        do_sample=False,
        truncation=True,
    )[0]["summary_text"]

    critique_parts = []
    gap_parts = []
    for sec, content in extract_sections(raw_text).items():
        snippet = truncate(content)
        if not snippet.strip():
            # Nothing to critique in an empty section.
            continue
        heading = f"### {sec.title()}"
        critique_out = critic(
            f"Critique this section:\n{snippet}", max_length=256, truncation=True
        )[0]["generated_text"]
        gaps_out = critic(
            f"Identify research gaps in this section:\n{snippet}", max_length=256, truncation=True
        )[0]["generated_text"]
        critique_parts.append(f"{heading}\n{critique_out}")
        gap_parts.append(f"{heading}\n{gaps_out}")

    custom_out = ""
    if custom_prompt:
        custom_out = critic(custom_prompt, max_length=256, truncation=True)[0]["generated_text"]

    critique_output = "\n\n".join(critique_parts).strip()
    gap_output = "\n\n".join(gap_parts).strip()
    return summary, critique_output, gap_output, custom_out
54
 
55
# Gradio interface
# Three inputs (PDF upload, pasted text, optional prompt) feed ``analyze``;
# its four return values fill the four Markdown outputs in order.
demo = gr.Interface(
    fn=analyze,
    inputs=[
        gr.File(label="📄 Upload PDF", file_types=[".pdf"]),
        gr.Textbox(lines=10, label="✍️ Or paste paper content"),
        gr.Textbox(lines=2, label="💬 Optional: Custom prompt"),
    ],
    outputs=[
        gr.Markdown(label="📋 Summary"),
        gr.Markdown(label="🧠 Critique"),
        gr.Markdown(label="🔎 Research Gaps"),
        gr.Markdown(label="💡 Custom Prompt Response"),
    ],
    title="Research Paper Critique Generator",
    description="Upload a PDF or paste text to receive an AI-generated summary, critique, and identification of research gaps.",
    theme="default",
)

# Start the server only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()
76