Azidan committed on
Commit
aba9518
·
verified ·
1 Parent(s): 641953a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -11
app.py CHANGED
@@ -88,38 +88,59 @@ def generate_ai_advice(summary: str) -> str:
88
  advice_md += "\n**Pro tip**: Combine these with spaced repetition (Anki / Quizlet) for long-term retention!"
89
  return advice_md
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  def summarize_long_text(text: str) -> str:
92
- """Summarize long text in chunks + add AI study advice."""
 
93
  if not text or len(text.strip()) == 0:
94
  return "No text provided."
95
 
 
 
 
96
  chunks = chunk_text(text)
97
  summaries = []
98
 
99
- for chunk in chunks:
100
  try:
101
  summary = summarizer(
102
  chunk,
103
- max_length=150,
104
- min_length=40,
105
  do_sample=False
106
  )[0]["summary_text"]
107
- summaries.append(summary)
 
108
  except Exception:
109
  pass # skip problematic chunks
110
 
111
- merged = " ".join(summaries)
112
- cleaned_summary = clean_text(merged)
 
 
 
 
113
 
114
- ai_advice = generate_ai_advice(cleaned_summary)
115
- return cleaned_summary + ai_advice
116
 
117
  def read_pdf(file) -> str:
118
  """Safely extract text from PDF."""
119
  try:
120
  reader = PdfReader(file)
121
  pages = [page.extract_text() or "" for page in reader.pages]
122
- return " ".join(pages)
123
  except Exception as e:
124
  return f"PDF read error: {str(e)}"
125
 
@@ -159,7 +180,8 @@ with gr.Blocks() as demo:
159
  "• Handles very long documents (thousands of words)\n"
160
  "• Supports **PDF** upload or direct paste\n"
161
  "• Runs on CPU – works on free hardware\n"
162
- "• Gives you **5 AI-generated study tips** tailored to the content\n"
 
163
  "• Download result as .txt file"
164
  )
165
 
 
88
  advice_md += "\n**Pro tip**: Combine these with spaced repetition (Anki / Quizlet) for long-term retention!"
89
  return advice_md
90
 
91
def extract_possible_headings(text: str) -> str:
    """Return a Markdown section listing the lines of *text* that look like headings.

    This is a simple heuristic. A line is kept when, after stripping
    surrounding whitespace, it is non-empty, shorter than 80 characters, and
    at least one of the following holds:

    * it is entirely upper-case (``str.isupper``),
    * it starts with digits, an optional dot, then whitespace (``"1. Intro"``),
    * it starts with a capitalised word followed by whitespace and another
      capital letter (``"Machine Learning"``).

    Returns:
        A Markdown block made of a ``###`` header, one ``- `` bullet per
        detected line (in input order) and a trailing horizontal rule, or an
        empty string when no line qualifies.
    """
    # Function-scope import so the helper does not depend on a module-level
    # `import re` being present elsewhere in the file.
    import re

    # Compile once per call instead of handing the pattern text to re.match
    # for every single line.
    numbered_prefix = re.compile(r'^\d+\.?\s')
    title_case_prefix = re.compile(r'^[A-Z][a-z]+\s[A-Z]')

    headings = []
    for line in text.split('\n'):
        stripped = line.strip()
        if not stripped or len(stripped) >= 80:
            continue
        if (
            stripped.isupper()
            or numbered_prefix.match(stripped)
            or title_case_prefix.match(stripped)
        ):
            headings.append(stripped)

    if not headings:
        return ""

    # Each bullet is preceded by its own newline, which reproduces the
    # original layout exactly (blank line after the header, no trailing
    # newline after the last bullet).
    bullets = "".join(f"\n- {heading}" for heading in headings)
    return "### Extracted Possible Headings/Subtitles\n\n" + bullets + "\n\n---\n\n"
103
+
104
  def summarize_long_text(text: str) -> str:
105
+ """Summarize long text in chunks + add AI study advice.
106
+ Now with longer summaries per chunk and formatted as bullet points."""
107
  if not text or len(text.strip()) == 0:
108
  return "No text provided."
109
 
110
+ # Extract possible headings first
111
+ headings_section = extract_possible_headings(text)
112
+
113
  chunks = chunk_text(text)
114
  summaries = []
115
 
116
+ for i, chunk in enumerate(chunks, 1):
117
  try:
118
  summary = summarizer(
119
  chunk,
120
+ max_length=250, # Increased for longer summaries
121
+ min_length=80, # Increased for more detail
122
  do_sample=False
123
  )[0]["summary_text"]
124
+ cleaned = clean_text(summary)
125
+ summaries.append(f"**Chunk {i} Summary:** {cleaned}")
126
  except Exception:
127
  pass # skip problematic chunks
128
 
129
+ # Format summaries as bullet points
130
+ summary_md = "### Detailed Summary (in Bullet Points)\n\n"
131
+ for s in summaries:
132
+ summary_md += f"- {s}\n"
133
+
134
+ ai_advice = generate_ai_advice(summary_md) # Use the bulleted summary for advice generation
135
 
136
+ return headings_section + summary_md + ai_advice
 
137
 
138
  def read_pdf(file) -> str:
139
  """Safely extract text from PDF."""
140
  try:
141
  reader = PdfReader(file)
142
  pages = [page.extract_text() or "" for page in reader.pages]
143
+ return "\n".join(pages) # Join with newlines to preserve line breaks for heading detection
144
  except Exception as e:
145
  return f"PDF read error: {str(e)}"
146
 
 
180
  "• Handles very long documents (thousands of words)\n"
181
  "• Supports **PDF** upload or direct paste\n"
182
  "• Runs on CPU – works on free hardware\n"
183
+ "• Gives you **longer, bullet-point summaries** with possible headings/subtitles\n"
184
+ "• Includes **5 AI-generated study tips** tailored to the content\n"
185
  "• Download result as .txt file"
186
  )
187