Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -54,21 +54,23 @@ def summarize_file(file):
|
|
| 54 |
chunks = [text[i:i+1000] for i in range(0, len(text), 1000)]
|
| 55 |
print(f"Chunks created: {len(chunks)}")
|
| 56 |
if not chunks: return "β No chunks to summarize"
|
|
|
|
|
|
|
| 57 |
summaries = []
|
| 58 |
-
for i in
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
batch = chunks[i:i+4]
|
| 63 |
-
if any(sum(1 for c in chunk if not c.isalnum()) / len(chunk) > 0.7 for chunk in batch):
|
| 64 |
-
summaries.append(f"**Chunk {i+1}β{i+len(batch)}**: Skipped (equation-heavy)")
|
| 65 |
continue
|
| 66 |
try:
|
| 67 |
-
|
| 68 |
-
summaries.
|
| 69 |
except Exception as e:
|
| 70 |
-
summaries.append(f"**Chunk {i+1}
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
demo = gr.Interface(
|
| 74 |
fn=summarize_file, inputs=gr.File(label="π Any File", type="binary"),
|
|
|
|
| 54 |
chunks = [text[i:i+1000] for i in range(0, len(text), 1000)]
|
| 55 |
print(f"Chunks created: {len(chunks)}")
|
| 56 |
if not chunks: return "β No chunks to summarize"
|
| 57 |
+
# Select 12 chunks evenly spaced
|
| 58 |
+
selected_indices = [int(i * len(chunks) / 12) for i in range(12)] if len(chunks) >= 12 else list(range(len(chunks)))
|
| 59 |
summaries = []
|
| 60 |
+
for i in selected_indices:
|
| 61 |
+
chunk = chunks[i]
|
| 62 |
+
if sum(1 for c in chunk if not c.isalnum()) / len(chunk) > 0.7:
|
| 63 |
+
summaries.append(f"**Chunk {i+1}**: Skipped (equation-heavy)")
|
|
|
|
|
|
|
|
|
|
| 64 |
continue
|
| 65 |
try:
|
| 66 |
+
summary = summarizer(chunk, max_length=40, min_length=10, do_sample=False)[0]['summary_text']
|
| 67 |
+
summaries.append(f"**Chunk {i+1}**:\n{summary}")
|
| 68 |
except Exception as e:
|
| 69 |
+
summaries.append(f"**Chunk {i+1}**: β Error: {str(e)}")
|
| 70 |
+
# Pad if <12 summaries
|
| 71 |
+
while len(summaries) < 12:
|
| 72 |
+
summaries.append(f"**Chunk {len(summaries)+1}**: Insufficient content for full summary")
|
| 73 |
+
return f"**Chars**: {len(text)}\n**Time**: {time.time()-start:.2f}s\n\n" + "\n\n".join(summaries[:12])
|
| 74 |
|
| 75 |
demo = gr.Interface(
|
| 76 |
fn=summarize_file, inputs=gr.File(label="π Any File", type="binary"),
|