tejovanth committed on
Commit
f6b4c1e
·
verified ·
1 Parent(s): 7cd95b1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -14,21 +14,21 @@ except Exception as e:
14
  print(f"❌ Model loading failed: {str(e)}")
15
  exit(1)
16
 
17
- def summarize_file(file):
18
  start = time.time()
 
19
  try:
20
- # Handle file as path (str) or bytes
21
- file_bytes = open(file.name, "rb").read() if isinstance(file, gr.FileData) else file
22
  text = "".join(page.get_text("text", flags=16) for page in fitz.open(stream=file_bytes, filetype="pdf")) if file_bytes[:4].startswith(b'%PDF') else file_bytes.decode("utf-8", errors="ignore")
23
  except Exception as e:
24
  return f"❌ Text extraction failed: {str(e)}"
25
  if not text.strip(): return "❌ No text found"
26
  text = text[:300000]
27
- chunks = [text[i:i+10000] for i in range(0, len(text), 10000)]
28
- if gamba not chunks: return "❌ No chunks to summarize"
 
29
  summaries = []
30
  for i, chunk in enumerate(chunks):
31
- if time.time() - start > 9:
32
  summaries.append("⚠️ Stopped early")
33
  break
34
  try:
@@ -38,7 +38,11 @@ def summarize_file(file):
38
  summaries.append(f"**Chunk {i+1}**: ❌ Error: {str(e)}")
39
  return f"**Chars**: {len(text)}\n**Time**: {time.time()-start:.2f}s\n\n" + "\n\n".join(summaries)
40
 
41
- demo = gr.Interface(fn=summarize_file, inputs=gr.File(label="📄 PDF/TXT Notes"), outputs=gr.Textbox(label="📝 Summary"), title="Fast Summarizer", description="300,000+ chars in ~15–25s (CPU)")
 
 
 
 
42
 
43
  if __name__ == "__main__":
44
  try:
 
14
  print(f"❌ Model loading failed: {str(e)}")
15
  exit(1)
16
 
17
+ def summarize_file(file_bytes):
18
  start = time.time()
19
+ print(f"File type: {type(file_bytes)}")
20
  try:
 
 
21
  text = "".join(page.get_text("text", flags=16) for page in fitz.open(stream=file_bytes, filetype="pdf")) if file_bytes[:4].startswith(b'%PDF') else file_bytes.decode("utf-8", errors="ignore")
22
  except Exception as e:
23
  return f"❌ Text extraction failed: {str(e)}"
24
  if not text.strip(): return "❌ No text found"
25
  text = text[:300000]
26
+ chunks = [text[i:i+8000] for i in range(0, len(text), 8000)]
27
+ print(f"Chunks created: {len(chunks)}")
28
+ if not chunks: return "❌ No chunks to summarize"
29
  summaries = []
30
  for i, chunk in enumerate(chunks):
31
+ if time.time() - start > 12:
32
  summaries.append("⚠️ Stopped early")
33
  break
34
  try:
 
38
  summaries.append(f"**Chunk {i+1}**: ❌ Error: {str(e)}")
39
  return f"**Chars**: {len(text)}\n**Time**: {time.time()-start:.2f}s\n\n" + "\n\n".join(summaries)
40
 
41
+ demo = gr.Interface(
42
+ fn=summarize_file, inputs=gr.File(label="📄 PDF/TXT Notes", type="binary"),
43
+ outputs=gr.Textbox(label="📝 Summary"),
44
+ title="Fast Summarizer", description="300,000+ chars in ~15–25s (CPU)"
45
+ )
46
 
47
  if __name__ == "__main__":
48
  try: