tejovanth committed on
Commit
9f86b13
·
verified ·
1 Parent(s): 8723dfc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -7
app.py CHANGED
@@ -37,16 +37,38 @@ def extract_text(file_bytes):
37
  except Exception as e:
38
  return f"❌ Error reading file: {str(e)}"
39
 
 
 
 
 
40
  # Summarize the extracted text
41
  def summarize_file(file_bytes):
42
  text = extract_text(file_bytes)
43
  if not text or len(text.strip()) == 0:
44
  return "❌ No text found in the uploaded file."
45
 
46
- # Truncate to fit model token limit
47
- text = text[:3000]
48
- summary = summarizer(text, max_length=150, min_length=40, do_sample=False)
49
- return summary[0]["summary_text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  # Gradio UI
52
  demo = gr.Interface(
@@ -54,11 +76,12 @@ demo = gr.Interface(
54
  inputs=gr.File(label="📄 Upload Notes (PDF, TXT, or Handwritten Image)", type="binary"),
55
  outputs=gr.Textbox(label="📝 Summarized Notes"),
56
  title="📚 Note Summarizer",
57
- description="Upload academic notes in PDF, TXT, or image format. This app extracts and summarizes the content using a Hugging Face transformer model."
58
  )
59
 
60
- demo.launch()
61
-
 
62
 
63
 
64
 
 
37
  except Exception as e:
38
  return f"❌ Error reading file: {str(e)}"
39
 
40
+ # Function to chunk text into smaller pieces
41
+ def chunk_text(text, chunk_size=4000):
42
+ return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
43
+
44
  # Summarize the extracted text
45
  def summarize_file(file_bytes):
46
  text = extract_text(file_bytes)
47
  if not text or len(text.strip()) == 0:
48
  return "❌ No text found in the uploaded file."
49
 
50
+ # Ensure at least 300,000 characters can be processed (no truncation)
51
+ if len(text) > 300000:
52
+ text = text[:300000] # Optional: cap at 300,000 if desired, but can be removed for larger inputs
53
+
54
+ # Chunk the text into 4,000-character segments
55
+ chunks = chunk_text(text, chunk_size=4000)
56
+ if not chunks:
57
+ return "❌ No valid chunks to summarize."
58
+
59
+ # Summarize each chunk
60
+ summaries = []
61
+ for i, chunk in enumerate(chunks):
62
+ try:
63
+ summary = summarizer(chunk, max_length=150, min_length=40, do_sample=False)
64
+ summaries.append(f"**Chunk {i+1} Summary**:\n{summary[0]['summary_text']}")
65
+ except Exception as e:
66
+ summaries.append(f"**Chunk {i+1} Summary**: ❌ Error summarizing chunk: {str(e)}")
67
+
68
+ # Combine summaries
69
+ combined_summary = "\n\n".join(summaries)
70
+ total_chars = len(text)
71
+ return f"**Total Characters Processed**: {total_chars}\n\n**Summaries**:\n{combined_summary}"
72
 
73
  # Gradio UI
74
  demo = gr.Interface(
 
76
  inputs=gr.File(label="📄 Upload Notes (PDF, TXT, or Handwritten Image)", type="binary"),
77
  outputs=gr.Textbox(label="📝 Summarized Notes"),
78
  title="📚 Note Summarizer",
79
+ description="Upload academic notes in PDF, TXT, or image format (supports at least 300,000 characters). This app extracts and summarizes the content using a Hugging Face transformer model."
80
  )
81
 
82
+ # Launch the interface
83
+ if __name__ == "__main__":
84
+ demo.launch()
85
 
86
 
87