tejovanth committed on
Commit
1ae4c5e
·
verified ·
1 Parent(s): d65c22a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -9
app.py CHANGED
@@ -11,14 +11,14 @@ logging.basicConfig(level=logging.ERROR)
11
  device = -1 # CPU
12
  print("⚠️ CPU-only mode. Expect ~20–30s for large documents.")
13
 
14
- # === Load the Summarization Model ===
15
  try:
16
  summarizer = pipeline("summarization", model="t5-small", device=device, torch_dtype=torch.float32)
17
  except Exception as e:
18
  print(f"❌ Model loading failed: {e}")
19
  exit(1)
20
 
21
- # === Sentence-Smart Chunking ===
22
  def smart_chunk(text, max_chunk_len=2000):
23
  sentences = re.split(r'(?<=[.!?]) +', text)
24
  chunks, current_chunk = [], ""
@@ -32,7 +32,7 @@ def smart_chunk(text, max_chunk_len=2000):
32
  chunks.append(current_chunk.strip())
33
  return chunks
34
 
35
- # === Summarization for a Single File ===
36
  def summarize_file_bytes(file_bytes, filename):
37
  start_time = time.time()
38
  try:
@@ -47,7 +47,7 @@ def summarize_file_bytes(file_bytes, filename):
47
  if not text:
48
  return f"{filename}: ❌ No text found.", ""
49
 
50
- text = text[:300000] # Trim to model-safe size
51
  chunks = smart_chunk(text)
52
  summaries, line_count = [], 0
53
 
@@ -68,14 +68,13 @@ def summarize_file_bytes(file_bytes, filename):
68
  summary_text = f"📄 **{filename}**\n**Characters**: {len(text)} | **Time**: {total_time:.2f}s\n\n" + "\n\n".join(summaries)
69
  return summary_text, summary_text
70
 
71
- # === Function for Multiple Files ===
72
  def summarize_multiple_files(file_objs):
73
  all_summaries = []
74
  combined_text = ""
75
 
76
- for file_obj in file_objs:
77
- file_bytes = file_obj.read()
78
- filename = file_obj.name.split("/")[-1]
79
  summary, raw_text = summarize_file_bytes(file_bytes, filename)
80
  all_summaries.append(summary)
81
  combined_text += f"\n\n{raw_text}\n" + "="*60 + "\n"
@@ -99,7 +98,7 @@ demo = gr.Interface(
99
  description="Summarizes multiple PDFs or TXTs into at least 15 lines each. Download final output as .txt. CPU-optimized."
100
  )
101
 
102
- # === Run the App ===
103
  if __name__ == "__main__":
104
  try:
105
  demo.launch(share=False, server_port=7860)
 
11
  device = -1 # CPU
12
  print("⚠️ CPU-only mode. Expect ~20–30s for large documents.")
13
 
14
+ # === Load Summarization Model ===
15
  try:
16
  summarizer = pipeline("summarization", model="t5-small", device=device, torch_dtype=torch.float32)
17
  except Exception as e:
18
  print(f"❌ Model loading failed: {e}")
19
  exit(1)
20
 
21
+ # === Sentence-based Chunking ===
22
  def smart_chunk(text, max_chunk_len=2000):
23
  sentences = re.split(r'(?<=[.!?]) +', text)
24
  chunks, current_chunk = [], ""
 
32
  chunks.append(current_chunk.strip())
33
  return chunks
34
 
35
+ # === Summarization for One File ===
36
  def summarize_file_bytes(file_bytes, filename):
37
  start_time = time.time()
38
  try:
 
47
  if not text:
48
  return f"{filename}: ❌ No text found.", ""
49
 
50
+ text = text[:300000]
51
  chunks = smart_chunk(text)
52
  summaries, line_count = [], 0
53
 
 
68
  summary_text = f"📄 **{filename}**\n**Characters**: {len(text)} | **Time**: {total_time:.2f}s\n\n" + "\n\n".join(summaries)
69
  return summary_text, summary_text
70
 
71
+ # === Multiple Files Handler ===
72
  def summarize_multiple_files(file_objs):
73
  all_summaries = []
74
  combined_text = ""
75
 
76
+ for file_bytes, file_info in file_objs:
77
+ filename = file_info['name'].split("/")[-1]
 
78
  summary, raw_text = summarize_file_bytes(file_bytes, filename)
79
  all_summaries.append(summary)
80
  combined_text += f"\n\n{raw_text}\n" + "="*60 + "\n"
 
98
  description="Summarizes multiple PDFs or TXTs into at least 15 lines each. Download final output as .txt. CPU-optimized."
99
  )
100
 
101
+ # === Launch App ===
102
  if __name__ == "__main__":
103
  try:
104
  demo.launch(share=False, server_port=7860)