NavyDevilDoc committed on
Commit
ff15ee0
·
verified ·
1 Parent(s): df000ef

Update file_processing.py

Browse files
Files changed (1) hide show
  1. file_processing.py +19 -0
file_processing.py CHANGED
@@ -3,6 +3,25 @@ import PyPDF2
3
  import docx
4
  import pandas as pd
5
  from io import BytesIO
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  def extract_text_from_file(uploaded_file):
8
  """
 
3
  import docx
4
  import pandas as pd
5
  from io import BytesIO
6
+ import streamlit as st
7
+
8
+ MAX_FILE_SIZE_MB = 10
9
+
10
def validate_and_extract(uploaded_file):
    """
    Enforce the upload size limit, then extract the file's text.

    The size is read from ``uploaded_file.size`` (in bytes) and compared,
    in megabytes, against ``MAX_FILE_SIZE_MB``. Files within the limit are
    handed to ``extract_text_from_file``.

    Returns a ``(text, error_message)`` tuple:
      * ``(text, None)`` when extraction completes, where ``text`` is
        whatever ``extract_text_from_file`` returned;
      * ``(None, message)`` when the file is over the limit or extraction
        raised an exception.
    """
    bytes_per_mb = 1024 * 1024
    file_size_mb = uploaded_file.size / bytes_per_mb

    # Guard clause: reject oversized uploads before doing any parsing work.
    if file_size_mb > MAX_FILE_SIZE_MB:
        too_large_msg = f"⚠️ File too large ({file_size_mb:.2f}MB). Limit is {MAX_FILE_SIZE_MB}MB. For larger files, please use the RAG system."
        return None, too_large_msg

    # Delegate parsing to the existing extractor; any failure is reported
    # back to the caller as a message instead of propagating.
    try:
        extracted = extract_text_from_file(uploaded_file)
    except Exception as exc:
        return None, f"Error parsing file: {exc!s}"
    else:
        return extracted, None
25
 
26
  def extract_text_from_file(uploaded_file):
27
  """