Spaces:

NavyDevilDoc
/

Writing_Assistant

Sleeping

NavyDevilDoc committed on Dec 26, 2025

Commit

46dcfa5

verified ·

1 Parent(s): 0cff90b

Create file_processing.py

Files changed (1) hide show

file_processing.py ADDED Viewed

+# file_processing.py
+import PyPDF2
+import docx
+import pandas as pd
+from io import BytesIO
def extract_text_from_file(uploaded_file) -> str:
    """Return the text content of an uploaded file as a single string.

    The parser is chosen from the extension of ``uploaded_file.name``
    (case-insensitive):

    * ``pdf`` -- text of every page via ``PyPDF2.PdfReader``, each page
      followed by a newline.
    * ``docx`` / ``doc`` -- text of every paragraph via ``docx.Document``,
      each paragraph followed by a newline.
    * ``csv`` / ``xlsx`` / ``xls`` -- loaded with pandas and rendered with
      ``DataFrame.to_string()``.
    * anything else, including names with no extension -- read and decoded
      as UTF-8 text.

    Args:
        uploaded_file: Binary file-like object exposing ``name`` and
            ``read()`` (presumably a Streamlit ``UploadedFile`` -- confirm
            against the caller).

    Returns:
        The extracted text. If reading or parsing fails, the string
        ``"Error reading file: <message>"`` is returned instead of raising,
        so callers always receive a ``str``.
    """
    # Only treat the trailing component as an extension when the name really
    # contains a dot; otherwise a file literally named "csv" or "pdf" would be
    # routed to the wrong parser.
    _, dot, suffix = uploaded_file.name.rpartition(".")
    file_type = suffix.lower() if dot else ""

    try:
        # Rewind streams that were already consumed (e.g. by an earlier
        # preview) so the whole file is parsed, not just the unread tail.
        seekable = getattr(uploaded_file, "seekable", None)
        if callable(seekable) and seekable():
            uploaded_file.seek(0)

        # 1. Handle PDF
        if file_type == "pdf":
            reader = PyPDF2.PdfReader(uploaded_file)
            # A page may yield no text (``None`` or ""); treat it as empty
            # rather than failing the whole document.
            return "".join(
                (page.extract_text() or "") + "\n" for page in reader.pages
            )

        # 2. Handle Word. Legacy binary ``.doc`` files are still attempted;
        # if the parser rejects them the generic error string is returned.
        if file_type in ("docx", "doc"):
            document = docx.Document(uploaded_file)
            return "".join(para.text + "\n" for para in document.paragraphs)

        # 3. Handle Excel/CSV
        if file_type in ("csv", "xlsx", "xls"):
            if file_type == "csv":
                frame = pd.read_csv(uploaded_file)
            else:
                frame = pd.read_excel(uploaded_file)
            # Convert dataframe to its plain-text table representation.
            return frame.to_string()

        # 4. Handle Plain Text / Markdown
        raw = uploaded_file.read()
        if isinstance(raw, str):
            # Text-mode stream: nothing to decode.
            return raw
        # "utf-8-sig" drops a leading BOM; undecodable bytes become U+FFFD
        # so a non-UTF-8 file still produces usable text instead of an error.
        return raw.decode("utf-8-sig", errors="replace")
    except Exception as e:
        # Deliberate catch-all boundary: callers expect a string, never an
        # exception, from this function.
        return f"Error reading file: {e}"