Spaces:

EricIhre
/

Product_summary_ai

Sleeping

File size: 2,023 Bytes

import docx # type: ignore
import os

def read_text_from_docx(file_path: str) -> str:
    """
    Return the text of every paragraph in a .docx file, newline-separated.

    The document is opened with ``docx.Document`` and the ``text`` of each
    entry in ``doc.paragraphs`` is joined with ``'\n'``.

    This is a best-effort reader: any failure is reported on stdout and an
    empty string is returned instead of raising.
    """
    try:
        document = docx.Document(file_path)
        # Join lazily; paragraph order is preserved as yielded by the document.
        return '\n'.join(paragraph.text for paragraph in document.paragraphs)
    except FileNotFoundError:
        print(f"Error: The file at {file_path} was not found.")
        return ""
    except Exception as e:
        # Deliberately broad: callers rely on "" rather than an exception.
        print(f"An error occurred while reading the docx file: {e}")
        return ""

def read_text_file(file_path: str) -> str:
    """
    Return the full contents of a UTF-8 encoded plain text file.

    This is a best-effort reader: if the file is missing or cannot be read
    (for example because it is not valid UTF-8), a message is printed to
    stdout and an empty string is returned instead of raising.
    """
    contents = ""
    try:
        with open(file_path, mode='r', encoding='utf-8') as handle:
            contents = handle.read()
    except FileNotFoundError:
        print(f"Error: The file at {file_path} was not found.")
    except Exception as e:
        # Deliberately broad: callers rely on "" rather than an exception.
        print(f"An error occurred while reading the text file: {e}")
    return contents
   
def read_any_document(file_path: str) -> str:
    """
    Read text from a file, choosing the reader from the file extension.

    Supported extensions (matched case-insensitively):
      * ``.docx`` -- delegated to ``read_text_from_docx``
      * ``.txt``  -- delegated to ``read_text_file``

    Returns:
        The extracted text on success.
        ``""`` when ``file_path`` is empty/falsy.
        ``"[Unsupported file type: <ext>]"`` for any other extension.
        ``"[Error reading file <name>: <err>]"`` if a reader raises.
        Note that both readers above handle their own errors by printing a
        message and returning ``""``, so a missing file yields ``""``.
    """
    if not file_path:
        return ""
    _, extension = os.path.splitext(file_path)
    suffix = extension.lower()
    try:
        if suffix == '.docx':
            return read_text_from_docx(file_path)
        if suffix == '.txt':
            return read_text_file(file_path)
        # Report the extension exactly as given (original casing preserved).
        return f"[Unsupported file type: {extension}]"
    except Exception as e:
        # Safety net: surface unexpected reader failures as a bracketed marker
        # string so callers always receive text rather than an exception.
        return f"[Error reading file {os.path.basename(file_path)}: {e}]"