Spaces:

NamitaB
/

personalKnowledgebase

Runtime error

File size: 1,151 Bytes

17c7e0b

import docx
import fitz  # PyMuPDF

def process_docx(file_path):
    """Read a .docx file and return its paragraph text.

    Every paragraph exposed by ``doc.paragraphs`` is collected and the
    paragraphs are joined with newlines.

    Returns:
        ``{'text': <joined text, stripped>}`` on success, or
        ``{'error': <exception message>}`` if anything in the process raises.
    """
    try:
        document = docx.Document(file_path)

        paragraphs = []
        for paragraph in document.paragraphs:
            paragraphs.append(paragraph.text)
        joined = '\n'.join(paragraphs)

        # Diagnostic output: paragraph count and a preview of at most the
        # first 500 characters of the extracted text.
        paragraph_count = len(paragraphs)
        preview = joined[:500]
        print(f"Extracted {paragraph_count} paragraphs from DOCX")
        print(f"Extracted Text: {preview}...")

        return {'text': joined.strip()}
    except Exception as exc:
        # Failures are reported to the caller as data rather than raised.
        return {'error': str(exc)}


def process_pdf(file_path):
    """Extracts text from a .pdf file.

    Opens the document with ``fitz.open``, concatenates the result of
    ``page.get_text()`` for every page in order, and strips leading and
    trailing whitespace from the combined text.

    Args:
        file_path: Path of the PDF to read; passed straight to ``fitz.open``.

    Returns:
        ``{'text': <extracted text>}`` on success, or
        ``{'error': <exception message>}`` if opening, reading or closing
        the document raises.
    """
    try:
        pdf = fitz.open(file_path)
        try:
            # Join once instead of growing a string page by page.
            text = "".join(page.get_text() for page in pdf)
        finally:
            # Always release the document, even if a page fails to extract.
            pdf.close()
        return {'text': text.strip()}  # Return as a dictionary
    except Exception as e:
        return {'error': str(e)}


def process_txt(file_path):
    """Read a UTF-8 encoded text file and return its contents.

    Returns:
        ``{'text': <file contents, stripped>}`` on success, or
        ``{'error': <exception message>}`` if the file cannot be opened,
        read or decoded.
    """
    outcome = {}
    try:
        with open(file_path, mode='r', encoding='utf-8') as handle:
            contents = handle.read()
    except Exception as exc:
        # Failures are reported to the caller as data rather than raised.
        outcome['error'] = str(exc)
    else:
        outcome['text'] = contents.strip()
    return outcome