from docx import Document
from pathlib import Path
from .parser_base import ParserBase
from typing import Tuple, Dict 

class DOCXParser(ParserBase):
    """Parser that pulls paragraph text and basic metadata from a .docx file."""

    def extract_text_and_metadata(self, filepath: str) -> Tuple[str, Dict]:
        """Read the document at *filepath* and return its text and metadata.

        Args:
            filepath: Location of the document; passed to ``Document`` and
                used to derive the reported file name.

        Returns:
            A ``(text, metadata)`` tuple. ``text`` is the ``.text`` of every
            entry in the document's ``paragraphs`` joined with newlines.
            ``metadata`` holds the keys ``"filetype"`` (always ``"docx"``),
            ``"filename"`` (final path component of *filepath*) and
            ``"num_paragraphs"`` (number of entries in ``paragraphs``).
        """
        document = Document(filepath)
        # Read the paragraph collection once and reuse it for text and count.
        paragraphs = document.paragraphs
        body = "\n".join(paragraph.text for paragraph in paragraphs)
        info = {
            "filetype": "docx",
            "filename": Path(filepath).name,
            "num_paragraphs": len(paragraphs),
        }
        return body, info