from pptx import Presentation
import pdfplumber
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from io import BytesIO
import docx
from pathlib import Path
import openpyxl
import re

def extract_text(file_path: Path, file_type: str) -> str:
    """Extract the plain text content of a document.

    Args:
        file_path: Location of the file to read.
        file_type: Format selector: "txt", "docx", "xlsx", "pptx" or "pdf".
            Any other value reads nothing and yields an empty string.

    Returns:
        The extracted text with leading and trailing whitespace removed.
        - txt: the file content decoded as UTF-8.
        - docx: non-empty paragraphs joined by newlines.
        - xlsx: non-empty cells of the active sheet, separated by spaces.
        - pptx: non-blank text-frame paragraphs and table cells, one per line.
        - pdf: text of every page that yields any, joined by newlines.
    """
    text = ""

    if file_type == "txt":
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()

    elif file_type == "docx":
        doc = docx.Document(file_path)
        text = "\n".join(para.text for para in doc.paragraphs if para.text)

    elif file_type == "xlsx":
        wb = openpyxl.load_workbook(file_path)
        sheet = wb.active  # only the active worksheet is read
        cell_values = []
        for row in sheet.rows:
            for cell in row:
                if cell.value is not None:
                    cell_values.append(str(cell.value))
        text = " ".join(cell_values)

    elif file_type == "pptx":
        prs = Presentation(file_path)
        fragments = []
        for slide in prs.slides:
            for shape in slide.shapes:
                if shape.has_text_frame:
                    for paragraph in shape.text_frame.paragraphs:
                        if (clean_text := paragraph.text.strip()):
                            fragments.append(clean_text)
                elif shape.has_table:
                    for row in shape.table.rows:
                        for cell in row.cells:
                            if (cell_text := cell.text.strip()):
                                fragments.append(cell_text)
        text = "\n".join(fragments)

    elif file_type == "pdf":
        with pdfplumber.open(file_path) as pdf:
            # Extract each page exactly once: extraction is the expensive
            # step, so filter on the stored result instead of re-running it.
            page_texts = [page.extract_text() for page in pdf.pages]
        text = "\n".join(page_text for page_text in page_texts if page_text)

    return text.strip()

def save_file(text: str, original_path: Path, file_type: str, output_path: Path) -> None:
    """Write *text* to *output_path* in the format selected by *file_type*.

    Args:
        text: Content to store.
        original_path: Not used by this function; kept so existing callers
            keep working.
        file_type: "docx", "xlsx", "pptx" or "pdf". Any other value writes
            the text as a UTF-8 plain-text file.
        output_path: Destination file.

    Format details:
        - docx: the whole text is added as a single paragraph.
        - xlsx: one line of text per row in column 1 of the active sheet.
        - pptx: one slide built from layout index 1, with the text placed in
          placeholder index 1.
        - pdf: one ``drawString`` per line on letter-size pages; lines are
          not wrapped.
    """
    if file_type == "docx":
        doc = docx.Document()
        doc.add_paragraph(text)
        doc.save(output_path)

    elif file_type == "xlsx":
        wb = openpyxl.Workbook()
        sheet = wb.active
        for row_number, line in enumerate(text.split("\n"), start=1):
            sheet.cell(row=row_number, column=1, value=line)
        wb.save(output_path)

    elif file_type == "pptx":
        prs = Presentation()
        slide_layout = prs.slide_layouts[1]
        slide = prs.slides.add_slide(slide_layout)
        content = slide.shapes.placeholders[1]
        content.text = text
        prs.save(output_path)

    elif file_type == "pdf":
        # Render the whole document in memory first and open the destination
        # only afterwards, so a failure while drawing cannot leave a
        # truncated or empty file at output_path.
        pdf_buffer = BytesIO()
        c = canvas.Canvas(pdf_buffer, pagesize=letter)
        y = 750  # vertical start position on each page
        for line in text.split("\n"):
            c.drawString(72, y, line)
            y -= 12  # step down one line
            if y < 50:  # bottom limit reached: continue on a new page
                c.showPage()
                y = 750
        c.save()
        with open(output_path, "wb") as f:
            f.write(pdf_buffer.getvalue())

    else:
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(text)


def verify_summary(summary: str, original: str) -> str:
    """Keep only the summary sentences whose words appear in the original.

    The summary is split on '.'; each non-empty sentence is split on
    whitespace into tokens. A token counts as a match when, after removing
    leading/trailing punctuation, it has at least 3 characters and equals a
    word of *original* (case-insensitive, whole-word comparison). A sentence
    is kept when matches make up at least 30% of all its tokens.

    Args:
        summary: Text to verify.
        original: Source text the summary is checked against.

    Returns:
        The kept sentences joined with '. ' (the period after the last
        sentence is not restored), or the first 500 characters of *summary*
        when no sentence is kept.
    """
    # Strip punctuation glued to a token ('today,' / '"nice"') so it can be
    # compared as a word.
    edge_punctuation = re.compile(r"^\W+|\W+$")

    # Whole-word set: a plain substring test against the full text would
    # accept fragments (e.g. "her" inside "weather") and rescan the whole
    # document for every summary word.
    original_words = set()
    for token in original.lower().split():
        cleaned = edge_punctuation.sub("", token)
        if cleaned:
            original_words.add(cleaned)

    verified = []
    for sentence in summary.split('.'):
        sentence = sentence.strip()
        if not sentence:
            continue

        tokens = sentence.lower().split()
        matches = 0
        for token in tokens:
            cleaned = edge_punctuation.sub("", token)
            if len(cleaned) >= 3 and cleaned in original_words:
                matches += 1

        # The ratio is taken over all tokens, short ones included.
        if tokens and (matches / len(tokens)) >= 0.3:
            verified.append(sentence)

    return '. '.join(verified) if verified else summary[:500]

def ensure_complete_sentences(text: str) -> str:
    """Return *text* reduced to its complete sentences.

    Whitespace runs are collapsed to single spaces, the text is split after
    every '.', '!' or '?' that is followed by whitespace, and only the pieces
    ending in one of those terminators are kept. In practice this drops a
    trailing fragment that has no terminator.

    Args:
        text: Text to clean up.

    Returns:
        The kept sentences joined by single spaces. An empty string is
        returned when *text* is empty, is not a ``str``, or contains no
        piece ending in a terminator.
    """
    if not text or not isinstance(text, str):
        return ""

    terminators = ('.', '!', '?')

    # Collapse all whitespace so pieces carry no leading/trailing blanks.
    normalized = ' '.join(text.split())

    # The lookbehind keeps the terminator attached to its sentence.
    pieces = re.split(r'(?<=[.!?])\s+', normalized)

    # Every kept piece ends with a terminator, so the joined result does too
    # (or is empty); no further trimming is needed.
    return ' '.join(piece for piece in pieces if piece.endswith(terminators))