"""Text comparison and diff generation service.""" import difflib from typing import Tuple from writing_studio.utils.logging import logger class DiffService: """Service for comparing texts and generating diffs.""" def __init__(self): """Initialize the diff service.""" self.html_differ = difflib.HtmlDiff() def generate_html_diff( self, original: str, revised: str, context: bool = True, numlines: int = 3, ) -> str: """ Generate HTML diff highlighting differences between texts. Args: original: Original text revised: Revised text context: Show context lines numlines: Number of context lines Returns: HTML diff table """ logger.info("Generating HTML diff") original_lines = original.splitlines() revised_lines = revised.splitlines() html_diff = self.html_differ.make_table( original_lines, revised_lines, fromdesc="Original", todesc="AI Revision", context=context, numlines=numlines, ) return html_diff def generate_unified_diff(self, original: str, revised: str, lineterm: str = "\n") -> str: """ Generate unified diff format. Args: original: Original text revised: Revised text lineterm: Line terminator Returns: Unified diff string """ logger.info("Generating unified diff") original_lines = original.splitlines(keepends=True) revised_lines = revised.splitlines(keepends=True) diff = difflib.unified_diff( original_lines, revised_lines, fromfile="original", tofile="revised", lineterm=lineterm, ) return "".join(diff) def get_similarity_ratio(self, original: str, revised: str) -> float: """ Calculate similarity ratio between two texts. Args: original: Original text revised: Revised text Returns: Similarity ratio (0.0 to 1.0) """ sequence_matcher = difflib.SequenceMatcher(None, original, revised) return sequence_matcher.ratio() def get_change_summary(self, original: str, revised: str) -> dict: """ Get summary of changes between texts. Args: original: Original text revised: Revised text Returns: Dictionary with change statistics """ original_lines = original.splitlines() revised_lines = revised.splitlines() differ = difflib.Differ() diff = list(differ.compare(original_lines, revised_lines)) added = sum(1 for line in diff if line.startswith("+ ")) removed = sum(1 for line in diff if line.startswith("- ")) unchanged = sum(1 for line in diff if line.startswith(" ")) similarity = self.get_similarity_ratio(original, revised) return { "lines_added": added, "lines_removed": removed, "lines_unchanged": unchanged, "similarity_ratio": similarity, "original_lines": len(original_lines), "revised_lines": len(revised_lines), }