Spaces:
Sleeping
Sleeping
| """Text comparison and diff generation service.""" | |
| import difflib | |
| from typing import Tuple | |
| from writing_studio.utils.logging import logger | |
class DiffService:
    """Service for comparing texts and generating diffs.

    All comparisons are built on the standard-library ``difflib`` module.
    Line-based outputs (HTML table, unified diff, change summary) split the
    inputs with ``str.splitlines()``; the similarity ratio is computed on
    raw characters.
    """

    def __init__(self) -> None:
        """Initialize the diff service with a reusable ``HtmlDiff`` renderer."""
        self.html_differ = difflib.HtmlDiff()

    def generate_html_diff(
        self,
        original: str,
        revised: str,
        context: bool = True,
        numlines: int = 3,
    ) -> str:
        """
        Generate HTML diff highlighting differences between texts.

        Args:
            original: Original text
            revised: Revised text
            context: Show only changed regions plus surrounding context
                lines instead of the full texts
            numlines: Number of context lines

        Returns:
            HTML diff table (a ``<table>`` fragment, not a full document)
        """
        logger.info("Generating HTML diff")

        # HtmlDiff expects sequences of lines without trailing newlines.
        return self.html_differ.make_table(
            original.splitlines(),
            revised.splitlines(),
            fromdesc="Original",
            todesc="AI Revision",
            context=context,
            numlines=numlines,
        )

    def generate_unified_diff(self, original: str, revised: str, lineterm: str = "\n") -> str:
        """
        Generate unified diff format.

        Every emitted diff line (headers, hunk markers and content lines) is
        terminated with ``lineterm``. Input lines are compared without their
        own line endings, so a final line that lacks a trailing newline can
        no longer be fused with the diff line that follows it. As a
        consequence, lines differing only in their line ending compare equal.

        Args:
            original: Original text
            revised: Revised text
            lineterm: Line terminator appended to each diff line

        Returns:
            Unified diff string (empty when the texts have identical lines)
        """
        logger.info("Generating unified diff")

        # Ask difflib for bare lines (lineterm="") and add the terminator
        # ourselves, so headers and content lines are terminated uniformly.
        diff_lines = difflib.unified_diff(
            original.splitlines(),
            revised.splitlines(),
            fromfile="original",
            tofile="revised",
            lineterm="",
        )
        return "".join(line + lineterm for line in diff_lines)

    def get_similarity_ratio(self, original: str, revised: str) -> float:
        """
        Calculate similarity ratio between two texts.

        The comparison is character-based. ``autojunk`` is disabled because
        difflib's "popular element" heuristic kicks in once the second
        sequence has 200+ items and then ignores frequently occurring
        characters, which can drive the ratio of near-identical long texts
        towards 0.0. Disabling it is slower on very long inputs but gives a
        faithful result.

        Args:
            original: Original text
            revised: Revised text

        Returns:
            Similarity ratio (0.0 to 1.0); 1.0 for two empty texts
        """
        matcher = difflib.SequenceMatcher(None, original, revised, autojunk=False)
        return matcher.ratio()

    def get_change_summary(self, original: str, revised: str) -> dict:
        """
        Get summary of changes between texts.

        Args:
            original: Original text
            revised: Revised text

        Returns:
            Dictionary with change statistics: ``lines_added``,
            ``lines_removed``, ``lines_unchanged``, ``similarity_ratio``,
            ``original_lines`` and ``revised_lines``
        """
        original_lines = original.splitlines()
        revised_lines = revised.splitlines()

        # Differ prefixes every output line with a two-character code:
        # "+ " added, "- " removed, "  " unchanged, "? " intraline hint.
        # Hint lines are not real text lines, so they are not counted.
        tally = {"+ ": 0, "- ": 0, "  ": 0}
        for entry in difflib.Differ().compare(original_lines, revised_lines):
            code = entry[:2]
            if code in tally:
                tally[code] += 1

        return {
            "lines_added": tally["+ "],
            "lines_removed": tally["- "],
            "lines_unchanged": tally["  "],
            "similarity_ratio": self.get_similarity_ratio(original, revised),
            "original_lines": len(original_lines),
            "revised_lines": len(revised_lines),
        }