jmisak's picture
Upload 41 files
aeb3f7c verified
"""Text comparison and diff generation service."""
import difflib
from typing import Tuple
from writing_studio.utils.logging import logger
class DiffService:
    """Service for comparing texts and generating diffs.

    Every comparison is delegated to the standard-library ``difflib`` module;
    this class only prepares the inputs and post-processes the results.
    """

    def __init__(self):
        """Initialize the diff service with a reusable ``difflib.HtmlDiff``."""
        self.html_differ = difflib.HtmlDiff()

    def generate_html_diff(
        self,
        original: str,
        revised: str,
        context: bool = True,
        numlines: int = 3,
    ) -> str:
        """
        Generate HTML diff highlighting differences between texts.

        Both texts are split into lines (line endings dropped) and handed to
        ``HtmlDiff.make_table`` with the column headers "Original" and
        "AI Revision".

        Args:
            original: Original text
            revised: Revised text
            context: When True only the changed regions plus surrounding
                context lines are rendered; when False the full texts are
                rendered
            numlines: Number of context lines around each change when
                ``context`` is True

        Returns:
            HTML diff table
        """
        logger.info("Generating HTML diff")
        return self.html_differ.make_table(
            original.splitlines(),
            revised.splitlines(),
            fromdesc="Original",
            todesc="AI Revision",
            context=context,
            numlines=numlines,
        )

    def generate_unified_diff(self, original: str, revised: str, lineterm: str = "\n") -> str:
        """
        Generate unified diff format.

        The texts are split with their line endings preserved, so content
        lines in the result carry the endings of the input. A final line that
        has no line ending in the input is terminated with ``lineterm`` in the
        output; without that, it would run straight into the following diff
        line once everything is concatenated.

        Args:
            original: Original text
            revised: Revised text
            lineterm: Line terminator used for the diff's control lines and
                for content lines that have no line ending of their own. An
                empty string disables the latter and leaves such lines as-is.

        Returns:
            Unified diff string (empty when the texts are identical)
        """
        logger.info("Generating unified diff")
        diff = difflib.unified_diff(
            original.splitlines(keepends=True),
            revised.splitlines(keepends=True),
            fromfile="original",
            tofile="revised",
            lineterm=lineterm,
        )
        return "".join(self._terminate(line, lineterm) for line in diff)

    @staticmethod
    def _terminate(line: str, lineterm: str) -> str:
        """Return ``line`` with ``lineterm`` appended if it has no line ending."""
        if not lineterm or line.endswith(lineterm):
            return line
        # splitlines() strips any recognised line boundary, so a difference
        # here means the line already ends with one (e.g. "\r" or "\r\n").
        if line.splitlines()[:1] != [line]:
            return line
        return line + lineterm

    def get_similarity_ratio(self, original: str, revised: str) -> float:
        """
        Calculate similarity ratio between two texts.

        The comparison is character-based: both strings are passed directly
        to ``difflib.SequenceMatcher`` with no junk filter.

        Args:
            original: Original text
            revised: Revised text

        Returns:
            Similarity ratio (0.0 to 1.0)
        """
        return difflib.SequenceMatcher(None, original, revised).ratio()

    def get_change_summary(self, original: str, revised: str) -> dict:
        """
        Get summary of changes between texts.

        Lines are compared with ``difflib.Differ``; its "? " hint lines are
        not counted in any category.

        Args:
            original: Original text
            revised: Revised text

        Returns:
            Dictionary with change statistics: ``lines_added``,
            ``lines_removed``, ``lines_unchanged``, ``similarity_ratio``,
            ``original_lines`` and ``revised_lines``
        """
        original_lines = original.splitlines()
        revised_lines = revised.splitlines()

        added = removed = unchanged = 0
        # Single pass over the Differ output, classifying by its 2-char prefix.
        for line in difflib.Differ().compare(original_lines, revised_lines):
            prefix = line[:2]
            if prefix == "+ ":
                added += 1
            elif prefix == "- ":
                removed += 1
            elif prefix == "  ":
                unchanged += 1

        return {
            "lines_added": added,
            "lines_removed": removed,
            "lines_unchanged": unchanged,
            "similarity_ratio": self.get_similarity_ratio(original, revised),
            "original_lines": len(original_lines),
            "revised_lines": len(revised_lines),
        }