Spaces:
Sleeping
Sleeping
File size: 3,386 Bytes
aeb3f7c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
"""Text comparison and diff generation service."""
import difflib
from typing import Tuple
from writing_studio.utils.logging import logger
class DiffService:
    """Service for comparing two texts and rendering their differences.

    Wraps the standard-library ``difflib`` module and offers an HTML
    side-by-side table, a unified diff, a character-level similarity ratio
    and a line-level change summary.
    """

    def __init__(self):
        """Initialize the diff service with a reusable ``difflib.HtmlDiff``."""
        self.html_differ = difflib.HtmlDiff()

    def generate_html_diff(
        self,
        original: str,
        revised: str,
        context: bool = True,
        numlines: int = 3,
    ) -> str:
        """
        Generate an HTML table highlighting differences between two texts.

        Args:
            original: Original text
            revised: Revised text
            context: If True, show only changed lines plus surrounding
                context; if False, show both texts in full
            numlines: Number of context lines around each change

        Returns:
            HTML ``<table>`` markup (not a complete HTML document)
        """
        logger.info("Generating HTML diff")
        return self.html_differ.make_table(
            original.splitlines(),
            revised.splitlines(),
            fromdesc="Original",
            todesc="AI Revision",
            context=context,
            numlines=numlines,
        )

    def generate_unified_diff(self, original: str, revised: str, lineterm: str = "\n") -> str:
        """
        Generate a unified diff of two texts.

        Args:
            original: Original text
            revised: Revised text
            lineterm: Line terminator appended to the ``---``/``+++``/``@@``
                control lines

        Returns:
            Unified diff string; empty when the texts are identical. A final
            line that lacks a trailing newline is terminated and followed by
            a ``\\ No newline at end of file`` marker.
        """
        logger.info("Generating unified diff")
        diff_lines = difflib.unified_diff(
            # keepends=True preserves each content line's own terminator.
            original.splitlines(keepends=True),
            revised.splitlines(keepends=True),
            fromfile="original",
            tofile="revised",
            lineterm=lineterm,
        )
        return "".join(self._terminate_diff_lines(diff_lines, lineterm))

    @staticmethod
    def _terminate_diff_lines(diff_lines, lineterm: str = "\n"):
        """Yield unified-diff lines, terminating content lines that lack a newline.

        Without this, the unterminated last line of a text is glued to the
        following diff line when the pieces are joined (e.g. ``-a+b``).

        Args:
            diff_lines: Iterable of lines as produced by ``difflib.unified_diff``
            lineterm: Terminator that was passed to ``unified_diff``; also used
                for the marker (falls back to a newline when empty)

        Yields:
            Each line unchanged, or with a terminator and the
            ``\\ No newline at end of file`` marker appended.
        """
        newline = lineterm or "\n"
        marker = f"{newline}\\ No newline at end of file{newline}"
        for index, line in enumerate(diff_lines):
            # The first two lines are the file headers and "@@" starts a hunk
            # header; content lines always start with " ", "-" or "+".
            is_control = index < 2 or line.startswith("@@")
            if is_control or line.endswith(("\n", "\r")):
                yield line
            else:
                yield line + marker

    def get_similarity_ratio(self, original: str, revised: str) -> float:
        """
        Calculate the character-level similarity ratio between two texts.

        Args:
            original: Original text
            revised: Revised text

        Returns:
            Similarity ratio (0.0 to 1.0); 1.0 means identical
        """
        if original == revised:
            # Identical input (including two empty strings) always scores 1.0.
            return 1.0
        # autojunk must be off: for sequences of 200+ items difflib otherwise
        # ignores every character that makes up more than ~1% of the text,
        # which can drive the ratio of near-identical prose towards 0.
        # NOTE: this is more accurate but slower on very long texts.
        matcher = difflib.SequenceMatcher(None, original, revised, autojunk=False)
        return matcher.ratio()

    def get_change_summary(self, original: str, revised: str) -> dict:
        """
        Get a line-level summary of changes between two texts.

        Args:
            original: Original text
            revised: Revised text

        Returns:
            Dictionary with the keys ``lines_added``, ``lines_removed``,
            ``lines_unchanged``, ``similarity_ratio``, ``original_lines``
            and ``revised_lines``
        """
        original_lines = original.splitlines()
        revised_lines = revised.splitlines()

        # Differ prefixes every line with a two-character code: "+ " added,
        # "- " removed, "  " unchanged, "? " intraline hint (not counted).
        # Counter is imported locally so this block does not depend on a
        # file-level import being added.
        from collections import Counter

        codes = Counter(
            line[:2] for line in difflib.Differ().compare(original_lines, revised_lines)
        )

        return {
            "lines_added": codes["+ "],
            "lines_removed": codes["- "],
            "lines_unchanged": codes["  "],
            "similarity_ratio": self.get_similarity_ratio(original, revised),
            "original_lines": len(original_lines),
            "revised_lines": len(revised_lines),
        }
|