File size: 3,386 Bytes
aeb3f7c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
"""Text comparison and diff generation service."""

import difflib
from typing import Tuple

from writing_studio.utils.logging import logger


class DiffService:
    """Service for comparing texts and generating diffs.

    All methods take the two texts as plain strings and rely on the
    standard-library ``difflib`` module for the actual comparison.
    """

    def __init__(self):
        """Initialize the diff service with a reusable ``difflib.HtmlDiff``."""
        self.html_differ = difflib.HtmlDiff()

    def generate_html_diff(
        self,
        original: str,
        revised: str,
        context: bool = True,
        numlines: int = 3,
    ) -> str:
        """
        Generate HTML diff highlighting differences between texts.

        The texts are split into lines (line endings dropped) and rendered
        side by side with the column headings "Original" and "AI Revision".

        Args:
            original: Original text
            revised: Revised text
            context: Passed to ``HtmlDiff.make_table``; when true only the
                changed regions and their surrounding context are shown,
                otherwise the full texts are shown
            numlines: Number of context lines around each change when
                ``context`` is true

        Returns:
            HTML diff table
        """
        logger.info("Generating HTML diff")

        return self.html_differ.make_table(
            original.splitlines(),
            revised.splitlines(),
            fromdesc="Original",
            todesc="AI Revision",
            context=context,
            numlines=numlines,
        )

    def generate_unified_diff(self, original: str, revised: str, lineterm: str = "\n") -> str:
        """
        Generate unified diff format.

        Input lines keep their own line endings. A final line that has no
        trailing newline is terminated with ``lineterm`` in the output so it
        cannot run into the diff line that follows it.

        Args:
            original: Original text
            revised: Revised text
            lineterm: Line terminator used for the diff header/hunk lines and
                for any content line that lacks its own line ending

        Returns:
            Unified diff string (empty when the texts are identical)
        """
        logger.info("Generating unified diff")

        diff_lines = difflib.unified_diff(
            original.splitlines(keepends=True),
            revised.splitlines(keepends=True),
            fromfile="original",
            tofile="revised",
            lineterm=lineterm,
        )

        # Lines that already end in a line break (kept from the input, or
        # added to header lines via ``lineterm``) are emitted untouched.
        # Only an unterminated last input line needs a terminator; without
        # it "-old" and "+new" would be joined into "-old+new".
        endings = ("\n", "\r", lineterm) if lineterm else ("\n", "\r")
        return "".join(
            line if line.endswith(endings) else line + lineterm
            for line in diff_lines
        )

    def get_similarity_ratio(self, original: str, revised: str) -> float:
        """
        Calculate similarity ratio between two texts.

        The comparison is character-based (``difflib.SequenceMatcher`` over
        the raw strings, no junk filter).

        Args:
            original: Original text
            revised: Revised text

        Returns:
            Similarity ratio (0.0 to 1.0)
        """
        return difflib.SequenceMatcher(None, original, revised).ratio()

    def get_change_summary(self, original: str, revised: str) -> dict:
        """
        Get summary of changes between texts.

        Line counts come from a line-based ``difflib.Differ`` comparison; the
        similarity ratio is the character-based value returned by
        ``get_similarity_ratio``.

        Args:
            original: Original text
            revised: Revised text

        Returns:
            Dictionary with change statistics: ``lines_added``,
            ``lines_removed``, ``lines_unchanged``, ``similarity_ratio``,
            ``original_lines`` and ``revised_lines``
        """
        original_lines = original.splitlines()
        revised_lines = revised.splitlines()

        added = removed = unchanged = 0
        # Differ prefixes every output line with a two-character code.
        # "? " hint lines match none of the prefixes below and are skipped.
        for line in difflib.Differ().compare(original_lines, revised_lines):
            if line.startswith("+ "):
                added += 1
            elif line.startswith("- "):
                removed += 1
            elif line.startswith("  "):
                unchanged += 1

        return {
            "lines_added": added,
            "lines_removed": removed,
            "lines_unchanged": unchanged,
            "similarity_ratio": self.get_similarity_ratio(original, revised),
            "original_lines": len(original_lines),
            "revised_lines": len(revised_lines),
        }