fixflow / backend /diff_generator.py
E5K7's picture
πŸ”§ Initial commit: FixFlow β€” Autonomous Bug Resolution Agent
cd3b358
"""
Diff generator: creates unified diffs from original vs. fixed file contents.
"""
import difflib
import logging
from typing import Dict, List, Tuple
logger = logging.getLogger(__name__)
def generate_unified_diff(
original_content: str,
fixed_content: str,
filename: str,
context_lines: int = 5,
) -> str:
"""
Generate a unified diff between two versions of a file.
Returns the diff as a string.
"""
original_lines = original_content.splitlines(keepends=True)
fixed_lines = fixed_content.splitlines(keepends=True)
diff = difflib.unified_diff(
original_lines,
fixed_lines,
fromfile=f"a/{filename}",
tofile=f"b/{filename}",
n=context_lines,
)
return "".join(diff)
def generate_all_diffs(
original_files: Dict[str, str],
fixed_files: Dict[str, str],
) -> Dict[str, str]:
"""
Generate unified diffs for all changed files.
Returns {filepath: diff_string}.
Only includes files that actually changed.
"""
diffs = {}
for filepath, fixed_content in fixed_files.items():
original = original_files.get(filepath, "")
# Normalize line endings for comparison
orig_normalized = original.replace("\r\n", "\n").strip()
fixed_normalized = fixed_content.replace("\r\n", "\n").strip()
if orig_normalized == fixed_normalized:
logger.info("No changes in %s β€” skipping diff", filepath)
continue
diff = generate_unified_diff(
original,
fixed_content,
filepath,
)
if diff.strip():
diffs[filepath] = diff
changed_lines = _count_changed_lines(diff)
logger.info(
"Generated diff for %s: +%d -%d lines",
filepath, changed_lines[0], changed_lines[1],
)
return diffs
def _count_changed_lines(diff: str) -> Tuple[int, int]:
"""Count added and removed lines in a unified diff."""
added = sum(1 for line in diff.splitlines() if line.startswith("+") and not line.startswith("+++"))
removed = sum(1 for line in diff.splitlines() if line.startswith("-") and not line.startswith("---"))
return added, removed
def format_diff_for_display(diffs: Dict[str, str]) -> str:
"""
Format all diffs into a single markdown code block for display.
"""
if not diffs:
return "No changes generated."
parts = []
for filepath, diff in diffs.items():
added, removed = _count_changed_lines(diff)
parts.append(
f"### `{filepath}` (+{added} / -{removed} lines)\n"
f"```diff\n{diff}\n```"
)
return "\n\n".join(parts)
def parse_fixed_files_from_llm_response(
response: str,
suspect_files: List[str],
) -> Dict[str, str]:
"""
Parse the LLM's fix generation response to extract {filepath: content}.
The LLM is asked to output:
### Fix for `path/to/file.py`
```python
<full file content>
```
This function extracts those code blocks.
"""
import re
fixed_files = {}
# Pattern: ### Fix for `filepath` ... ```lang\n<content>\n```
pattern = re.compile(
r"Fix for `([^`]+)`.*?```(?:\w+)?\n(.*?)```",
re.DOTALL | re.IGNORECASE,
)
for match in pattern.finditer(response):
filepath = match.group(1).strip()
content = match.group(2)
# Clean up the content
content = content.rstrip()
# Verify the filepath looks reasonable
if "/" in filepath or "." in filepath:
fixed_files[filepath] = content
logger.info("Parsed fixed content for: %s (%d chars)", filepath, len(content))
# Fallback: try to match any filepath from suspect_files
if not fixed_files:
logger.warning("Could not parse fix blocks from LLM response β€” trying fallback")
for fp in suspect_files:
# Look for content near the filename mention
escaped = re.escape(fp)
m = re.search(
escaped + r".*?```(?:\w+)?\n(.*?)```",
response,
re.DOTALL,
)
if m:
fixed_files[fp] = m.group(1).rstrip()
return fixed_files
def get_diff_stats(diffs: Dict[str, str]) -> Dict:
"""Return aggregate stats about the diffs."""
total_added = 0
total_removed = 0
for diff in diffs.values():
a, r = _count_changed_lines(diff)
total_added += a
total_removed += r
return {
"files_changed": len(diffs),
"lines_added": total_added,
"lines_removed": total_removed,
}