Spaces:
Running
Running
| """ | |
| Diff generator: creates unified diffs from original vs. fixed file contents. | |
| """ | |
| import difflib | |
| import logging | |
| from typing import Dict, List, Tuple | |
| logger = logging.getLogger(__name__) | |
| def generate_unified_diff( | |
| original_content: str, | |
| fixed_content: str, | |
| filename: str, | |
| context_lines: int = 5, | |
| ) -> str: | |
| """ | |
| Generate a unified diff between two versions of a file. | |
| Returns the diff as a string. | |
| """ | |
| original_lines = original_content.splitlines(keepends=True) | |
| fixed_lines = fixed_content.splitlines(keepends=True) | |
| diff = difflib.unified_diff( | |
| original_lines, | |
| fixed_lines, | |
| fromfile=f"a/{filename}", | |
| tofile=f"b/{filename}", | |
| n=context_lines, | |
| ) | |
| return "".join(diff) | |
| def generate_all_diffs( | |
| original_files: Dict[str, str], | |
| fixed_files: Dict[str, str], | |
| ) -> Dict[str, str]: | |
| """ | |
| Generate unified diffs for all changed files. | |
| Returns {filepath: diff_string}. | |
| Only includes files that actually changed. | |
| """ | |
| diffs = {} | |
| for filepath, fixed_content in fixed_files.items(): | |
| original = original_files.get(filepath, "") | |
| # Normalize line endings for comparison | |
| orig_normalized = original.replace("\r\n", "\n").strip() | |
| fixed_normalized = fixed_content.replace("\r\n", "\n").strip() | |
| if orig_normalized == fixed_normalized: | |
| logger.info("No changes in %s β skipping diff", filepath) | |
| continue | |
| diff = generate_unified_diff( | |
| original, | |
| fixed_content, | |
| filepath, | |
| ) | |
| if diff.strip(): | |
| diffs[filepath] = diff | |
| changed_lines = _count_changed_lines(diff) | |
| logger.info( | |
| "Generated diff for %s: +%d -%d lines", | |
| filepath, changed_lines[0], changed_lines[1], | |
| ) | |
| return diffs | |
| def _count_changed_lines(diff: str) -> Tuple[int, int]: | |
| """Count added and removed lines in a unified diff.""" | |
| added = sum(1 for line in diff.splitlines() if line.startswith("+") and not line.startswith("+++")) | |
| removed = sum(1 for line in diff.splitlines() if line.startswith("-") and not line.startswith("---")) | |
| return added, removed | |
| def format_diff_for_display(diffs: Dict[str, str]) -> str: | |
| """ | |
| Format all diffs into a single markdown code block for display. | |
| """ | |
| if not diffs: | |
| return "No changes generated." | |
| parts = [] | |
| for filepath, diff in diffs.items(): | |
| added, removed = _count_changed_lines(diff) | |
| parts.append( | |
| f"### `{filepath}` (+{added} / -{removed} lines)\n" | |
| f"```diff\n{diff}\n```" | |
| ) | |
| return "\n\n".join(parts) | |
| def parse_fixed_files_from_llm_response( | |
| response: str, | |
| suspect_files: List[str], | |
| ) -> Dict[str, str]: | |
| """ | |
| Parse the LLM's fix generation response to extract {filepath: content}. | |
| The LLM is asked to output: | |
| ### Fix for `path/to/file.py` | |
| ```python | |
| <full file content> | |
| ``` | |
| This function extracts those code blocks. | |
| """ | |
| import re | |
| fixed_files = {} | |
| # Pattern: ### Fix for `filepath` ... ```lang\n<content>\n``` | |
| pattern = re.compile( | |
| r"Fix for `([^`]+)`.*?```(?:\w+)?\n(.*?)```", | |
| re.DOTALL | re.IGNORECASE, | |
| ) | |
| for match in pattern.finditer(response): | |
| filepath = match.group(1).strip() | |
| content = match.group(2) | |
| # Clean up the content | |
| content = content.rstrip() | |
| # Verify the filepath looks reasonable | |
| if "/" in filepath or "." in filepath: | |
| fixed_files[filepath] = content | |
| logger.info("Parsed fixed content for: %s (%d chars)", filepath, len(content)) | |
| # Fallback: try to match any filepath from suspect_files | |
| if not fixed_files: | |
| logger.warning("Could not parse fix blocks from LLM response β trying fallback") | |
| for fp in suspect_files: | |
| # Look for content near the filename mention | |
| escaped = re.escape(fp) | |
| m = re.search( | |
| escaped + r".*?```(?:\w+)?\n(.*?)```", | |
| response, | |
| re.DOTALL, | |
| ) | |
| if m: | |
| fixed_files[fp] = m.group(1).rstrip() | |
| return fixed_files | |
| def get_diff_stats(diffs: Dict[str, str]) -> Dict: | |
| """Return aggregate stats about the diffs.""" | |
| total_added = 0 | |
| total_removed = 0 | |
| for diff in diffs.values(): | |
| a, r = _count_changed_lines(diff) | |
| total_added += a | |
| total_removed += r | |
| return { | |
| "files_changed": len(diffs), | |
| "lines_added": total_added, | |
| "lines_removed": total_removed, | |
| } | |