#!/usr/bin/env python3
"""
Edit and Clean Audrey's Original Material
Preserves her voice while improving clarity
"""
import json
import re
from datetime import date
from pathlib import Path
from typing import List, Dict
class AudreyMaterialEditor:
    """Edit Audrey's material while preserving her voice.

    Reads the compiled original manuscript from ``audrey_material_dir``,
    applies conservative whitespace/punctuation clean-up, and writes an
    attributed Markdown copy into ``edited_dir``.
    """

    # Paragraphs longer than this many characters are reported as long.
    LONG_PARAGRAPH_CHARS = 1000
    # Unterminated fragments at or below this length are ignored.
    MIN_FRAGMENT_CHARS = 20
    # Maximum number of characters kept in report previews.
    PREVIEW_CHARS = 100

    # Split *after* a sentence terminator so the terminator stays attached
    # to the sentence it ends.
    _SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])\s+')
    # A sentence counts as finished when it ends in . ! or ?, optionally
    # followed by closing quotes or brackets (e.g. `"Why?"`).
    _SENTENCE_END = re.compile(r'[.!?]["\'\u201d\u2019)\]]*$')

    def __init__(self):
        """Set the input/output directories and create the output directory."""
        self.audrey_material_dir = Path("manuscripts/Shadow_of_Lillya/audrey_original")
        self.edited_dir = Path("manuscripts/Shadow_of_Lillya/audrey_edited")
        self.edited_dir.mkdir(parents=True, exist_ok=True)

    def load_original_material(self) -> str:
        """Load Audrey's compiled original material.

        Returns:
            The full text of ``audrey_original_compiled.md``.

        Raises:
            FileNotFoundError: If the compiled file does not exist.
        """
        original_file = self.audrey_material_dir / "audrey_original_compiled.md"
        if not original_file.exists():
            raise FileNotFoundError(
                f"Original material not found at {original_file}. "
                "Run extract_audrey_material.py first."
            )
        return original_file.read_text(encoding='utf-8')

    def clean_text(self, text: str, preserve_voice: bool = True) -> str:
        """Clean text while preserving voice.

        Only whitespace is touched: runs of blank lines are collapsed and
        spacing around punctuation is normalized within a line.

        Args:
            text: The text to clean.
            preserve_voice: Accepted for interface compatibility; this
                method does not currently read it.

        Returns:
            The cleaned text with leading/trailing whitespace removed.
        """
        # Collapse three or more consecutive line breaks into one blank line.
        text = re.sub(r'\n\s*\n\s*\n+', '\n\n', text)

        # Only horizontal whitespace is matched below. Using `\s` here would
        # also swallow newlines and silently merge a paragraph that ends in
        # punctuation with the capitalized paragraph that follows it.
        text = re.sub(r'[ \t]+([,.!?;:])', r'\1', text)
        # NOTE: this also inserts spaces inside initialisms ("U.S.A." becomes
        # "U. S. A."); tighten the pattern if the manuscript relies on them.
        text = re.sub(r'([,.!?;:])[ \t]*([A-Z])', r'\1 \2', text)

        # Typo fixes are intentionally not applied here; add specific,
        # conservative fixes only as needed.
        return text.strip()

    def identify_editing_needs(self, text: str) -> Dict:
        """Identify areas that might need editing for clarity.

        Args:
            text: Manuscript text; paragraphs are separated by blank lines.

        Returns:
            A dict with the keys ``long_paragraphs``, ``repeated_phrases``,
            ``unclear_references`` and ``incomplete_sentences``. Only
            ``long_paragraphs`` and ``incomplete_sentences`` are populated;
            each entry records the zero-based paragraph index and a preview.
        """
        issues = {
            'long_paragraphs': [],
            'repeated_phrases': [],
            'unclear_references': [],
            'incomplete_sentences': []
        }

        for index, para in enumerate(text.split('\n\n')):
            # Very long paragraphs might need breaking up.
            if len(para) > self.LONG_PARAGRAPH_CHARS:
                issues['long_paragraphs'].append({
                    'paragraph': index,
                    'length': len(para),
                    'preview': para[:self.PREVIEW_CHARS] + '...'
                })

            # Splitting keeps each terminator with its sentence, so a piece
            # is only flagged when it genuinely lacks closing punctuation.
            for raw_sentence in self._SENTENCE_BOUNDARY.split(para):
                sentence = raw_sentence.strip()
                if len(sentence) <= self.MIN_FRAGMENT_CHARS:
                    continue  # too short to judge
                if self._SENTENCE_END.search(sentence):
                    continue  # properly terminated
                issues['incomplete_sentences'].append({
                    'paragraph': index,
                    'sentence': sentence[:self.PREVIEW_CHARS]
                })

        return issues

    @staticmethod
    def _strip_header(text: str) -> str:
        """Return the text after the second '---' marker, if there are two.

        With fewer than two markers there is no delimited header to skip,
        so the text is returned unchanged.
        """
        first = text.find('---')
        if first == -1:
            return text
        second = text.find('---', first + 3)
        if second == -1:
            return text
        return text[second + 3:]

    def create_edited_version(self, original_text: str, edits: List[Dict] = None) -> str:
        """Create edited version with clear attribution.

        Args:
            original_text: The compiled original text. A header delimited by
                two '---' markers is skipped when present.
            edits: Accepted for interface compatibility; this method does
                not currently read it.

        Returns:
            A Markdown document: attribution header, editorial note, then
            the cleaned content, ending with a newline.
        """
        content = self._strip_header(original_text).strip()
        cleaned = self.clean_text(content)

        # A blank line precedes each '---' so Markdown renders it as a
        # horizontal rule instead of turning the line above into a heading.
        document_lines = [
            "# The Shadow of Lillya",
            "## Original Material by Audrey Berger Welz",
            "### Edited for Clarity",
            "",
            "**Source:** Extracted from Audrey's original draft manuscripts",
            "**Editing:** Minor edits for clarity only - voice and style preserved",
            # Date this edited version was generated, as YYYY-MM-DD.
            f"**Date:** {date.today().isoformat()}",
            "",
            "---",
            "",
            "## Editorial Note",
            "",
            "This version contains only Audrey Berger Welz's original writing, "
            "with minimal edits for clarity:",
            "",
            "- Fixed obvious typos and formatting issues",
            "- Normalized spacing and punctuation",
            "- Preserved all of Audrey's unique voice, style, and word choices",
            "- No content changes or additions",
            "",
            "Any material beyond this point that is not clearly marked is "
            "Audrey's original work.",
            "",
            "---",
            "",
            cleaned,
            "",
        ]
        return "\n".join(document_lines)

    def save_edited_version(self, edited_text: str) -> Path:
        """Save the edited version.

        Args:
            edited_text: The Markdown document to write.

        Returns:
            The path of the written file, ``audrey_edited_clean.md`` inside
            ``edited_dir``.
        """
        output_file = self.edited_dir / "audrey_edited_clean.md"
        output_file.write_text(edited_text, encoding='utf-8')
        return output_file
def main():
    """Run the editing pipeline: load, analyse, clean, and save.

    Prints progress to stdout. A FileNotFoundError raised by
    ``load_original_material`` is not caught here and propagates.
    """
    print("✏️ Editing Audrey's Material for Clarity...\n")
    editor = AudreyMaterialEditor()

    # Load original
    print("📖 Loading original material...")
    original = editor.load_original_material()
    print(" ✓ Loaded\n")

    # Identify editing needs (reported only; nothing is changed by this step)
    print("🔍 Identifying editing needs...")
    issues = editor.identify_editing_needs(original)
    print(f" Found {len(issues['long_paragraphs'])} long paragraphs")
    print(f" Found {len(issues['incomplete_sentences'])} potentially incomplete sentences\n")

    # Create edited version
    print("📝 Creating edited version...")
    edited = editor.create_edited_version(original)

    # Save
    print("💾 Saving edited version...")
    output_file = editor.save_edited_version(edited)

    print("\n✅ Complete!")
    print(f" 📄 Edited manuscript: {output_file}")
    print("\n⚠️ Note: This version contains ONLY Audrey's original material, edited minimally for clarity.")


if __name__ == '__main__':
    main()