Spaces:

garywelz
/

shadow

Sleeping

App Files Files Community

shadow / edit_audrey_material.py

garywelz

Initial commit: Shadow of Lillya project (Markdown files only, no binaries)

73e99c6 3 months ago

raw

history blame contribute delete

5.45 kB

	#!/usr/bin/env python3
	"""
	Edit and Clean Audrey's Original Material
	Preserves her voice while improving clarity
	"""

	import re
	from pathlib import Path
	from typing import List, Dict
	import json

	class AudreyMaterialEditor:
	    """Edit Audrey's material while preserving her voice.

	    Loads the compiled original manuscript, applies conservative whitespace
	    and punctuation clean-up, reports passages that may need a human editor,
	    and writes an attributed Markdown copy of the cleaned text.
	    """

	    # Paragraphs longer than this many characters are reported as "long".
	    LONG_PARAGRAPH_CHARS = 1000
	    # Unterminated trailing text must exceed this many characters to be
	    # reported; shorter leftovers are treated as harmless fragments.
	    MIN_FRAGMENT_CHARS = 20
	    # Sentence terminator(s) plus any closing quotes/brackets/emphasis marks.
	    _SENTENCE_END = re.compile(r'''[.!?…]+["'”’)\]*_]*''')

	    def __init__(self):
	        """Set the input/output directories and create the output directory."""
	        self.audrey_material_dir = Path("manuscripts/Shadow_of_Lillya/audrey_original")
	        self.edited_dir = Path("manuscripts/Shadow_of_Lillya/audrey_edited")
	        # Created up front so save_edited_version() can write unconditionally.
	        self.edited_dir.mkdir(parents=True, exist_ok=True)

	    def load_original_material(self) -> str:
	        """Load Audrey's compiled original material.

	        Returns:
	            The full text of ``audrey_original_compiled.md`` found in
	            ``self.audrey_material_dir``.

	        Raises:
	            FileNotFoundError: If the compiled file does not exist.
	        """
	        original_file = self.audrey_material_dir / "audrey_original_compiled.md"
	        if not original_file.exists():
	            raise FileNotFoundError("Original material not found. Run extract_audrey_material.py first.")

	        return original_file.read_text(encoding='utf-8')

	    def clean_text(self, text: str, preserve_voice: bool = True) -> str:
	        """Clean text while preserving voice.

	        Only horizontal whitespace (spaces and tabs) is touched by the
	        punctuation rules, so line breaks and paragraph breaks always
	        survive.

	        Args:
	            text: The text to clean.
	            preserve_voice: Accepted for interface compatibility; the current
	                rules are always conservative and do not consult it.

	        Returns:
	            The cleaned text with leading/trailing whitespace stripped.
	        """
	        # Collapse any run of blank or whitespace-only lines into exactly one
	        # blank line (i.e. a single paragraph break).
	        text = re.sub(r'\n(?:[ \t]*\n)+', '\n\n', text)

	        # Drop spaces/tabs that precede punctuation ("word ," -> "word,").
	        text = re.sub(r'[ \t]+([,.!?;:])', r'\1', text)

	        # Collapse a run of spaces/tabs between punctuation and a capital
	        # letter to one space ("end.   Next" -> "end. Next").
	        text = re.sub(r'([,.!?;:])[ \t]+(?=[A-Z])', r'\1 ', text)

	        # Insert a missing space only after a lowercase letter, so that
	        # initialisms such as "U.S.A." are left exactly as written.
	        text = re.sub(r'(?<=[a-z])([,.!?;:])(?=[A-Z])', r'\1 ', text)

	        return text.strip()

	    def identify_editing_needs(self, text: str) -> Dict:
	        """Identify areas that might need editing for clarity.

	        Paragraphs are the chunks between blank lines (``'\\n\\n'``).

	        Args:
	            text: The manuscript text to inspect.

	        Returns:
	            A dict with four list-valued keys. ``'long_paragraphs'`` holds
	            ``{'paragraph', 'length', 'preview'}`` entries and
	            ``'incomplete_sentences'`` holds ``{'paragraph', 'sentence'}``
	            entries. ``'repeated_phrases'`` and ``'unclear_references'`` are
	            present but not populated by this method.
	        """
	        issues = {
	            'long_paragraphs': [],
	            'repeated_phrases': [],
	            'unclear_references': [],
	            'incomplete_sentences': [],
	        }

	        for index, para in enumerate(text.split('\n\n')):
	            # Very long paragraphs might need breaking up.
	            if len(para) > self.LONG_PARAGRAPH_CHARS:
	                issues['long_paragraphs'].append({
	                    'paragraph': index,
	                    'length': len(para),
	                    'preview': para[:100] + '...',
	                })

	            body = para.strip()
	            # Markdown headings are not sentences; never report them.
	            if not body or body.startswith('#'):
	                continue

	            # Whatever follows the last sentence terminator is unterminated
	            # text; a paragraph that ends properly leaves an empty tail.
	            tail = self._SENTENCE_END.split(body)[-1].strip()
	            if len(tail) > self.MIN_FRAGMENT_CHARS:
	                issues['incomplete_sentences'].append({
	                    'paragraph': index,
	                    'sentence': tail[:100],
	                })

	        return issues

	    @staticmethod
	    def _strip_header(text: str) -> str:
	        """Return the text after the second ``---`` delimiter, stripped.

	        When fewer than two delimiters are present there is no complete
	        header to remove, so the whole text is returned (stripped).
	        """
	        first = text.find('---')
	        if first == -1:
	            return text.strip()
	        second = text.find('---', first + 3)
	        if second == -1:
	            return text.strip()
	        return text[second + 3:].strip()

	    def create_edited_version(self, original_text: str, edits: List[Dict] = None) -> str:
	        """Create edited version with clear attribution.

	        Args:
	            original_text: The compiled original, optionally starting with a
	                header enclosed by two ``---`` delimiters (which is removed).
	            edits: Accepted for interface compatibility; not applied by the
	                current implementation.

	        Returns:
	            A Markdown document: attribution header, editorial note, then the
	            cleaned content, ending with a newline.
	        """
	        from datetime import date

	        cleaned = self.clean_text(self._strip_header(original_text))
	        # Date the edited copy was generated, as YYYY-MM-DD.
	        generated_on = date.today().isoformat()

	        # Assembled line by line so the emitted Markdown never depends on how
	        # this source file happens to be indented.
	        document_lines = [
	            "# The Shadow of Lillya",
	            "## Original Material by Audrey Berger Welz",
	            "### Edited for Clarity",
	            "",
	            "Source: Extracted from Audrey's original draft manuscripts",
	            "Editing: Minor edits for clarity only - voice and style preserved",
	            f"Date: {generated_on}",
	            "",
	            "---",
	            "",
	            "## Editorial Note",
	            "",
	            "This version contains only Audrey Berger Welz's original writing, with minimal edits for clarity:",
	            "- Fixed obvious typos and formatting issues",
	            "- Normalized spacing and punctuation",
	            "- Preserved all of Audrey's unique voice, style, and word choices",
	            "- No content changes or additions",
	            "",
	            "Any material beyond this point that is not clearly marked is Audrey's original work.",
	            "",
	            "---",
	            "",
	            cleaned,
	        ]
	        return "\n".join(document_lines) + "\n"

	    def save_edited_version(self, edited_text: str) -> Path:
	        """Save the edited version.

	        Args:
	            edited_text: The document text to write.

	        Returns:
	            The path of the written file, ``audrey_edited_clean.md`` inside
	            ``self.edited_dir``.
	        """
	        output_file = self.edited_dir / "audrey_edited_clean.md"
	        with open(output_file, 'w', encoding='utf-8') as f:
	            f.write(edited_text)

	        return output_file

	def main():
	    """Run the full pipeline: load, analyse, clean, and save the manuscript.

	    Progress is reported on stdout at each stage, ending with the path of the
	    saved edited manuscript.
	    """
	    print("✏️ Editing Audrey's Material for Clarity...\n")

	    material_editor = AudreyMaterialEditor()

	    # Stage 1: read the compiled original.
	    print("📖 Loading original material...")
	    source_text = material_editor.load_original_material()
	    print(" ✓ Loaded\n")

	    # Stage 2: report passages that may want a human editor's attention.
	    print("🔍 Identifying editing needs...")
	    findings = material_editor.identify_editing_needs(source_text)
	    long_count = len(findings['long_paragraphs'])
	    incomplete_count = len(findings['incomplete_sentences'])
	    print(f" Found {long_count} long paragraphs")
	    print(f" Found {incomplete_count} potentially incomplete sentences\n")

	    # Stage 3: build the attributed, cleaned document.
	    print("📝 Creating edited version...")
	    edited_document = material_editor.create_edited_version(source_text)

	    # Stage 4: write it to disk.
	    print("💾 Saving edited version...")
	    saved_path = material_editor.save_edited_version(edited_document)

	    print("\n✅ Complete!")
	    print(f" 📄 Edited manuscript: {saved_path}")
	    print("\n⚠️ Note: This version contains ONLY Audrey's original material, edited minimally for clarity.")

	# Run the pipeline only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()