"""Generate screenplay coverage (a synthesized synopsis) using Google Gemini."""

import logging
import os
from pathlib import Path
from typing import Optional

import google.generativeai as genai
from tqdm import tqdm

# Set up logging
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


class CoverageGenerator:
    """Summarize a screenplay chunk by chunk, then merge into one synopsis.

    The screenplay is split on blank lines, grouped into chunks of roughly
    ``chunk_size`` estimated tokens (with a small overlap between neighbours
    for context), each chunk is summarized by the model, and the per-chunk
    summaries are synthesized into a final synopsis written to
    ``coverage.txt`` next to the screenplay.
    """

    # Maximum number of trailing scenes repeated at the start of the next
    # chunk so the model keeps some narrative context across chunk borders.
    OVERLAP_SCENES = 2

    def __init__(self, chunk_size: int = 8000, model_name: str = 'gemini-pro'):
        """Configure the Gemini client and reset token accounting.

        Args:
            chunk_size: Target chunk size in *estimated* tokens. The default
                is deliberately conservative.
            model_name: Name passed to ``genai.GenerativeModel``.

        Raises:
            ValueError: If ``chunk_size`` is not positive or the
                ``GOOGLE_API_KEY`` environment variable is unset/empty.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive number of tokens")

        # Initialize Gemini
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY not found")
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model_name)

        # Running totals of estimated tokens sent to / received from the model
        self.token_usage = self._empty_usage()

        # Chunk size (in estimated tokens)
        self.chunk_size = chunk_size

    @staticmethod
    def _empty_usage() -> dict:
        """Return a fresh, zeroed token-usage record."""
        return {
            'prompt_tokens': 0,
            'completion_tokens': 0,
            'total_tokens': 0,
        }

    def _record_usage(self, prompt_tokens: int, completion_tokens: int) -> None:
        """Add one request's estimated token counts to the running totals."""
        self.token_usage['prompt_tokens'] += prompt_tokens
        self.token_usage['completion_tokens'] += completion_tokens
        self.token_usage['total_tokens'] += prompt_tokens + completion_tokens

    def _generate(self, prompt: str, progress_desc: str) -> str:
        """Send ``prompt`` to the model, record token usage, return the text.

        Exceptions raised by the model call or by reading ``response.text``
        propagate to the caller, which decides how to report them.
        """
        prompt_tokens = self.count_tokens(prompt)
        with tqdm(total=1, desc=progress_desc) as pbar:
            response = self.model.generate_content(prompt)
            completion = response.text
            pbar.update(1)
        self._record_usage(prompt_tokens, self.count_tokens(completion))
        return completion

    def count_tokens(self, text: str) -> int:
        """Estimate the token count of ``text``.

        This is a heuristic, not a tokenizer: whitespace-separated words are
        counted and scaled by 1.3.
        """
        words = text.split()
        return int(len(words) * 1.3)

    def _select_overlap(self, chunk_scenes: list, budget: int) -> list:
        """Pick the trailing scenes of a finished chunk to repeat in the next.

        Args:
            chunk_scenes: ``(scene_text, token_estimate)`` pairs of the chunk
                that was just closed (never empty).
            budget: Estimated tokens left in the next chunk once the scene
                that triggered the split has been accounted for.

        Returns:
            At most ``OVERLAP_SCENES`` trailing pairs. It is always strictly
            fewer than the whole chunk (otherwise the next chunk would simply
            repeat the previous one) and never more than fits in ``budget``.
        """
        keep = min(self.OVERLAP_SCENES, len(chunk_scenes) - 1)
        # Explicit start index: a ``[-keep:]`` slice would return the whole
        # list when ``keep`` is 0.
        overlap = chunk_scenes[len(chunk_scenes) - keep:]
        # Drop the oldest overlap scenes first; the most recent context is
        # the most useful for the next chunk.
        while overlap and sum(size for _, size in overlap) > budget:
            overlap.pop(0)
        return overlap

    def chunk_screenplay(self, text: str) -> list:
        """Split the screenplay into chunks with overlap for context.

        The text is split on blank lines (``"\\n\\n"``); each resulting segment
        is treated as one indivisible "scene". Scenes are accumulated until
        adding the next one would exceed ``self.chunk_size`` estimated
        tokens, at which point the chunk is closed and a new one is started
        with a few trailing scenes of the previous chunk as context.

        A single scene larger than ``self.chunk_size`` is not split and
        therefore yields an oversized chunk on its own.

        Args:
            text: Full screenplay text.

        Returns:
            List of chunk strings (scenes re-joined with blank lines). Empty
            when ``text`` contains only whitespace.
        """
        logger.info("Chunking screenplay...")

        # Whitespace-only segments carry no content; skipping them also makes
        # empty input produce no chunks instead of a single empty chunk.
        scenes = [scene for scene in text.split("\n\n") if scene.strip()]

        chunks = []
        current = []  # (scene_text, token_estimate) pairs of the open chunk
        current_size = 0

        for scene in scenes:
            scene_size = self.count_tokens(scene)

            if current and current_size + scene_size > self.chunk_size:
                # Close the open chunk and seed the next one with overlap.
                chunks.append("\n\n".join(s for s, _ in current))
                current = self._select_overlap(
                    current, self.chunk_size - scene_size)
                current_size = sum(size for _, size in current)

            current.append((scene, scene_size))
            current_size += scene_size

        # Add the last chunk if it exists
        if current:
            chunks.append("\n\n".join(s for s, _ in current))

        logger.info("Split screenplay into %s chunks with context overlap",
                    len(chunks))
        return chunks

    def read_screenplay(self, filepath: Path) -> Optional[str]:
        """Read the cleaned screenplay file as UTF-8 text.

        Args:
            filepath: Location of the screenplay file.

        Returns:
            The file contents, or ``None`` if the file could not be read or
            decoded (the error is logged).
        """
        logger.info("Reading screenplay from: %s", filepath)
        try:
            with open(filepath, 'r', encoding='utf-8') as file:
                text = file.read()
        except (OSError, ValueError) as e:
            # OSError: missing file / permissions; ValueError: decode errors
            # (UnicodeDecodeError) or an invalid path string.
            logger.error("Error reading screenplay: %s", e)
            logger.error("Tried to read from: %s", filepath)
            return None

        logger.info("Successfully read screenplay. Length: %s tokens (estimated)",
                    self.count_tokens(text))
        return text

    def generate_synopsis(self, chunk: str, chunk_num: int = 1,
                          total_chunks: int = 1) -> Optional[str]:
        """Generate a synopsis for a single chunk.

        Args:
            chunk: Text of the screenplay section.
            chunk_num: 1-based position of this chunk.
            total_chunks: Total number of chunks being processed.

        Returns:
            The model's summary text, or ``None`` if the request failed (the
            error is logged). Estimated token usage is added to
            ``self.token_usage`` on success.
        """
        # Prompt wording is kept exactly as before; only the source layout
        # (adjacent string literals) differs.
        prompt = (
            "As an experienced script analyst, analyze this section "
            f"({chunk_num}/{total_chunks}) of the screenplay. "
            "Important: This section may overlap with others to maintain context. "
            "Focus on: "
            "- Key plot developments and their implications for the larger story "
            "- Character appearances and development "
            "- How this section connects to the ongoing narrative "
            "- Major themes or motifs that emerge "
            "Provide a summary that captures both the specific events and their "
            "significance to the larger narrative. \n"
            f"Screenplay section: {chunk}"
        )

        try:
            logger.debug("Chunk %s prompt length: %s tokens",
                         chunk_num, self.count_tokens(prompt))
            return self._generate(
                prompt, f"Processing chunk {chunk_num}/{total_chunks}")
        except Exception as e:
            # Boundary around the remote call: the SDK can raise many
            # different error types, and callers expect ``None`` on failure.
            logger.error("Error processing chunk %s: %s", chunk_num, e)
            logger.error("Full error details:", exc_info=True)
            return None

    def generate_final_synopsis(self, chunk_synopses: list) -> Optional[str]:
        """Combine chunk synopses into a final, coherent synopsis.

        Args:
            chunk_synopses: Per-chunk summaries, in screenplay order.

        Returns:
            The synthesized synopsis, or ``None`` if the request failed (the
            error is logged). Estimated token usage is added to
            ``self.token_usage`` on success, so the usage summary covers this
            request as well as the per-chunk ones.
        """
        combined_text = "\n\n".join(
            f"Section {i}:\n{synopsis}"
            for i, synopsis in enumerate(chunk_synopses, 1)
        )

        # Prompt wording is kept exactly as before; only the source layout
        # (adjacent string literals) differs.
        prompt = (
            "As an experienced script analyst, synthesize these section "
            "summaries into a comprehensive, narratively cohesive synopsis of "
            "the entire screenplay. "
            "You should have distinct sections on: "
            "1. The complete narrative arc from beginning to end "
            "2. Character development across the full story "
            "3. Major themes and how they evolve "
            "4. Key turning points and their impact "
            "5. The core conflict and its resolution "
            "Ensure the synopsis flows naturally and captures the full story "
            "without revealing the seams between sections. \n"
            f"Section summaries: {combined_text}"
        )

        try:
            logger.info("Generating final synopsis")
            return self._generate(prompt, "Creating final synopsis")
        except Exception as e:
            # Same remote-call boundary as generate_synopsis.
            logger.error("Error generating final synopsis: %s", e)
            return None

    def generate_coverage(self, screenplay_path: Path) -> bool:
        """Generate the full coverage document for a screenplay.

        Reads the screenplay, summarizes it chunk by chunk, synthesizes a
        final synopsis and writes it, followed by a token usage summary, to
        ``coverage.txt`` in the screenplay's directory.

        Args:
            screenplay_path: Path to the screenplay text file.

        Returns:
            ``True`` if the coverage file was written, ``False`` if any step
            failed (the reason is logged).
        """
        logger.info("Starting coverage generation")

        # Accept plain strings too; ``.parent`` is needed further down.
        screenplay_path = Path(screenplay_path)
        self.token_usage = self._empty_usage()

        with tqdm(total=4, desc="Generating coverage") as pbar:
            # Read screenplay
            screenplay_text = self.read_screenplay(screenplay_path)
            if screenplay_text is None:
                return False  # read_screenplay already logged the cause
            if not screenplay_text:
                logger.error("Screenplay file is empty: %s", screenplay_path)
                return False
            pbar.update(1)

            # Split into chunks
            chunks = self.chunk_screenplay(screenplay_text)
            if not chunks:
                # Whitespace-only file: nothing to summarize.
                logger.error("Screenplay contains no text to process: %s",
                             screenplay_path)
                return False
            pbar.update(1)

            # Process each chunk; one failed chunk aborts the whole run
            # because the final synopsis would otherwise have a gap.
            chunk_synopses = []
            for i, chunk in enumerate(chunks, 1):
                synopsis = self.generate_synopsis(chunk, i, len(chunks))
                if not synopsis:
                    logger.error("Failed to process chunk %s", i)
                    return False
                chunk_synopses.append(synopsis)
            pbar.update(1)

            # Generate final synopsis
            final_synopsis = self.generate_final_synopsis(chunk_synopses)
            if not final_synopsis:
                return False

            # Save coverage
            output_path = screenplay_path.parent / "coverage.txt"
            usage = self.token_usage
            document = "".join([
                "SCREENPLAY COVERAGE\n\n",
                "### SYNOPSIS ###\n\n",
                final_synopsis,
                # Token usage summary
                "\n\n### TOKEN USAGE SUMMARY ###\n",
                f"Prompt Tokens: {usage['prompt_tokens']}\n",
                f"Completion Tokens: {usage['completion_tokens']}\n",
                f"Total Tokens: {usage['total_tokens']}\n",
            ])
            try:
                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write(document)
            except (OSError, ValueError) as e:
                # OSError: unwritable location; ValueError: text that cannot
                # be encoded as UTF-8 (e.g. lone surrogates).
                logger.error("Error saving coverage: %s", e)
                logger.error("Full error details:", exc_info=True)
                return False

            logger.info("\nFinal Token Usage Summary:")
            logger.info("Prompt Tokens: %s", usage['prompt_tokens'])
            logger.info("Completion Tokens: %s", usage['completion_tokens'])
            logger.info("Total Tokens: %s", usage['total_tokens'])

            pbar.update(1)
            return True