File size: 8,640 Bytes
042441a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
import logging
import os
from pathlib import Path
from typing import Optional

import google.generativeai as genai
from tqdm import tqdm

# Configure root logging once at import time; module loggers inherit it.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

class CoverageGenerator:
    """Generate a coverage (synopsis) document for a screenplay via Gemini.

    The screenplay is split into overlapping, size-bounded chunks; each
    chunk is summarized by the model, and the chunk summaries are then
    synthesized into one coherent synopsis. Estimated token usage is
    accumulated across all model calls and written into the output file.
    """

    def __init__(self):
        """Configure the Gemini client and reset usage counters.

        Raises:
            ValueError: if the GOOGLE_API_KEY environment variable is unset.
        """
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY not found")

        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel('gemini-pro')

        # Running totals of estimated token consumption across API calls.
        self.token_usage = {
            'prompt_tokens': 0,
            'completion_tokens': 0,
            'total_tokens': 0
        }

        # Maximum estimated tokens per chunk — conservative, to stay well
        # inside the model's context window.
        self.chunk_size = 8000

    def count_tokens(self, text: str) -> int:
        """Estimate the token count of *text*.

        Uses a crude heuristic of ~1.3 tokens per whitespace-separated
        word; this is an approximation, not an exact tokenizer count.
        """
        words = text.split()
        return int(len(words) * 1.3)

    def _track_usage(self, prompt_tokens: int, completion_tokens: int) -> None:
        """Accumulate estimated prompt/completion counts into self.token_usage."""
        self.token_usage['prompt_tokens'] += prompt_tokens
        self.token_usage['completion_tokens'] += completion_tokens
        self.token_usage['total_tokens'] += (prompt_tokens + completion_tokens)

    def chunk_screenplay(self, text: str) -> list:
        """Split *text* into chunks of at most ~self.chunk_size tokens.

        Scenes are delimited by blank lines; consecutive chunks repeat
        the last few scenes of the previous chunk so every summarization
        call has narrative context.

        Returns:
            list of chunk strings, in screenplay order.
        """
        logger.info("Chunking screenplay...")

        # Split into scenes (blank lines separate screenplay scenes).
        scenes = text.split("\n\n")

        chunks = []
        current_chunk = []
        current_size = 0
        overlap_scenes = 2  # number of trailing scenes to carry forward

        for scene in scenes:
            scene_size = self.count_tokens(scene)

            if current_size + scene_size > self.chunk_size and current_chunk:
                # Carry the last few scenes into the next chunk for context.
                overlap = current_chunk[-overlap_scenes:] if len(current_chunk) > overlap_scenes else current_chunk

                # Flush the completed chunk.
                chunks.append("\n\n".join(current_chunk))

                # Start the next chunk with the overlap plus the new scene.
                current_chunk = overlap + [scene]
                current_size = sum(self.count_tokens(s) for s in current_chunk)
            else:
                current_chunk.append(scene)
                current_size += scene_size

        # Flush the final partial chunk, if any.
        if current_chunk:
            chunks.append("\n\n".join(current_chunk))

        logger.info(f"Split screenplay into {len(chunks)} chunks with context overlap")
        return chunks

    def read_screenplay(self, filepath: Path) -> Optional[str]:
        """Read the cleaned screenplay file.

        Returns:
            The file contents, or None if the file could not be read
            or decoded.
        """
        try:
            logger.info(f"Reading screenplay from: {filepath}")
            with open(filepath, 'r', encoding='utf-8') as file:
                text = file.read()
                tokens = self.count_tokens(text)
                logger.info(f"Successfully read screenplay. Length: {tokens} tokens (estimated)")
                return text
        except (OSError, UnicodeDecodeError) as e:
            logger.error(f"Error reading screenplay: {e}")
            logger.error(f"Tried to read from: {filepath}")
            return None

    def generate_synopsis(self, chunk: str, chunk_num: int = 1, total_chunks: int = 1) -> Optional[str]:
        """Generate a synopsis for a single screenplay chunk.

        Args:
            chunk: the chunk text (may overlap neighboring chunks).
            chunk_num: 1-based index of this chunk, for the prompt and logs.
            total_chunks: total number of chunks, for the prompt and logs.

        Returns:
            The model's summary text, or None if the API call failed.
        """
        prompt = f"""As an experienced script analyst, analyze this section ({chunk_num}/{total_chunks}) of the screenplay.

        Important: This section may overlap with others to maintain context. Focus on:
        - Key plot developments and their implications for the larger story
        - Character appearances and development
        - How this section connects to the ongoing narrative
        - Major themes or motifs that emerge

        Provide a summary that captures both the specific events and their significance to the larger narrative.

        Screenplay section:
        {chunk}"""

        try:
            prompt_tokens = self.count_tokens(prompt)
            logger.debug(f"Chunk {chunk_num} prompt length: {prompt_tokens} tokens")

            with tqdm(total=1, desc=f"Processing chunk {chunk_num}/{total_chunks}") as pbar:
                response = self.model.generate_content(prompt)
                completion_tokens = self.count_tokens(response.text)
                pbar.update(1)

            self._track_usage(prompt_tokens, completion_tokens)

            return response.text
        except Exception as e:
            logger.error(f"Error processing chunk {chunk_num}: {str(e)}")
            logger.error("Full error details:", exc_info=True)
            return None

    def generate_final_synopsis(self, chunk_synopses: list) -> Optional[str]:
        """Combine chunk synopses into a final, coherent synopsis.

        Args:
            chunk_synopses: per-chunk summary strings, in order.

        Returns:
            The synthesized synopsis, or None if the API call failed.
        """
        combined_text = "\n\n".join([f"Section {i+1}:\n{synopsis}"
                                   for i, synopsis in enumerate(chunk_synopses)])

        prompt = f"""As an experienced script analyst, synthesize these section summaries into a comprehensive, 
        narratively cohesive synopsis of the entire screenplay. 

        You should have distinct sections on:
        1. The complete narrative arc from beginning to end
        2. Character development across the full story
        3. Major themes and how they evolve
        4. Key turning points and their impact
        5. The core conflict and its resolution

        Ensure the synopsis flows naturally and captures the full story without revealing the seams between sections.

        Section summaries:
        {combined_text}"""

        try:
            logger.info("Generating final synopsis")
            prompt_tokens = self.count_tokens(prompt)
            with tqdm(total=1, desc="Creating final synopsis") as pbar:
                response = self.model.generate_content(prompt)
                pbar.update(1)
            # Track this call too, so the usage summary covers every request
            # (previously the final-synthesis call was omitted from totals).
            self._track_usage(prompt_tokens, self.count_tokens(response.text))
            return response.text
        except Exception as e:
            logger.error(f"Error generating final synopsis: {str(e)}")
            return None

    def generate_coverage(self, screenplay_path: Path) -> bool:
        """Generate the full coverage document for a screenplay file.

        Reads the screenplay, chunks it, summarizes each chunk, synthesizes
        a final synopsis, and writes "coverage.txt" (synopsis plus a token
        usage summary) next to the input file.

        Returns:
            True on success, False if any stage failed.
        """
        logger.info("Starting coverage generation")

        # Reset usage so repeated runs on one instance report per-run totals.
        self.token_usage = {
            'prompt_tokens': 0,
            'completion_tokens': 0,
            'total_tokens': 0
        }

        with tqdm(total=4, desc="Generating coverage") as pbar:
            # Read screenplay
            screenplay_text = self.read_screenplay(screenplay_path)
            if not screenplay_text:
                return False
            pbar.update(1)

            # Split into chunks
            chunks = self.chunk_screenplay(screenplay_text)
            pbar.update(1)

            # Summarize each chunk; abort on the first failure.
            chunk_synopses = []
            for i, chunk in enumerate(chunks, 1):
                synopsis = self.generate_synopsis(chunk, i, len(chunks))
                if synopsis:
                    chunk_synopses.append(synopsis)
                else:
                    logger.error(f"Failed to process chunk {i}")
                    return False
            pbar.update(1)

            # Generate final synopsis
            final_synopsis = self.generate_final_synopsis(chunk_synopses)
            if not final_synopsis:
                return False

            # Save coverage next to the input screenplay.
            output_dir = screenplay_path.parent
            output_path = output_dir / "coverage.txt"

            try:
                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write("SCREENPLAY COVERAGE\n\n")
                    f.write("### SYNOPSIS ###\n\n")
                    f.write(final_synopsis)

                    # Add token usage summary
                    f.write("\n\n### TOKEN USAGE SUMMARY ###\n")
                    f.write(f"Prompt Tokens: {self.token_usage['prompt_tokens']}\n")
                    f.write(f"Completion Tokens: {self.token_usage['completion_tokens']}\n")
                    f.write(f"Total Tokens: {self.token_usage['total_tokens']}\n")

                logger.info("\nFinal Token Usage Summary:")
                logger.info(f"Prompt Tokens: {self.token_usage['prompt_tokens']}")
                logger.info(f"Completion Tokens: {self.token_usage['completion_tokens']}")
                logger.info(f"Total Tokens: {self.token_usage['total_tokens']}")

                pbar.update(1)
                return True
            except OSError as e:
                logger.error(f"Error saving coverage: {str(e)}")
                logger.error("Full error details:", exc_info=True)
                return False