File size: 8,640 Bytes
042441a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
import logging
import os
from pathlib import Path
from typing import Optional

import google.generativeai as genai
from tqdm import tqdm

# Configure root logging once at import time; module loggers inherit it.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

class CoverageGenerator:
    """Generate a coverage (synopsis) document for a screenplay via Gemini.

    The screenplay is split into overlapping, size-bounded chunks; each
    chunk is summarized by the model, and the chunk summaries are then
    synthesized into one coherent synopsis. Estimated token usage is
    accumulated across all model calls and written into the output file.
    """

    def __init__(self):
        """Configure the Gemini client and reset usage counters.

        Raises:
            ValueError: if the GOOGLE_API_KEY environment variable is unset.
        """
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY not found")

        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel('gemini-pro')

        # Running totals of estimated token consumption across API calls.
        self.token_usage = {
            'prompt_tokens': 0,
            'completion_tokens': 0,
            'total_tokens': 0
        }

        # Maximum estimated tokens per chunk — conservative, to stay well
        # inside the model's context window.
        self.chunk_size = 8000

    def count_tokens(self, text: str) -> int:
        """Estimate the token count of *text*.

        Uses a crude heuristic of ~1.3 tokens per whitespace-separated
        word; this is an approximation, not an exact tokenizer count.
        """
        words = text.split()
        return int(len(words) * 1.3)

    def _track_usage(self, prompt_tokens: int, completion_tokens: int) -> None:
        """Accumulate estimated prompt/completion counts into self.token_usage."""
        self.token_usage['prompt_tokens'] += prompt_tokens
        self.token_usage['completion_tokens'] += completion_tokens
        self.token_usage['total_tokens'] += (prompt_tokens + completion_tokens)

    def chunk_screenplay(self, text: str) -> list:
        """Split *text* into chunks of at most ~self.chunk_size tokens.

        Scenes are delimited by blank lines; consecutive chunks repeat
        the last few scenes of the previous chunk so every summarization
        call has narrative context.

        Returns:
            list of chunk strings, in screenplay order.
        """
        logger.info("Chunking screenplay...")

        # Split into scenes (blank lines separate screenplay scenes).
        scenes = text.split("\n\n")

        chunks = []
        current_chunk = []
        current_size = 0
        overlap_scenes = 2  # number of trailing scenes to carry forward

        for scene in scenes:
            scene_size = self.count_tokens(scene)

            if current_size + scene_size > self.chunk_size and current_chunk:
                # Carry the last few scenes into the next chunk for context.
                overlap = current_chunk[-overlap_scenes:] if len(current_chunk) > overlap_scenes else current_chunk

                # Flush the completed chunk.
                chunks.append("\n\n".join(current_chunk))

                # Start the next chunk with the overlap plus the new scene.
                current_chunk = overlap + [scene]
                current_size = sum(self.count_tokens(s) for s in current_chunk)
            else:
                current_chunk.append(scene)
                current_size += scene_size

        # Flush the final partial chunk, if any.
        if current_chunk:
            chunks.append("\n\n".join(current_chunk))

        logger.info(f"Split screenplay into {len(chunks)} chunks with context overlap")
        return chunks

    def read_screenplay(self, filepath: Path) -> Optional[str]:
        """Read the cleaned screenplay file.

        Returns:
            The file contents, or None if the file could not be read
            or decoded.
        """
        try:
            logger.info(f"Reading screenplay from: {filepath}")
            with open(filepath, 'r', encoding='utf-8') as file:
                text = file.read()
                tokens = self.count_tokens(text)
                logger.info(f"Successfully read screenplay. Length: {tokens} tokens (estimated)")
                return text
        except (OSError, UnicodeDecodeError) as e:
            logger.error(f"Error reading screenplay: {e}")
            logger.error(f"Tried to read from: {filepath}")
            return None

    def generate_synopsis(self, chunk: str, chunk_num: int = 1, total_chunks: int = 1) -> Optional[str]:
        """Generate a synopsis for a single screenplay chunk.

        Args:
            chunk: the chunk text (may overlap neighboring chunks).
            chunk_num: 1-based index of this chunk, for the prompt and logs.
            total_chunks: total number of chunks, for the prompt and logs.

        Returns:
            The model's summary text, or None if the API call failed.
        """
        prompt = f"""As an experienced script analyst, analyze this section ({chunk_num}/{total_chunks}) of the screenplay.

        Important: This section may overlap with others to maintain context. Focus on:
        - Key plot developments and their implications for the larger story
        - Character appearances and development
        - How this section connects to the ongoing narrative
        - Major themes or motifs that emerge

        Provide a summary that captures both the specific events and their significance to the larger narrative.

        Screenplay section:
        {chunk}"""

        try:
            prompt_tokens = self.count_tokens(prompt)
            logger.debug(f"Chunk {chunk_num} prompt length: {prompt_tokens} tokens")

            with tqdm(total=1, desc=f"Processing chunk {chunk_num}/{total_chunks}") as pbar:
                response = self.model.generate_content(prompt)
                completion_tokens = self.count_tokens(response.text)
                pbar.update(1)

            self._track_usage(prompt_tokens, completion_tokens)

            return response.text
        except Exception as e:
            logger.error(f"Error processing chunk {chunk_num}: {str(e)}")
            logger.error("Full error details:", exc_info=True)
            return None

    def generate_final_synopsis(self, chunk_synopses: list) -> Optional[str]:
        """Combine chunk synopses into a final, coherent synopsis.

        Args:
            chunk_synopses: per-chunk summary strings, in order.

        Returns:
            The synthesized synopsis, or None if the API call failed.
        """
        combined_text = "\n\n".join([f"Section {i+1}:\n{synopsis}"
                                   for i, synopsis in enumerate(chunk_synopses)])

        prompt = f"""As an experienced script analyst, synthesize these section summaries into a comprehensive, 
        narratively cohesive synopsis of the entire screenplay. 

        You should have distinct sections on:
        1. The complete narrative arc from beginning to end
        2. Character development across the full story
        3. Major themes and how they evolve
        4. Key turning points and their impact
        5. The core conflict and its resolution

        Ensure the synopsis flows naturally and captures the full story without revealing the seams between sections.

        Section summaries:
        {combined_text}"""

        try:
            logger.info("Generating final synopsis")
            prompt_tokens = self.count_tokens(prompt)
            with tqdm(total=1, desc="Creating final synopsis") as pbar:
                response = self.model.generate_content(prompt)
                pbar.update(1)
            # Track this call too, so the usage summary covers every request
            # (previously the final-synthesis call was omitted from totals).
            self._track_usage(prompt_tokens, self.count_tokens(response.text))
            return response.text
        except Exception as e:
            logger.error(f"Error generating final synopsis: {str(e)}")
            return None

    def generate_coverage(self, screenplay_path: Path) -> bool:
        """Generate the full coverage document for a screenplay file.

        Reads the screenplay, chunks it, summarizes each chunk, synthesizes
        a final synopsis, and writes "coverage.txt" (synopsis plus a token
        usage summary) next to the input file.

        Returns:
            True on success, False if any stage failed.
        """
        logger.info("Starting coverage generation")

        # Reset usage so repeated runs on one instance report per-run totals.
        self.token_usage = {
            'prompt_tokens': 0,
            'completion_tokens': 0,
            'total_tokens': 0
        }

        with tqdm(total=4, desc="Generating coverage") as pbar:
            # Read screenplay
            screenplay_text = self.read_screenplay(screenplay_path)
            if not screenplay_text:
                return False
            pbar.update(1)

            # Split into chunks
            chunks = self.chunk_screenplay(screenplay_text)
            pbar.update(1)

            # Summarize each chunk; abort on the first failure.
            chunk_synopses = []
            for i, chunk in enumerate(chunks, 1):
                synopsis = self.generate_synopsis(chunk, i, len(chunks))
                if synopsis:
                    chunk_synopses.append(synopsis)
                else:
                    logger.error(f"Failed to process chunk {i}")
                    return False
            pbar.update(1)

            # Generate final synopsis
            final_synopsis = self.generate_final_synopsis(chunk_synopses)
            if not final_synopsis:
                return False

            # Save coverage next to the input screenplay.
            output_dir = screenplay_path.parent
            output_path = output_dir / "coverage.txt"

            try:
                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write("SCREENPLAY COVERAGE\n\n")
                    f.write("### SYNOPSIS ###\n\n")
                    f.write(final_synopsis)

                    # Add token usage summary
                    f.write("\n\n### TOKEN USAGE SUMMARY ###\n")
                    f.write(f"Prompt Tokens: {self.token_usage['prompt_tokens']}\n")
                    f.write(f"Completion Tokens: {self.token_usage['completion_tokens']}\n")
                    f.write(f"Total Tokens: {self.token_usage['total_tokens']}\n")

                logger.info("\nFinal Token Usage Summary:")
                logger.info(f"Prompt Tokens: {self.token_usage['prompt_tokens']}")
                logger.info(f"Completion Tokens: {self.token_usage['completion_tokens']}")
                logger.info(f"Total Tokens: {self.token_usage['total_tokens']}")

                pbar.update(1)
                return True
            except OSError as e:
                logger.error(f"Error saving coverage: {str(e)}")
                logger.error("Full error details:", exc_info=True)
                return False