import openai
from dotenv import load_dotenv

# Read OPENAI_API_KEY (and any other settings) from a local .env file.
load_dotenv()

class TranscriptTransformer:
    def __init__(self, model="gpt-4o-mini", max_tokens=128000):
        self.model = model
        self.max_model_tokens = max_tokens
        self.final_response_system_prompt = """You are an expert educational content creator.
Your task is to transform informal transcripts into structured, engaging teaching materials.
Focus on clarity, logical flow, and educational value."""
        self.summarizer_system_prompt = """You are an expert educational content creator.
Your task is to summarize the following transcript chunk.
Ensure that the summary is concise and captures the main points.
"""
        self.final_response_user_prompt = """Transform the following transcript into a structured {duration}-minute lecture.
Requirements:
1. Create a clear introduction that sets context and learning objectives
2. Organize the content into logical sections with clear headings
3. Include practical examples and real-world applications
4. Add discussion questions or interactive elements
5. Conclude with a summary and key takeaways
6. Target approximately {word_count} words
Format the output in markdown with clear section headers and proper spacing.
"""
        self.summarizer_user_prompt = """
{context}
Summarize the following transcript chunk:
{chunk}
"""
    def split_text_into_chunks(self, text: str) -> list[str]:
        """Split the text into chunks that fit within the model's token limit.

        Uses a rough heuristic of ~4 characters per token instead of a real
        tokenizer, so chunk boundaries are approximate.
        """
        max_chunk_chars = self.max_model_tokens * 4  # ~4 chars per token
        words = text.split()
        chunks = []
        current_chunk = []
        current_length = 0
        for word in words:
            current_chunk.append(word)
            current_length += len(word) + 1  # +1 for the joining space
            if current_length >= max_chunk_chars:
                chunks.append(" ".join(current_chunk))
                current_chunk = []
                current_length = 0
        if current_chunk:
            chunks.append(" ".join(current_chunk))
        return chunks
    def generate_transcript_chunk_summary(
        self, chunk: str, max_output_tokens: int, previous_summary=None
    ) -> str:
        """Summarize the current chunk with the previous chunk's summary as context."""
        context = (
            f"Summary of previous transcript chunk:\n{previous_summary}\n\n"
            if previous_summary
            else ""
        )
        response = openai.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self.summarizer_system_prompt},
                {
                    "role": "user",
                    "content": self.summarizer_user_prompt.format(
                        context=context, chunk=chunk
                    ),
                },
            ],
            max_tokens=max_output_tokens,
            temperature=0.7,
        )
        # Fall back to the raw chunk if the model returns an empty message.
        return response.choices[0].message.content or chunk
    def summarize_text(self, transcript: str, max_output_tokens: int) -> str:
        """Process a large transcript by splitting it into chunks and combining the chunk summaries."""
        chunks = self.split_text_into_chunks(transcript)
        if len(chunks) == 1:
            # The transcript already fits in a single chunk; return it unsummarized.
            return chunks[0]
        summarized_transcript = []
        previous_summary = None
        for chunk in chunks:
            chunk_summary = self.generate_transcript_chunk_summary(
                chunk, max_output_tokens, previous_summary
            )
            summarized_transcript.append(chunk_summary)
            previous_summary = chunk_summary
        return "\n\n".join(summarized_transcript)
    def generate_lecture(self, raw_text: str, lecture_duration: int = 30) -> str:
        # An average-paced lecturer speaks about 130 words per minute.
        target_word_count = lecture_duration * 130
        # Rough heuristic: ~4/3 tokens per English word.
        max_output_tokens = int(target_word_count * 4 / 3)
        summarized_transcript = self.summarize_text(
            raw_text, max_output_tokens=max_output_tokens
        )
        user_prompt = self.final_response_user_prompt.format(
            duration=lecture_duration, word_count=target_word_count
        )
        full_text = f"{user_prompt}\n\nTranscript:\n{summarized_transcript}"
        final_response = openai.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self.final_response_system_prompt},
                {"role": "user", "content": full_text},
            ],
            max_tokens=max_output_tokens,
            temperature=0.7,
        )
        return (
            final_response.choices[0].message.content
            or "Error: No response from the model."
        )
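
# Example usage: a minimal sketch assuming OPENAI_API_KEY is set in .env and
# "transcript.txt" (a hypothetical path) holds the raw transcript text.
if __name__ == "__main__":
    transformer = TranscriptTransformer()
    with open("transcript.txt", encoding="utf-8") as f:
        raw_transcript = f.read()
    # Print the generated ~30-minute lecture in markdown.
    print(transformer.generate_lecture(raw_transcript, lecture_duration=30))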