import openai
from dotenv import load_dotenv
load_dotenv()
class TranscriptTransformer:
    """Turn a raw, informal transcript into a structured lecture in markdown.

    Long transcripts are split into chunks, each chunk is summarized (with the
    previous chunk's summary as rolling context), and the combined summary is
    sent to the chat model with a lecture-generation prompt.
    """

    def __init__(self, model="gpt-4o-mini", max_tokens=128000):
        # Model used for both the per-chunk summarization and the final
        # lecture-generation calls.
        self.model = model
        # Upper bound used when chunking the transcript.  NOTE(review): the
        # chunker below measures *characters*, not model tokens, so this is a
        # rough proxy for the true context-window limit — confirm acceptable.
        self.max_model_tokens = max_tokens
        self.final_response_system_prompt = """You are an expert educational content creator.
Your task is to transform informal transcripts into structured, engaging teaching materials.
Focus on clarity, logical flow, and educational value."""
        self.summarizer_system_prompt = """You are an expert educational content creator.
Your task is to summarize the following transcript chunk.
Ensure that the summary is concise and captures the main points.
"""
        self.final_response_user_prompt = """Transform the following transcript into a structured {duration}-minute lecture.
Requirements:
1. Create a clear introduction that sets context and learning objectives
2. Organize the content into logical sections with clear headings
3. Include practical examples and real-world applications
4. Add discussion questions or interactive elements
5. Conclude with a summary and key takeaways
6. Target approximately {word_count} words
Format the output in markdown with clear section headers and proper spacing.
"""
        self.summarizer_user_prompt = """
{context}
Summarize the following transcript chunk:
{chunk}
"""

    def split_text_into_chunks(self, text: str) -> list[str]:
        """Split ``text`` into whitespace-preserving-ish chunks.

        A chunk is emitted once its joined character length reaches
        ``self.max_model_tokens`` (a character-count approximation of the
        token limit).  Returns an empty list for empty/whitespace-only input.
        """
        words = text.split()
        chunks: list[str] = []
        current_chunk: list[str] = []
        for word in words:
            current_chunk.append(word)
            # Character length stands in for token count here (approximation).
            if len(" ".join(current_chunk)) >= self.max_model_tokens:
                chunks.append(" ".join(current_chunk))
                current_chunk = []
        if current_chunk:
            chunks.append(" ".join(current_chunk))
        return chunks

    def generate_transcript_chunk_summary(
        self, chunk: str, max_output_tokens: int, previous_summary=None
    ) -> str:
        """Summarize one transcript chunk, threading in the prior summary.

        Args:
            chunk: The transcript text to summarize.
            max_output_tokens: Cap on the completion length.
            previous_summary: Summary of the preceding chunk, used as rolling
                context so the chunk summaries stay coherent; None for the
                first chunk.

        Returns:
            The model's summary, or the original chunk if the response
            content is empty (best-effort fallback).
        """
        context = (
            f"Summary of previous transcript chunk:\n{previous_summary}\n\n"
            if previous_summary
            else ""
        )
        response = openai.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self.summarizer_system_prompt},
                # BUG FIX: the user message must come from the *user* prompt
                # template.  The original formatted the system prompt (which
                # has no {context}/{chunk} placeholders), so the transcript
                # chunk was silently never sent to the model.
                {
                    "role": "user",
                    "content": self.summarizer_user_prompt.format(
                        context=context, chunk=chunk
                    ),
                },
            ],
            max_tokens=max_output_tokens,
            temperature=0.7,
        )
        generated_text = response.choices[0].message.content or chunk
        return generated_text

    def summarize_text(self, transcript: str, max_output_tokens: int) -> str:
        """Summarize a (possibly large) transcript chunk-by-chunk.

        A single-chunk transcript is returned verbatim (no API call).
        Otherwise each chunk is summarized with the previous summary as
        context, and the summaries are joined with blank lines.
        """
        chunks = self.split_text_into_chunks(transcript)
        if len(chunks) == 1:
            return chunks[0]
        summarized_transcript = []
        previous_summary = None
        for chunk in chunks:
            teaching_transcript = self.generate_transcript_chunk_summary(
                chunk, max_output_tokens, previous_summary
            )
            summarized_transcript.append(teaching_transcript)
            previous_summary = teaching_transcript
        return "\n\n".join(summarized_transcript)

    def generate_lecture(self, raw_text: str, lecture_duration: int = 30) -> str:
        """Generate a markdown lecture of roughly ``lecture_duration`` minutes.

        Args:
            raw_text: The raw transcript to transform.
            lecture_duration: Target lecture length in minutes.

        Returns:
            The generated lecture text, or an error message if the model
            returned no content.
        """
        # An average-paced lecturer speaks ~130 words per minute; this word
        # target doubles as the completion token cap (words ~ tokens, rough).
        max_output_tokens = lecture_duration * 130
        summarized_transcript = self.summarize_text(
            raw_text, max_output_tokens=max_output_tokens
        )
        full_text = (
            f"{self.final_response_user_prompt.format(duration=lecture_duration, word_count=max_output_tokens)}"
            f"\n\nTranscript:\n{summarized_transcript}"
        )
        final_response = openai.chat.completions.create(
            # BUG FIX: honor the configured model instead of the hard-coded
            # "gpt-4o-mini" the original used here (inconsistent with the
            # summarization call, which already used self.model).
            model=self.model,
            messages=[
                {"role": "system", "content": self.final_response_system_prompt},
                {"role": "user", "content": full_text},
            ],
            max_tokens=max_output_tokens,
            temperature=0.7,
        )
        return (
            final_response.choices[0].message.content
            or "Error: No response from the model."
        )