#!/usr/bin/env python3
"""
Comprehensive GPT-OSS-120B Demonstration with Output Saving
"""

from mlx_lm import load, generate
import logging
import re
import time
import json
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import plotly.graph_objects as go
import plotly.express as px
from collections import Counter
import numpy as np
from typing import List, Dict, Optional
import textwrap
import os
from datetime import datetime

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Default model identifier passed to mlx_lm.load().
DEFAULT_MODEL_PATH = "mlx-community/gpt-oss-120b-MXFP4-Q4"

# Summary lengths (in words) produced by GPTOSSDemo.generate_summaries().
DEFAULT_SUMMARY_LENGTHS = (10, 150, 200, 250, 300)

# Topics whose mentions are counted for the topic-distribution pie chart.
LECTURE_TOPICS = ('AI', 'storytelling', 'ethics', 'risk', 'cooperation', 'trust', 'alignment')

# Patterns that strip leaked "analysis"/word-counting chatter from a summary.
# They are deliberately aggressive and may also remove legitimate text that
# happens to match (e.g. a sentence containing "10 words").
_SUMMARY_PREAMBLE_RE = re.compile(
    r"(analysis|count|words|draft|let's|must be exactly).*?summary:",
    flags=re.IGNORECASE | re.DOTALL,
)
_SUMMARY_COUNTING_RE = re.compile(r"now count words.*", flags=re.IGNORECASE | re.DOTALL)
_SUMMARY_WORDCOUNT_RE = re.compile(r"\d+ words.*")

# ---------------------------------------------------------------------------
# Prompt templates. Filled with str.format(); the templates themselves contain
# no literal braces, and interpolated transcript text is never re-parsed.
# ---------------------------------------------------------------------------

_TSHIRT_PROMPT = """\
Create 3 graphic t-shirt design prompts for Flux1-Krea-dev based on Yuval Noah Harari's lecture "Storytelling, Human Cooperation, and the Rise of AI" in London on June 11, 2025.

Each prompt should:
1. Include 1-2 powerful words that capture the essence of the lecture
2. Describe a visually striking design that represents the themes
3. Incorporate elements related to storytelling, AI, and human cooperation
4. Be suitable for printing on a t-shirt

Lecture themes: {transcript}

Create 3 distinct prompts:

PROMPT 1:
Words:
Design:

PROMPT 2:
Words:
Design:

PROMPT 3:
Words:
Design:
"""

_MINIMALIST_PROMPT = """\
Create 3 minimalist t-shirt design concepts based on Yuval Noah Harari's lecture.
Each should feature only 1-2 words that perfectly capture the essence of the lecture.

Lecture themes: {transcript}

Design 1: [Word(s)] - [Brief explanation]
Design 2: [Word(s)] - [Brief explanation]
Design 3: [Word(s)] - [Brief explanation]"""

_SUMMARY_PROMPT = """\
Create a precise {length}-word summary of this lecture.
Focus on key themes: storytelling, AI risks/benefits, alignment problem, and human values.
Provide only the final concise summary without any additional commentary or word counting.

Transcript: {transcript}

{length}-word summary:"""

_DEBATE_PROMPT = """\
Based on this lecture, create a structured debate with 5 strong arguments FOR rapid AI development and 5 strong arguments AGAINST rapid AI development.
Format as two clear sections with compelling points.

Lecture content: {transcript}

DEBATE STRUCTURE:

ARGUMENTS FOR RAPID AI DEVELOPMENT:
1.
2.
3.
4.
5.

ARGUMENTS AGAINST RAPID AI DEVELOPMENT:
1.
2.
3.
4.
5.
"""

_ARTICLE_PROMPT = """\
Write a comprehensive 500-word article suitable for a technology publication about this Yuval Harari lecture.

Include:
- Key themes discussed
- Importance of the alignment problem
- Societal implications of AI storytelling
- Expert perspectives from the lecture
- Future outlook

Transcript: {transcript}

ARTICLE:"""

_EDITORIAL_PROMPT = """\
Write a compelling editorial (400 words) expressing a strong viewpoint about the issues raised in this lecture.
Take a clear stance on AI regulation and development, supporting your position with evidence from the lecture.

Key lecture points: {transcript}

EDITORIAL:"""

_QNA_PROMPT = """\
Create 10 insightful questions about this lecture with detailed answers.
Focus on the most important and controversial aspects.

Lecture content: {transcript}

QUESTIONS AND ANSWERS:

1. Q:
   A:

2. Q:
   A:

[Continue for 10 questions]"""

_INSIGHTS_PROMPT = """\
Extract the 7 most profound insights from this lecture. For each insight:
1. State the insight clearly
2. Explain its significance
3. Provide supporting evidence from the lecture
4. Rate its importance (1-10)

Lecture: {transcript}

KEY INSIGHTS:"""

_RECOMMENDATIONS_PROMPT = """\
Based on this lecture, create:
1. 5 policy recommendations for governments
2. 5 recommendations for AI companies
3. 5 personal actions individuals can take
4. 3 global cooperation initiatives needed

Lecture content: {transcript}

RECOMMENDATIONS:"""

_README_TEMPLATE = """\
# GPT-OSS-120B Analysis Output

## Analysis of Yuval Noah Harari Lecture on AI and Humanity

### Generated on: {generated_on}

### Contents:

1. **Original Transcript** - The complete lecture transcript
2. **Multi-Length Summaries** - Summaries of various lengths (10-300 words)
3. **Data Visualizations** - Word frequency, word cloud, and topic distribution
4. **AI Development Debate** - Pros and cons of rapid AI development
5. **Professional Article** - Technology publication-style article
6. **Editorial Opinion** - Strong viewpoint on AI regulation
7. **Q&A Session** - 10 insightful questions with detailed answers
8. **Lecture Timelines** - Text and visual timelines of the lecture structure
9. **Key Insights** - 7 profound insights with significance and ratings
10. **Recommendations** - Policy, personal, and global cooperation recommendations
11. **T-Shirt Designs** - Flux1-Krea-dev graphic t-shirt prompts

### Visualization Files:

- HTML files: Interactive Plotly visualizations
- PNG files: Static images (word cloud)

### Model Information:

- Model: GPT-OSS-120B (4-bit quantized)
- Parameters: 120 billion
- Hardware: Apple M3 Ultra with 512GB RAM

### Analysis Themes:

- Storytelling as human differentiator
- AI risks and benefits
- Alignment problem
- Ethical frameworks
- Human cooperation and trust
- Future implications of AI
"""


class GPTOSSDemo:
    """Run a series of LLM-driven analyses over a lecture transcript.

    Every artefact (text, JSON, Plotly HTML, PNG) is written into a single
    output directory created when the instance is constructed.
    """

    def __init__(self, model_path: str = DEFAULT_MODEL_PATH, output_dir: Optional[str] = None):
        """Load the model and create the output directory.

        Args:
            model_path: Model identifier or path handed to ``mlx_lm.load``.
            output_dir: Directory for generated files. When omitted, a
                timestamped ``gpt_oss_output_YYYYmmdd_HHMMSS`` directory is
                used.
        """
        logger.info("🚀 Loading GPT-OSS-120B...")
        self.model, self.tokenizer = load(model_path)
        logger.info("✅ Model loaded successfully!")

        self.transcript = ""
        self.timestamps = {}
        self.timestamps_2 = {}

        if output_dir is None:
            output_dir = f"gpt_oss_output_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)
        logger.info("📁 Output directory created: %s", self.output_dir)

    # ------------------------------------------------------------------
    # Output helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _print_banner(title: str, char: str = "=", width: int = 80) -> None:
        """Print a blank line followed by *title* framed by two rule lines."""
        rule = char * width
        print("\n" + rule)
        print(title)
        print(rule)

    def save_output(self, content: str, filename: str):
        """Write *content* as UTF-8 text to *filename* in the output directory.

        Returns:
            The path of the written file.
        """
        filepath = os.path.join(self.output_dir, filename)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(content)
        logger.info("💾 Saved output to: %s", filepath)
        return filepath

    def save_plotly_html(self, fig, filename: str):
        """Write a Plotly figure as an HTML file in the output directory.

        Returns:
            The path of the written file.
        """
        filepath = os.path.join(self.output_dir, filename)
        fig.write_html(filepath)
        logger.info("📊 Saved Plotly visualization to: %s", filepath)
        return filepath

    def save_matplotlib_figure(self, fig, filename: str):
        """Save a matplotlib figure (anything exposing ``savefig``) at 300 dpi.

        Returns:
            The path of the written file.
        """
        filepath = os.path.join(self.output_dir, filename)
        fig.savefig(filepath, bbox_inches='tight', dpi=300)
        logger.info("📈 Saved matplotlib figure to: %s", filepath)
        return filepath

    # ------------------------------------------------------------------
    # Data loading
    # ------------------------------------------------------------------

    @staticmethod
    def _read_json(path: str):
        """Return the parsed contents of the UTF-8 JSON file at *path*."""
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def load_data(self, transcript_path: str, timestamps_path: Optional[str] = None,
                  timestamps_2_path: Optional[str] = None):
        """Load the transcript and optional timestamp files.

        Each loaded input is also copied into the output directory.

        Args:
            transcript_path: Path to the plain-text transcript.
            timestamps_path: Optional path to the first timestamps JSON file.
            timestamps_2_path: Optional path to the second timestamps JSON file.

        Raises:
            Exception: Any error raised while reading, parsing or saving is
                logged and re-raised unchanged.
        """
        try:
            with open(transcript_path, 'r', encoding='utf-8') as f:
                self.transcript = f.read()
            logger.info("✅ Loaded transcript: %d characters", len(self.transcript))
            self.save_output(self.transcript, "original_transcript.txt")

            if timestamps_path:
                self.timestamps = self._read_json(timestamps_path)
                logger.info("✅ Loaded timestamps data")
                self.save_output(json.dumps(self.timestamps, indent=2), "timestamps_1.json")

            if timestamps_2_path:
                self.timestamps_2 = self._read_json(timestamps_2_path)
                logger.info("✅ Loaded timestamps_2 data")
                self.save_output(json.dumps(self.timestamps_2, indent=2), "timestamps_2.json")
        except Exception as e:
            logger.error("❌ Error loading data: %s", e)
            raise

    # ------------------------------------------------------------------
    # Generation
    # ------------------------------------------------------------------

    def extract_final_response(self, response: str) -> str:
        """Return the final assistant text with chat-template markup removed.

        If the response contains ``<|start|>assistant``, only the text after
        its last occurrence is used; the channel header, ``<|message|>``,
        ``<|end|>`` and any remaining ``<...>`` tags are stripped. When that
        leaves nothing (or the marker is absent) the whole response is
        returned with ``<|...|>`` tokens and ``<...>`` tags removed.
        """
        _, marker, tail = response.rpartition("<|start|>assistant")
        if marker:
            tail = re.sub(r'<\|channel\|>[^<]+', '', tail)
            tail = tail.replace('<|message|>', '').replace('<|end|>', '')
            tail = re.sub(r'<[^>]+>', '', tail).strip()
            if tail:
                return tail

        cleaned = re.sub(r'<\|[^>]+\|>', '', response)
        cleaned = re.sub(r'<[^>]+>', '', cleaned)
        return cleaned.strip()

    def generate_response(self, prompt: str, max_tokens: int = 2048) -> str:
        """Generate a cleaned model reply for a single user *prompt*.

        Deliberately best-effort: on failure the error is logged and an
        ``"I encountered an error: ..."`` string is returned instead of
        raising, so one failed generation does not abort the demo.
        """
        try:
            messages = [{"role": "user", "content": prompt}]
            formatted_prompt = self.tokenizer.apply_chat_template(
                messages, add_generation_prompt=True
            )
            response = generate(
                self.model,
                self.tokenizer,
                prompt=formatted_prompt,
                max_tokens=max_tokens,
                verbose=False,
            )
            return self.extract_final_response(response)
        except Exception as e:
            logger.error("Generation error: %s", e)
            return f"I encountered an error: {str(e)}"

    def generate_tshirt_prompts(self):
        """Generate Flux1-Krea-dev graphic t-shirt prompts based on the lecture"""
        self._print_banner("👕 FLUX1-KREA-DEV T-SHIRT PROMPTS")

        prompt = _TSHIRT_PROMPT.format(transcript=self.transcript[:3000])
        tshirt_prompts = self.generate_response(prompt, max_tokens=1024)
        print(tshirt_prompts)
        self.save_output(tshirt_prompts, "flux1_krea_dev_tshirt_prompts.txt")

        # Generate additional minimalist versions
        self._print_banner("🎨 MINIMALIST T-SHIRT DESIGNS", char="-", width=40)

        minimalist_prompt = _MINIMALIST_PROMPT.format(transcript=self.transcript[:2000])
        minimalist_designs = self.generate_response(minimalist_prompt, max_tokens=512)
        print(minimalist_designs)
        self.save_output(minimalist_designs, "minimalist_tshirt_designs.txt")

    @staticmethod
    def _clean_summary(summary: str) -> str:
        """Strip leaked analysis / word-counting chatter from a summary."""
        cleaned = _SUMMARY_PREAMBLE_RE.sub('', summary)
        cleaned = _SUMMARY_COUNTING_RE.sub('', cleaned)
        cleaned = _SUMMARY_WORDCOUNT_RE.sub('', cleaned)
        return cleaned.strip()

    def generate_summaries(self, summary_lengths=DEFAULT_SUMMARY_LENGTHS):
        """Generate summaries of various lengths and save them.

        Args:
            summary_lengths: Target word counts; one summary is generated and
                saved per entry, plus a combined ``all_summaries.txt``.
        """
        self._print_banner("📝 MULTI-LENGTH SUMMARIES")

        all_summaries = []
        for length in summary_lengths:
            print(f"\nGenerating {length}-word summary...")
            prompt = _SUMMARY_PROMPT.format(length=length, transcript=self.transcript[:6000])
            summary = self.generate_response(prompt, max_tokens=500)
            clean_summary = self._clean_summary(summary)

            print(f"✅ {length}-word summary:")
            print("-" * 60)
            print(textwrap.fill(clean_summary, width=70))
            print("-" * 60)

            # Save individual summary
            self.save_output(clean_summary, f"summary_{length}_words.txt")
            all_summaries.append(f"{length}-word summary:\n{clean_summary}\n\n")
            time.sleep(1)

        # Save all summaries in one file
        self.save_output("\n".join(all_summaries), "all_summaries.txt")

    # ------------------------------------------------------------------
    # Visualizations
    # ------------------------------------------------------------------

    @staticmethod
    def _count_topic_mentions(text: str, topic: str) -> int:
        """Count whole-word, case-insensitive mentions of *topic* in *text*.

        A trailing plural "s" is accepted. Whole-word matching keeps short
        topics such as "AI" from matching inside "said" or "again".
        """
        pattern = rf"\b{re.escape(topic)}s?\b"
        return len(re.findall(pattern, text, flags=re.IGNORECASE))

    def create_visualizations(self):
        """Create various visualizations of the lecture content and save them.

        Produces a word-frequency bar chart, a word cloud PNG and a topic
        distribution pie chart, along with the underlying data as text.
        Does nothing (beyond a warning) when the transcript has no words.
        """
        self._print_banner("📊 DATA VISUALIZATIONS")

        # Word frequency analysis
        tokens = re.findall(r'\b[a-zA-Z]{3,}\b', self.transcript.lower())
        common_words = Counter(tokens).most_common(500)
        if not common_words:
            # zip(*[]) and WordCloud.generate() both fail on empty input.
            logger.warning("Transcript contains no words; skipping visualizations")
            return

        # Create Plotly bar chart and save as HTML
        top_words, top_counts = zip(*common_words)
        fig = px.bar(x=top_words, y=top_counts, title="Top 500 Words in Lecture")
        self.save_plotly_html(fig, "word_frequency.html")

        # Save word frequency data
        freq_data = "\n".join(f"{word}: {count}" for word, count in common_words)
        self.save_output(freq_data, "word_frequency_data.txt")

        # Create word cloud with matplotlib (since Plotly doesn't have word cloud)
        print("\nGenerating word cloud...")
        wordcloud = WordCloud(width=800, height=400,
                              background_color='white').generate(self.transcript)
        cloud_fig, ax = plt.subplots(figsize=(10, 5))
        try:
            ax.imshow(wordcloud, interpolation='bilinear')
            ax.axis('off')
            ax.set_title('Word Cloud of Lecture Content')
            self.save_matplotlib_figure(cloud_fig, "word_cloud.png")
        finally:
            # Always release the figure, even if saving fails.
            plt.close(cloud_fig)

        # Topic distribution visualization using Plotly
        print("\nGenerating topic analysis...")
        topic_counts = {
            topic: self._count_topic_mentions(self.transcript, topic)
            for topic in LECTURE_TOPICS
        }
        fig = px.pie(values=list(topic_counts.values()), names=list(topic_counts.keys()),
                     title='Topic Distribution in Lecture')
        self.save_plotly_html(fig, "topic_distribution.html")

        # Save topic data
        topic_data = "\n".join(f"{topic}: {count}" for topic, count in topic_counts.items())
        self.save_output(topic_data, "topic_data.txt")

    # ------------------------------------------------------------------
    # Long-form generations
    # ------------------------------------------------------------------

    def generate_debate(self):
        """Generate pro and con arguments about the lecture themes and save them"""
        self._print_banner("⚖️ DEBATE: AI DEVELOPMENT - PROS AND CONS")
        debate = self.generate_response(
            _DEBATE_PROMPT.format(transcript=self.transcript[:8000]), max_tokens=1024
        )
        print(debate)
        self.save_output(debate, "ai_development_debate.txt")

    def write_article(self):
        """Generate a professional article about the lecture and save it"""
        self._print_banner("📰 PROFESSIONAL ARTICLE")
        article = self.generate_response(
            _ARTICLE_PROMPT.format(transcript=self.transcript[:10000]), max_tokens=1024
        )
        print(article)
        self.save_output(article, "professional_article.txt")

    def write_editorial(self):
        """Generate an editorial opinion piece and save it"""
        self._print_banner("✍️ EDITORIAL OPINION")
        editorial = self.generate_response(
            _EDITORIAL_PROMPT.format(transcript=self.transcript[:5000]), max_tokens=1024
        )
        print(editorial)
        self.save_output(editorial, "editorial_opinion.txt")

    def generate_qna(self):
        """Generate questions and answers about the lecture and save them"""
        self._print_banner("❓ Q&A SESSION")
        qna = self.generate_response(
            _QNA_PROMPT.format(transcript=self.transcript[:6000]), max_tokens=4096
        )
        print(qna)
        self.save_output(qna, "qna_session.txt")

    # ------------------------------------------------------------------
    # Timelines
    # ------------------------------------------------------------------

    def is_whisper_format(self, data):
        """Return True if *data* looks like OpenAI Whisper output.

        That is: a dict with a non-empty ``segments`` list whose first entry
        is a dict containing a ``start`` key.
        """
        if not isinstance(data, dict):
            return False
        segments = data.get('segments')
        if not isinstance(segments, list) or not segments:
            return False
        first = segments[0]
        return isinstance(first, dict) and 'start' in first

    @staticmethod
    def _format_hms(total_seconds) -> str:
        """Format a number of seconds as ``HH:MM:SS`` (fractions truncated)."""
        minutes, seconds = divmod(total_seconds, 60)
        hours, minutes = divmod(minutes, 60)
        return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}"

    @staticmethod
    def _parse_hms(value) -> Optional[float]:
        """Parse ``HH:MM:SS`` (fractional seconds allowed) into seconds.

        Returns None when *value* is not in that form.
        """
        parts = str(value).split(':')
        if len(parts) != 3:
            return None
        try:
            hours, minutes, seconds = (float(part) for part in parts)
        except ValueError:
            return None
        return hours * 3600 + minutes * 60 + seconds

    def convert_whisper_to_timeline(self, whisper_data):
        """Convert Whisper format to timeline format.

        Returns:
            ``{"sections": [...]}`` where each section has ``start_time`` and
            ``end_time`` as ``HH:MM:SS`` strings, a ``topic`` (first 50
            characters of the text, or ``Segment N`` if the text is empty)
            and the full ``text``.
        """
        sections = []
        for index, segment in enumerate(whisper_data.get('segments', []), 1):
            text = segment.get('text', '').strip()

            # Create a short topic from the text
            topic = text[:50] + "..." if len(text) > 50 else text
            if not topic:
                topic = f"Segment {index}"

            sections.append({
                "start_time": self._format_hms(segment.get('start', 0)),
                "end_time": self._format_hms(segment.get('end', 0)),
                "topic": topic,
                "text": text,
            })
        return {"sections": sections}

    def create_timeline_visualization_plotly(self, timestamps_data, title, filename):
        """Create timeline visualization using Plotly and save as HTML.

        Only sections with parseable ``start_time``/``end_time`` and a
        positive duration are charted. Titles containing ``"TIMELINE 1"`` get
        a pie chart; all others get a bar chart. Nothing is written when no
        section qualifies.
        """
        if not timestamps_data or 'sections' not in timestamps_data:
            return

        # Extract data for visualization
        segments = []
        durations = []
        labels = []
        for i, section in enumerate(timestamps_data.get('sections', [])):
            if 'start_time' not in section or 'end_time' not in section:
                continue
            start_sec = self._parse_hms(section['start_time'])
            end_sec = self._parse_hms(section['end_time'])
            if start_sec is None or end_sec is None:
                # Skip malformed timestamps instead of aborting the chart.
                continue
            duration = end_sec - start_sec
            if duration > 0:  # Only include segments with positive duration
                segments.append(i)
                durations.append(duration)
                labels.append(f"Seg {i+1}")

        if not durations:
            return

        if "TIMELINE 1" in title:
            # Create pie chart for timeline 1
            fig = px.pie(values=durations, names=labels, title=f'{title} - Segment Durations')
            self.save_plotly_html(fig, filename.replace('.txt', '_durations_pie.html'))
        else:
            # Create bar chart for other timelines
            fig = px.bar(x=segments, y=durations, title=f'{title} - Segment Durations',
                         labels={'x': 'Segment Number', 'y': 'Duration (seconds)'})
            fig.update_layout(xaxis=dict(tickvals=segments, ticktext=labels))
            self.save_plotly_html(fig, filename.replace('.txt', '_durations.html'))

    def create_timeline(self, timestamps_data, title, filename):
        """Create a text timeline from timestamps data, save it, and chart it.

        Whisper-format input is converted first. Input that is empty or not a
        dict is reported and skipped.
        """
        if not timestamps_data:
            print(f"No timestamp data available for {title}")
            return
        if not isinstance(timestamps_data, dict):
            print(f"Unsupported timestamp data format for {title}")
            return

        print(f"\n⏰ {title}")
        print("=" * 80)

        # Check if data is in Whisper format and convert if needed
        if self.is_whisper_format(timestamps_data):
            print("Detected Whisper format - converting to timeline format")
            timestamps_data = self.convert_whisper_to_timeline(timestamps_data)

        # Create a text-based timeline
        pieces = [f"{title}\n\n"]
        for index, section in enumerate(timestamps_data.get('sections', []), 1):
            start = section.get('start_time', '00:00:00')
            topic = section.get('topic', 'Unknown')
            text = section.get('text', '')
            pieces.append(f"{index}. {start} - {topic}\n")
            if text:
                pieces.append(f" Text: {text}\n")
            pieces.append("\n")
        timeline_text = "".join(pieces)

        print(timeline_text)
        self.save_output(timeline_text, filename)

        # Create visualization using Plotly
        self.create_timeline_visualization_plotly(timestamps_data, title, filename)

    def create_timelines(self):
        """Create timelines for both timestamp files and save them"""
        self._print_banner("⏰ LECTURE TIMELINES")
        self.create_timeline(self.timestamps, "LECTURE TIMELINE 1", "timeline_1.txt")
        self.create_timeline(self.timestamps_2, "LECTURE TIMELINE 2", "timeline_2.txt")

    # ------------------------------------------------------------------
    # Insights, recommendations, README
    # ------------------------------------------------------------------

    def generate_key_insights(self):
        """Generate key insights with visual representation and save them"""
        self._print_banner("💡 KEY INSIGHTS ANALYSIS")

        insights = self.generate_response(
            _INSIGHTS_PROMPT.format(transcript=self.transcript[:8000]), max_tokens=1024
        )
        print(insights)
        self.save_output(insights, "key_insights.txt")

        # Create a radar chart of insight importance using Plotly
        print("\nGenerating insights visualization...")
        categories = ['Storytelling Power', 'AI Risks', 'Alignment Challenge',
                      'Ethical Frameworks', 'Human Cooperation', 'Trust Issues',
                      'Future Implications']
        # NOTE: fixed example values; they are not derived from the model output.
        values = [8, 9, 9, 7, 8, 8, 9]

        fig = go.Figure(data=go.Scatterpolar(r=values, theta=categories, fill='toself'))
        fig.update_layout(
            polar=dict(radialaxis=dict(visible=True, range=[0, 10])),
            title="Importance of Lecture Themes",
        )
        self.save_plotly_html(fig, "insights_radar_chart.html")

    def generate_recommendations(self):
        """Generate policy and personal recommendations and save them"""
        self._print_banner("📋 POLICY AND PERSONAL RECOMMENDATIONS")
        recommendations = self.generate_response(
            _RECOMMENDATIONS_PROMPT.format(transcript=self.transcript[:7000]), max_tokens=4096
        )
        print(recommendations)
        self.save_output(recommendations, "recommendations.txt")

    def create_readme(self):
        """Create a README file with information about all generated content"""
        readme_content = _README_TEMPLATE.format(
            generated_on=datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        )
        self.save_output(readme_content, "README.md")

    # ------------------------------------------------------------------
    # Orchestration
    # ------------------------------------------------------------------

    def run_comprehensive_demo(
        self,
        transcript_path: str = "yuval_harari_lecture_transcript.txt",
        timestamps_path: Optional[str] = "yuval_harari_lecture_timestamps.json",
        timestamps_2_path: Optional[str] = "yuval_harari_lecture_timestamps_2.json",
    ):
        """Run the complete demonstration and save all outputs.

        Loading errors propagate to the caller. A failure in an individual
        demonstration step is logged (with the step name and traceback) and
        the remaining steps still run.

        Args:
            transcript_path: Path to the lecture transcript.
            timestamps_path: Path to the first timestamps JSON file, or None.
            timestamps_2_path: Path to the second timestamps JSON file, or None.
        """
        print("🚀 Starting Comprehensive GPT-OSS-120B Demonstration")
        print("💾 Model: 120B parameters, 4-bit quantized")
        print("📚 Analyzing: Yuval Noah Harari Lecture on AI and Humanity")
        print("=" * 80)

        # Load data
        self.load_data(transcript_path, timestamps_path, timestamps_2_path)

        # Run all demonstrations
        demonstrations = [
            self.generate_summaries,
            self.create_visualizations,
            self.generate_debate,
            self.write_article,
            self.write_editorial,
            self.generate_qna,
            self.create_timelines,
            self.generate_key_insights,
            self.generate_recommendations,
            self.generate_tshirt_prompts,
            self.create_readme,
        ]

        for step in demonstrations:
            try:
                step()
                time.sleep(2)
            except Exception as e:
                # Best-effort: one failing step must not stop the others.
                logger.exception("Error in demonstration %s: %s", step.__name__, e)
                continue

        print(f"\n🎉 All outputs saved to: {self.output_dir}")
        print("📋 Contents:")
        for file in sorted(os.listdir(self.output_dir)):
            print(f" - {file}")
def main() -> None:
    """Build the demo (which loads the model) and run every demonstration."""
    GPTOSSDemo().run_comprehensive_demo()


# Script entry point: only runs when executed directly, not on import.
if __name__ == "__main__":
    main()