"""Gradio application that researches a topic and renders an HTML newsletter.

The module wires together the project's research, AI-content, validation,
chart and e-mail services behind a single ``ProfessionalNewsletterGenerator``
class and exposes it through a Gradio Blocks interface.
"""

import html
import json
import logging
import os
import re
import shutil
import tempfile
import time
import zipfile
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from urllib.parse import urlparse

import gradio as gr

# Import custom modules
from services.research_engine import ResearchEngine
from services.ai_content_generator import AIContentGenerator
from services.email_service import EmailService
from utils.data_validator import DataValidator
from utils.chart_generator import ChartGenerator

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

# Matches the ``{{ name }}`` placeholders used by templates/newsletter.html
# (exactly one space on each side of the name).
_PLACEHOLDER_RE = re.compile(r"\{\{ (\w+) \}\}")

# Prompt templates are kept at module level (flush-left) so that method
# indentation never leaks into the text sent to the AI generator. They are
# filled with ``str.format``; substituted values are not re-parsed, so braces
# inside a topic or inside research data are safe.
_OUTLINE_PROMPT_TEMPLATE = """Create a professional, data-driven newsletter outline for "{topic}".

Requirements:
- Focus on quantifiable metrics and research findings
- Include current market data and trends
- Emphasize professional insights and expert opinions
- Structure for business and academic audiences
- Prioritize credible sources and factual content

Generate {num_sections} distinct sections that cover:
1. Current data and statistics
2. Expert analysis and insights
3. Market trends and forecasts
4. Case studies and real-world applications
5. Future implications and recommendations

Topic: {topic}"""

_SECTION_PROMPT_TEMPLATE = """Write a heartfelt, conversational section about {section} for our newsletter on {topic}.

Style Guidelines:
- Write like you're sharing insights with a trusted colleague
- Use warm, personal language while maintaining professionalism
- Include specific data and metrics naturally in the conversation
- Start with engaging phrases like "What really caught my attention..." or "Here's something fascinating..."
- Explain why the data matters and what it means for the reader
- End each section with actionable takeaways

Available Data for {section}:
- Sources: {source_count} credible references
- Key Metrics: {metrics}
- Content: {content}

Write 3-4 substantial paragraphs that tell a compelling story with the data."""

_README_TEMPLATE = """# Newsletter Generation Project

Generated on: {generated_on}
Topic: {topic}

## Contents:
- Complete source code for AI-powered newsletter generation
- Generated newsletter: {newsletter_filename}
- All dependencies and configuration files

## To run this project:
1. Install dependencies: pip install -r requirements.txt
2. Set up environment variables (API keys)
3. Run: python main.py

## Features:
- AI-powered content generation
- Multi-source research engine
- Data visualization with Chart.js
- Professional HTML templates
- Email distribution system
"""

_INTRO_MARKDOWN = """
# 📰 Professional Newsletter Generator

Generate data-driven, research-based newsletters with AI-powered content and real-time data analysis.

## Features:
- 🔍 Multi-source research and data validation
- 📊 Automatic data visualization generation
- 🎯 Professional formatting and citations
- 📧 Email distribution system
- 📈 Analytics and credibility scoring
"""


class ProfessionalNewsletterGenerator:
    """Professional Newsletter Generation System with Data-Centric Research"""

    def __init__(self):
        self.research_engine = ResearchEngine()
        self.ai_generator = AIContentGenerator()
        self.email_service = EmailService()
        self.data_validator = DataValidator()
        self.chart_generator = ChartGenerator()

    def generate_professional_outline(self, topic: str, num_sections: int = 5) -> List[str]:
        """Generate a professional, data-focused newsletter outline.

        Args:
            topic: Subject of the newsletter; interpolated into the prompt.
            num_sections: Number of sections requested from the AI generator.

        Returns:
            Whatever ``AIContentGenerator.generate_outline`` returns for the
            prompt (used by the rest of this class as a list of section
            titles).
        """
        logger.info("Generating professional outline for: %s", topic)

        # Enhanced prompt for professional, data-centric content
        prompt = _OUTLINE_PROMPT_TEMPLATE.format(topic=topic, num_sections=num_sections)
        return self.ai_generator.generate_outline(prompt, num_sections)

    def conduct_comprehensive_research(
        self, outline: List[str], topic: str, rate_limit_delay: float = 1.0
    ) -> Dict:
        """Research every outline section and validate the findings.

        Args:
            outline: Section titles to research.
            topic: Overall newsletter topic, passed to the research engine.
            rate_limit_delay: Seconds to pause after each section so the
                research back-ends are not hammered. Defaults to the
                original one-second pause.

        Returns:
            Mapping of section title to a dict with the keys ``content``,
            ``metrics``, ``sources`` and ``credibility_score``.
        """
        logger.info("Conducting comprehensive research for %d sections", len(outline))
        research_results = {}

        for section in outline:
            logger.info("Researching section: %s", section)

            # Multi-source research
            search_results = self.research_engine.search_multiple_sources(section, topic)
            sources = search_results.get('sources', [])
            logger.info("Search results for %s: %d sources found", section, len(sources))

            # Data validation and fact-checking
            validated_data = self.data_validator.validate_research_data(search_results)

            # Extract key metrics and statistics
            metrics = self.data_validator.extract_metrics(validated_data)
            logger.info("Extracted %d metrics for %s", len(metrics), section)

            research_results[section] = {
                'content': validated_data,
                'metrics': metrics,
                'sources': sources,
                'credibility_score': self.data_validator.calculate_credibility_score(search_results),
            }

            if rate_limit_delay > 0:
                time.sleep(rate_limit_delay)  # Rate limiting

        return research_results

    def generate_data_visualizations(self, research_data: Dict) -> Dict:
        """Generate chart configurations for sections that have metrics.

        Args:
            research_data: Mapping produced by
                ``conduct_comprehensive_research``.

        Returns:
            Mapping of section title to the chart config returned by
            ``ChartGenerator.create_chart_config``; sections without metrics
            or without a usable config are omitted.
        """
        logger.info("Generating data visualizations")
        charts = {}

        for section, data in research_data.items():
            # ``.get`` keeps this consistent with the other methods, which
            # tolerate sections that lack a 'metrics' entry.
            metrics = data.get('metrics')
            if not metrics:
                logger.info("No metrics available for section: %s", section)
                continue

            # Generate appropriate charts based on data type
            chart_config = self.chart_generator.create_chart_config(metrics, section)
            if chart_config:
                charts[section] = chart_config
                logger.info("Generated chart for section: %s", section)
            else:
                logger.warning("No chart generated for section: %s", section)

        logger.info("Total charts generated: %d", len(charts))
        return charts

    def create_professional_newsletter(
        self,
        topic: str,
        outline: List[str],
        research_data: Dict,
        charts: Dict
    ) -> str:
        """Create the professional HTML newsletter with data visualizations.

        Args:
            topic: Newsletter topic.
            outline: Ordered section titles.
            research_data: Per-section research results; sections missing
                from the mapping are written without supporting data.
            charts: Per-section chart configurations, embedded as JSON.

        Returns:
            The rendered newsletter HTML.
        """
        logger.info("Creating professional newsletter for: %s", topic)

        # Generate comprehensive content for each section
        all_content_sections = []
        for section in outline:
            section_data = research_data.get(section, {})
            sources = section_data.get('sources', [])
            metrics = section_data.get('metrics', [])

            # Create detailed section content
            section_prompt = _SECTION_PROMPT_TEMPLATE.format(
                section=section,
                topic=topic,
                source_count=len(sources),
                metrics=metrics,
                content=section_data.get('content', {}),
            )
            section_content = self.ai_generator.generate_section_content(
                section_prompt, section, section_data
            )

            all_content_sections.append({
                'title': section,
                'content': section_content,
                'metrics': metrics,
                'sources': sources,
            })

        # Combine all sections into full newsletter content
        newsletter_content = self._format_newsletter_sections(all_content_sections)

        # Render HTML template with data
        return self._render_newsletter_template(
            topic, newsletter_content, charts, research_data
        )

    def _render_newsletter_template(
        self,
        topic: str,
        content: str,
        charts: Dict,
        research_data: Dict
    ) -> str:
        """Render the professional HTML newsletter template.

        Reads ``templates/newsletter.html`` (relative to the working
        directory) and substitutes its ``{{ name }}`` placeholders.

        Args:
            topic: Newsletter topic; HTML-escaped before insertion because
                it comes straight from user input.
            content: Pre-formatted section HTML, inserted verbatim.
            charts: Chart configurations, serialised with ``json.dumps``.
            research_data: Per-section research results used for the
                analytics figures.

        Returns:
            The template text with all known placeholders replaced.

        Raises:
            OSError: If the template file cannot be read.
        """
        # Load template and inject content. The template and the generated
        # content contain non-ASCII characters, so the encoding is explicit
        # instead of depending on the platform default.
        with open('templates/newsletter.html', 'r', encoding='utf-8') as f:
            template = f.read()

        # Calculate analytics
        section_results = list(research_data.values())
        total_sources = sum(len(data.get('sources', [])) for data in section_results)
        if section_results:
            avg_credibility = (
                sum(data.get('credibility_score', 0) for data in section_results)
                / len(section_results)
            )
        else:
            # No research at all: avoid ZeroDivisionError and report 0.0.
            avg_credibility = 0.0

        # Template variables
        template_vars = {
            'topic': html.escape(topic),
            'content': content,
            'charts_json': json.dumps(charts),
            'date': datetime.now().strftime("%B %d, %Y"),
            'total_sources': total_sources,
            'credibility_score': f"{avg_credibility:.1f}/10",
            'research_summary': self._generate_research_summary(research_data),
        }
        rendered = {key: str(value) for key, value in template_vars.items()}

        # Single-pass substitution: text that was just inserted (for example
        # AI-written content that happens to contain "{{ date }}") is never
        # scanned again, and unknown placeholders are left untouched.
        return _PLACEHOLDER_RE.sub(
            lambda match: rendered.get(match.group(1), match.group(0)),
            template,
        )

    def _generate_research_summary(self, research_data: Dict) -> str:
        """Generate the one-sentence executive summary of research findings.

        Counts at most two metrics per section and reports that count
        together with the number of researched sections.
        """
        key_metrics = []
        for data in research_data.values():
            metrics = data.get('metrics')
            if metrics:
                key_metrics.extend(metrics[:2])  # Top 2 metrics per section

        return (
            f"Analysis based on {len(key_metrics)} key data points "
            f"from {len(research_data)} research areas."
        )

    @staticmethod
    def _source_domain(source) -> str:
        """Return a short display name (normally the host) for a source.

        ``https://example.com/a`` gives ``example.com``. Sources without a
        scheme (``example.com/a``) fall back to their first path segment
        rather than raising ``IndexError``.
        """
        text = str(source).strip()
        try:
            netloc = urlparse(text).netloc
        except ValueError:
            # urlparse rejects some malformed inputs (e.g. bad IPv6 hosts).
            netloc = ""
        if netloc:
            return netloc
        return next((part for part in text.split('/') if part), text)

    def _format_newsletter_sections(self, sections: List[Dict]) -> str:
        """Format individual sections into cohesive newsletter content.

        Args:
            sections: Dicts with the keys ``title``, ``content``,
                ``metrics`` and ``sources``.

        Returns:
            Concatenated HTML for all sections. The "Key Data Points" and
            "Sources" parts are only emitted when the section has metrics
            or sources respectively; at most three sources are listed.
        """
        # NOTE(review): the HTML tags of the original markup were lost when
        # this file was extracted. The element structure and class names
        # below are a reconstruction around the surviving text ("📊 Key Data
        # Points:", "Sources:"); listing the metrics as <li> items is an
        # assumption - confirm against templates/newsletter.html and its
        # stylesheet before shipping.
        rendered_sections = []
        for section in sections:
            parts = [
                '<div class="newsletter-section">',
                f'<h2>{html.escape(str(section["title"]))}</h2>',
                # Section content comes from the AI generator and may
                # already be HTML, so it is inserted as-is.
                f'<div class="section-content">{section["content"]}</div>',
            ]

            metrics = section.get('metrics')
            if metrics:
                metric_items = "".join(
                    f"<li>{html.escape(str(metric))}</li>" for metric in metrics
                )
                parts.extend([
                    '<div class="key-metrics">',
                    '<h4>📊 Key Data Points:</h4>',
                    f'<ul>{metric_items}</ul>',
                    '</div>',
                ])

            sources = section.get('sources')
            if sources:
                domains = ", ".join(
                    html.escape(self._source_domain(source)) for source in sources[:3]
                )
                parts.extend([
                    '<div class="sources">',
                    '<h4>Sources:</h4>',
                    f'<p>{domains}</p>',
                    '</div>',
                ])

            parts.append('</div>')
            rendered_sections.append("\n".join(parts))

        return "\n".join(rendered_sections)

    def export_newsletter(self, newsletter_html: str, format_type: str = 'html') -> str:
        """Export newsletter in different formats.

        Only HTML is implemented: ``'pdf'`` returns a placeholder message
        and every other value returns the HTML unchanged.
        """
        if format_type == 'pdf':
            # Would implement PDF generation here
            return "PDF export feature coming soon"
        return newsletter_html

    def create_project_zip(self, newsletter_html: str, topic: str) -> Optional[str]:
        """Create a zip archive of the project plus the generated newsletter.

        The archive is written into a fresh temporary directory. On success
        that directory is deliberately left in place, because the returned
        path must still exist when the caller serves the file; the caller
        owns the clean-up.

        Args:
            newsletter_html: Newsletter HTML stored in the archive.
            topic: Newsletter topic, used in the archive name and README.

        Returns:
            Path of the created zip file, or ``None`` if creation failed
            (the error is logged and the temporary directory removed).
        """
        timestamp = datetime.now()
        stamp = timestamp.strftime('%Y%m%d_%H%M%S')

        # Spaces become underscores as before; any other character that is
        # unsafe in a file name (e.g. '/') is replaced as well so a topic
        # can never escape the temporary directory.
        safe_topic = re.sub(r"[^\w\-]", "_", topic)

        # Create temporary directory for zip contents
        temp_dir = tempfile.mkdtemp()
        zip_path = os.path.join(temp_dir, f"newsletter_project_{safe_topic}_{stamp}.zip")

        try:
            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                # Add main project files
                for file_path in ('main.py', 'pyproject.toml', 'replit.md', '.replit'):
                    if os.path.exists(file_path):
                        zipf.write(file_path, file_path)

                # Add entire directories
                for directory in ('services', 'utils', 'templates', 'static'):
                    if not os.path.exists(directory):
                        continue
                    for root, _dirs, files in os.walk(directory):
                        for file_name in files:
                            file_path = os.path.join(root, file_name)
                            zipf.write(file_path, file_path)

                # Add the generated newsletter as a separate file
                newsletter_filename = f"generated_newsletter_{stamp}.html"
                zipf.writestr(newsletter_filename, newsletter_html)

                # Add a README for the zip contents
                readme_content = _README_TEMPLATE.format(
                    generated_on=timestamp.strftime('%Y-%m-%d %H:%M:%S'),
                    topic=topic,
                    newsletter_filename=newsletter_filename,
                )
                zipf.writestr("README.md", readme_content)
        except Exception:
            # Boundary handler: report the failure to the caller as None,
            # but do not leave a half-written archive behind.
            logger.exception("Error creating project zip")
            shutil.rmtree(temp_dir, ignore_errors=True)
            return None

        # Return the path for download (Gradio will handle the file)
        return zip_path


def create_gradio_interface():
    """Create the Gradio Blocks interface for the newsletter generator.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    generator = ProfessionalNewsletterGenerator()

    def generate_newsletter(topic, num_sections, recipients):
        """Run the generation pipeline, yielding progress to the UI.

        Each yield is a ``(status, newsletter_html, email_status)`` tuple
        matching the three output components.
        """
        try:
            topic = (topic or "").strip()
            if not topic:
                yield "❌ Error: Please enter a newsletter topic", "", ""
                return

            # Sliders can deliver floats; downstream code expects a count.
            num_sections = int(num_sections)

            # "a@x.com, b@x.com" must not produce an address with a leading
            # space or an empty entry.
            recipient_list = [
                address.strip()
                for address in (recipients or "").split(',')
                if address.strip()
            ]

            # Step 1: Generate outline
            yield "🔍 Generating professional outline...", "", ""
            outline = generator.generate_professional_outline(topic, num_sections)

            # Step 2: Research
            yield "📊 Conducting comprehensive research...", "", ""
            research_data = generator.conduct_comprehensive_research(outline, topic)

            # Step 3: Generate visualizations
            yield "📈 Creating data visualizations...", "", ""
            charts = generator.generate_data_visualizations(research_data)

            # Step 4: Create newsletter
            yield "✍️ Generating professional newsletter...", "", ""
            newsletter = generator.create_professional_newsletter(
                topic, outline, research_data, charts
            )

            # Step 5: Send emails if recipients provided
            if recipient_list:
                yield "📧 Sending newsletters...", newsletter, ""
                result = generator.email_service.send_newsletter(
                    newsletter, recipient_list, topic
                )
                yield "✅ Newsletter generated and sent successfully!", newsletter, result
            else:
                yield (
                    "✅ Newsletter generated successfully!",
                    newsletter,
                    "No recipients specified - newsletter not sent",
                )

        except Exception as e:
            # Top-level UI boundary: show the error instead of crashing.
            logger.exception("Error generating newsletter")
            yield f"❌ Error: {str(e)}", "", ""

    # Gradio interface
    with gr.Blocks(title="Professional Newsletter Generator", theme=gr.themes.Soft()) as interface:
        gr.Markdown(_INTRO_MARKDOWN)

        with gr.Row():
            with gr.Column(scale=1):
                topic_input = gr.Textbox(
                    label="Newsletter Topic",
                    placeholder="e.g., Artificial Intelligence in Healthcare 2024",
                    lines=2
                )
                sections_input = gr.Slider(
                    minimum=3,
                    maximum=8,
                    value=5,
                    step=1,
                    label="Number of Sections"
                )
                recipients_input = gr.Textbox(
                    label="Email Recipients (comma-separated)",
                    placeholder="email1@example.com, email2@example.com",
                    lines=2
                )
                generate_btn = gr.Button("🚀 Generate Professional Newsletter", variant="primary")

            with gr.Column(scale=2):
                status_output = gr.Textbox(
                    label="Generation Status",
                    lines=2,
                    interactive=False
                )
                newsletter_output = gr.HTML(
                    label="Generated Newsletter"
                )

                # Manual editing interface
                with gr.Accordion("Manual Template Editing", open=False):
                    template_editor = gr.Code(
                        label="Edit Newsletter HTML",
                        language="html",
                        lines=15
                    )
                    update_preview_btn = gr.Button("Update Preview", variant="secondary")

                # Download options
                with gr.Row():
                    download_html_btn = gr.DownloadButton(
                        label="📥 Download HTML",
                        variant="primary"
                    )

                email_status = gr.Textbox(
                    label="Email Status",
                    lines=2,
                    interactive=False
                )

        # Store newsletter content for downloads
        newsletter_content = gr.State()

        def update_template_editor(newsletter_html):
            """Mirror the generated HTML into the editor ('' when empty)."""
            return newsletter_html if newsletter_html else ""

        def update_preview_from_editor(edited_html):
            """Push the edited HTML back into the preview unchanged."""
            return edited_html

        def prepare_download(newsletter_html):
            """Write the newsletter to a temp .html file and return its path.

            Returns ``None`` when there is nothing to download.
            """
            if not newsletter_html:
                return None

            # delete=False: the file must outlive this function so that the
            # download button can serve it. UTF-8 is explicit because the
            # newsletter contains emoji.
            with tempfile.NamedTemporaryFile(
                mode='w',
                suffix='.html',
                delete=False,
                prefix="newsletter_",
                encoding='utf-8',
            ) as temp_file:
                temp_file.write(newsletter_html)
            return temp_file.name

        # Main generation event chain
        generate_btn.click(
            fn=generate_newsletter,
            inputs=[topic_input, sections_input, recipients_input],
            outputs=[status_output, newsletter_output, email_status]
        ).then(
            fn=update_template_editor,
            inputs=[newsletter_output],
            outputs=[template_editor]
        ).then(
            fn=lambda x: x,
            inputs=[newsletter_output],
            outputs=[newsletter_content]
        )

        # Manual editing events
        update_preview_btn.click(
            fn=update_preview_from_editor,
            inputs=[template_editor],
            outputs=[newsletter_output]
        ).then(
            fn=lambda x: x,
            inputs=[template_editor],
            outputs=[newsletter_content]
        )

        # Download event
        download_html_btn.click(
            fn=prepare_download,
            inputs=[newsletter_content],
            outputs=[download_html_btn]
        )

        # Add examples
        gr.Examples(
            examples=[
                ["Sustainable Energy Technologies Market Analysis", 5, ""],
                ["Global Economic Trends and Financial Markets", 6, ""],
                ["Healthcare Innovation and Digital Transformation", 4, ""],
                ["Climate Change Policy and Environmental Impact", 5, ""]
            ],
            inputs=[topic_input, sections_input, recipients_input]
        )

    return interface


if __name__ == "__main__":
    # Create and launch the interface
    app = create_gradio_interface()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=False
    )