# At the top of utils.py import re import json import streamlit as st import pandas as pd import plotly.express as px from datetime import datetime from langchain_openai import ChatOpenAI from reportlab.lib import colors from reportlab.lib.pagesizes import letter, landscape from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, PageBreak from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle def update_progress(container, percentage, message=""): if container: progress_bar = container.progress(percentage / 100) container.write(message) def extract_metrics(text): """Extract metrics from text with error handling""" llm = ChatOpenAI(temperature=0, model="gpt-4") metrics_prompt = """Extract the following metrics as JSON from the text: - Market size (with currency) - CAGR (%) - Market leader's share (%) - Number of key players - Key regions - Dominant segment Text: {text} Return in JSON format with these exact keys: market_size, cagr, leader_share, key_players, key_regions, dominant_segment If a metric is not found, use "N/A" as the value.""" try: response = llm.invoke(metrics_prompt.format(text=text)) if response and response.content: json_str = re.search(r'\{.*\}', response.content, re.DOTALL) if json_str: return json.loads(json_str.group()) except Exception as e: st.error(f"Error extracting metrics: {str(e)}") # Return default metrics if extraction fails return { 'market_size': 'N/A', 'cagr': 'N/A', 'leader_share': 'N/A', 'key_players': 'N/A', 'key_regions': 'N/A', 'dominant_segment': 'N/A' } def enhance_report_with_gpt4(base_report, topic): """Enhance report with GPT-4""" try: llm = ChatOpenAI(temperature=0.7, model="gpt-4") if not base_report: return "No base report provided to enhance." prompt = f"""Create a professional market research report for {topic} based on this research: {base_report} Structure the report with: # Executive Summary - Brief overview - Key findings - Market highlights # Market Overview - Current market size and growth - Geographic distribution - Market segmentation # Competitive Analysis - Key players and market shares - Competitive strategies - SWOT analysis # Market Dynamics - Growth drivers - Market challenges - Entry barriers # Industry Trends - Technology trends - Consumer behavior - Regulatory landscape # Future Outlook - Market projections - Emerging opportunities - Risk factors # Strategic Recommendations - Short-term strategies - Long-term opportunities - Risk mitigation""" response = llm.invoke(prompt) return response.content if response else base_report except Exception as e: st.error(f"Error enhancing report: {str(e)}") return base_report def generate_visual_data(metrics): try: # Prepare data for visualizations market_data = { 'Market Size': metrics.get('market_size', 'N/A'), 'CAGR': metrics.get('cagr', 'N/A'), 'Leader Share': metrics.get('leader_share', 'N/A'), 'Key Players': metrics.get('key_players', 'N/A') } return market_data except Exception as e: st.error(f"Error generating visualizations: {str(e)}") return {} def process_crew_output(crew_result, topic): try: # Initialize default outputs agent_outputs = { 'researcher': { 'raw_output': '', 'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'analysis_type': 'Market Research' }, 'analyst': { 'raw_output': '', 'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'analysis_type': 'Data Analysis' }, 'writer': { 'raw_output': '', 'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'analysis_type': 'Report Writing' } } # Get base report base_report = str(crew_result) if crew_result else "No report generated" # Extract individual agent outputs from crew_result if hasattr(crew_result, 'tasks'): for task in crew_result.tasks: if 'research' in task.agent.role.lower(): agent_outputs['researcher']['raw_output'] = task.output if task.output else "No research output available" elif 'analyst' in task.agent.role.lower(): agent_outputs['analyst']['raw_output'] = task.output if task.output else "No analysis output available" elif 'writer' in task.agent.role.lower(): agent_outputs['writer']['raw_output'] = task.output if task.output else "No writer output available" # Extract metrics with error handling try: metrics = extract_metrics(base_report) except Exception as e: st.warning(f"Warning extracting metrics: {str(e)}") metrics = { 'market_size': 'N/A', 'cagr': 'N/A', 'leader_share': 'N/A', 'key_players': 'N/A', 'key_regions': 'N/A', 'dominant_segment': 'N/A' } # Generate enhanced report content try: enhanced_content = enhance_report_with_gpt4(base_report, topic) except Exception as e: st.warning(f"Warning enhancing report: {str(e)}") enhanced_content = base_report # Extract market data try: market_data = extract_market_data(base_report) except Exception as e: st.warning(f"Warning extracting market data: {str(e)}") market_data = { "marketShares": [], "growthTrend": [], "regionalDistribution": [], "techAdoption": [], "keyPlayers": [] } return { 'metrics': metrics, 'content': enhanced_content, 'raw': base_report, 'agent_outputs': agent_outputs, 'market_data': market_data } except Exception as e: st.error(f"Error processing report: {str(e)}") # Return default structure return { 'metrics': { 'market_size': 'N/A', 'cagr': 'N/A', 'leader_share': 'N/A', 'key_players': 'N/A', 'key_regions': 'N/A', 'dominant_segment': 'N/A' }, 'content': "Error generating report content", 'raw': str(crew_result) if crew_result else "No report generated", 'agent_outputs': agent_outputs, 'market_data': { "marketShares": [], "growthTrend": [], "regionalDistribution": [], "techAdoption": [], "keyPlayers": [] } } def extract_market_data(text): """Extract structured market data for visualizations""" llm = ChatOpenAI(temperature=0, model="gpt-4") data_prompt = """Extract the following data points in JSON format: 1. Market shares of key players 2. Growth trends over years 3. Regional distribution 4. Technology adoption rates 5. Company profiles with recent developments Text: {text} Return as JSON with these keys: { "marketShares": [{"company": "Company Name", "share": number}], "growthTrend": [{"year": "YYYY", "growth": number}], "regionalDistribution": [{"region": "Region Name", "share": number}], "techAdoption": [{"name": "Tech Name", "adoptionRate": number}], "keyPlayers": [{"company": "Company Name", "marketShare": number, "strengths": "text", "developments": "text"}] } Use "N/A" for missing values.""" try: response = llm.invoke(data_prompt.format(text=text)) if response and response.content: json_str = re.search(r'\{.*\}', response.content, re.DOTALL) if json_str: return json.loads(json_str.group()) except Exception as e: st.error(f"Error extracting market data: {str(e)}") # Return default structure if extraction fails return { "marketShares": [], "growthTrend": [], "regionalDistribution": [], "techAdoption": [], "keyPlayers": [] } def display_presentation_slide(slide, slide_num, total_slides): """Display a single presentation slide""" st.markdown(f"## {slide['title']}") if slide['type'] == 'title': st.markdown(f"
CAGR: {}
Key Players: {}
Dominant Segment: {}