# utils.py
import json
import logging
import re

try:
    import streamlit as st
except ImportError:
    # Nothing in this module references `st` directly; keep the text-parsing
    # helpers importable in environments where Streamlit is not installed.
    st = None

logger = logging.getLogger(__name__)

# Report field -> heading searched for in free text when the field is missing.
_EXEC_SUMMARY_HEADINGS = {
    "summary": "Executive Summary",
    "market_size": "Market Size",
    "growth_rate": "Growth Rate",
    "key_players": "Key Players",
}

# Compiled once at import time; tried in this order by extract_sources().
_SOURCE_PATTERNS = tuple(
    re.compile(pattern, re.MULTILINE)
    for pattern in (
        r"Source:.*?(?:\n|$)",
        r"\[.*?\]",
        r"\(https?://.*?\)",
        r"Reference:.*?(?:\n|$)",
        r"Retrieved from:.*?(?:\n|$)",
    )
)


def update_progress(container, percentage, message=""):
    """Render the progress message into *container*.

    Args:
        container: Object exposing ``markdown(body, unsafe_allow_html=...)``.
        percentage: Progress value supplied by the caller.
        message: Text interpolated into the rendered block.

    NOTE(review): ``percentage`` is accepted but the template below only
    interpolates ``message``; any surrounding progress-bar markup may have
    been lost -- confirm against the intended template.
    """
    progress_html = f"\n{message}\n"
    # The body is rendered with HTML enabled, so ``message`` is not escaped.
    container.markdown(progress_html, unsafe_allow_html=True)


def extract_section(text: str, section_name: str) -> str:
    """Return the text that follows the heading *section_name* in *text*.

    The heading is matched literally and case-insensitively. A value on the
    heading's own line ("Market Size: $5B") is preferred; otherwise the block
    after the heading line is used. Empty captures are skipped so a later
    pattern can still succeed.

    Returns:
        The stripped section text, ``"No <name> information found"`` when no
        pattern yields content, or ``"Error extracting <name>"`` when the
        search itself fails (for example *text* is not a string).
    """
    try:
        # Escape so names such as "Revenue (USD)" or "C++" are not read as regex.
        name = re.escape(section_name)
        patterns = (
            # Heading followed by ":" or whitespace: capture up to a blank line.
            rf"{name}[:\s](.*?)(?=\n\n|$)",
            # Decorated heading line: skip the rest of it, capture the next block.
            rf"{name}.*?\n(.*?)(?=\n\n|$)",
            rf"{name}:\s*(.*?)(?=\n|$)",
            rf"{name}\s*(.*?)(?=\n|$)",
        )
        for pattern in patterns:
            match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
            if match:
                content = match.group(1).strip()
                if content:
                    return content
        return f"No {section_name.lower()} information found"
    except (TypeError, re.error) as exc:
        logger.warning("Error extracting %s: %s", section_name, exc)
        return f"Error extracting {section_name.lower()}"


def extract_sources(text: str) -> list:
    """Collect citation-like snippets from *text*.

    Matches "Source:", "Reference:" and "Retrieved from:" lines, bracketed
    text and parenthesised http(s) URLs. Results are grouped by pattern, in
    the order the patterns are declared, with exact duplicates removed.

    Returns:
        The list of snippets, ``["Sources not explicitly mentioned"]`` when
        nothing matches, or ``["Error extracting sources"]`` when *text*
        cannot be searched.
    """
    try:
        found = {}
        for pattern in _SOURCE_PATTERNS:
            for match in pattern.finditer(text):
                # dict keys keep first-seen order while dropping repeats.
                found.setdefault(match.group().strip(), None)
        return list(found) if found else ["Sources not explicitly mentioned"]
    except TypeError as exc:
        logger.warning("Error extracting sources: %s", exc)
        return ["Error extracting sources"]


def _empty_metrics() -> dict:
    """Return a fresh metrics dict containing every required key."""
    return {"market_size_data": [], "growth_rates": [], "market_shares": {}}


def _find_json_object(text: str):
    """Return the first JSON object embedded in *text*, or None if absent."""
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError as exc:
            logger.debug("JSON parsing error at offset %d: %s", start, exc)
        else:
            if isinstance(candidate, dict):
                return candidate
        start = text.find("{", start + 1)
    return None


def _exec_summary_from(fields: dict, raw_text: str) -> dict:
    """Build the exec_summary dict, extracting from text any missing field."""
    summary = {}
    for key, heading in _EXEC_SUMMARY_HEADINGS.items():
        value = fields.get(key)
        # Extraction runs only when the parsed data lacks the field.
        summary[key] = extract_section(raw_text, heading) if value is None else value
    return summary


def format_json_output(raw_output) -> dict:
    """Convert a crew result into the report structure used by the app.

    The text is taken from ``raw_output.raw_output`` when that attribute
    exists, otherwise from ``str(raw_output)``. The first JSON object found
    in the text supplies the fields; anything missing (or null) is extracted
    from the text itself.

    Returns:
        A dict with the keys ``exec_summary`` (``summary``, ``market_size``,
        ``growth_rate``, ``key_players``), ``detailed_report``, ``sources``
        and ``metrics`` (``market_size_data``, ``growth_rates``,
        ``market_shares``). On an unexpected failure a placeholder structure
        of the same shape is returned instead of raising.
    """
    raw_text = ""
    try:
        if hasattr(raw_output, "raw_output"):
            raw_text = str(raw_output.raw_output)
        else:
            raw_text = str(raw_output)
        logger.debug("Raw text received: %s", raw_text[:500])

        parsed = _find_json_object(raw_text)
        if parsed is None:
            logger.debug("Creating structured format from raw text")
            parsed = {}

        fields = parsed.get("exec_summary")
        if not isinstance(fields, dict):
            # A string or null here must not discard the rest of the payload.
            fields = {}
        detailed_report = parsed.get("detailed_report")
        sources = parsed.get("sources")
        metrics = parsed.get("metrics")

        return {
            "exec_summary": _exec_summary_from(fields, raw_text),
            "detailed_report": raw_text if detailed_report is None else detailed_report,
            "sources": extract_sources(raw_text) if sources is None else sources,
            # Merge over the defaults so every required metrics key exists.
            "metrics": (
                {**_empty_metrics(), **metrics}
                if isinstance(metrics, dict)
                else _empty_metrics()
            ),
        }
    except Exception:
        # Boundary for the UI: log the failure and return a safe placeholder.
        logger.exception("Error in format_json_output")
        return {
            "exec_summary": {
                "summary": "Error processing report",
                "market_size": "Data not available",
                "growth_rate": "Data not available",
                "key_players": "Data not available",
            },
            "detailed_report": raw_text,
            "sources": [],
            "metrics": _empty_metrics(),
        }