Spaces:
Sleeping
Sleeping
| # utils.py | |
| import re | |
| import json | |
| import streamlit as st | |
def update_progress(container, percentage, message=""):
    """Render an HTML progress bar into a container.

    Args:
        container: Object exposing ``markdown(body, unsafe_allow_html=...)``;
            presumably a Streamlit placeholder such as ``st.empty()`` --
            confirm against the caller.
        percentage: Fill level of the bar. Numeric values are clamped to the
            0-100 range so the bar can never overflow its track or receive a
            negative width; non-numeric values are passed through unchanged.
        message: Text displayed inside the bar. It is HTML-escaped because the
            markup is rendered with ``unsafe_allow_html=True``.
    """
    import html  # local import keeps this block self-contained

    try:
        width = max(0, min(100, percentage))
    except TypeError:
        # Not comparable with numbers (e.g. a pre-formatted string): keep as-is.
        width = percentage

    safe_message = html.escape(str(message))
    progress_html = f"""
    <div class="progress-container">
        <div class="progress-bar" style="width: {width}%">{safe_message}</div>
    </div>
    """
    container.markdown(progress_html, unsafe_allow_html=True)
def extract_section(text, section_name):
    """Extract the content that follows a section heading in free text.

    The heading is matched literally and case-insensitively. Several patterns
    are tried in order and the first one that matches wins. The first pattern
    skips the remainder of the heading line and captures the paragraph that
    follows it, so the later patterns only apply when no newline follows the
    heading anywhere in the text.

    Args:
        text: The text to search.
        section_name: Heading to look for, e.g. ``"Market Size"``. Regex
            metacharacters in the heading are treated as literal characters.

    Returns:
        The stripped captured text, ``"No <section> information found"`` when
        no pattern matches, or ``"Error extracting <section>"`` when the
        search itself fails (for example when ``text`` is not a string).
    """
    try:
        # Escape the heading so names such as "Cost (USD)" match literally
        # instead of being interpreted as regex syntax.
        name = re.escape(section_name)
        # Raw f-strings: "\s" and "\n" must reach the regex engine untouched.
        patterns = [
            rf"{name}.*?\n(.*?)(?=\n\n|$)",
            rf"{name}[:\s](.*?)(?=\n\n|$)",
            rf"{name}:\s*(.*?)(?=\n|$)",
            rf"{name}\s*(.*?)(?=\n|$)",
        ]
        for pattern in patterns:
            match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
            if match:
                return match.group(1).strip()
        return f"No {section_name.lower()} information found"
    except (re.error, TypeError) as e:
        print(f"Error extracting {section_name}: {str(e)}")
        return f"Error extracting {section_name.lower()}"
def extract_sources(text):
    """Collect citation-like snippets from the text.

    Each pattern is scanned over the whole text in turn, so results are
    grouped by pattern (not by position in the text) and may repeat.

    Returns:
        A list of stripped matches; a single placeholder entry when nothing
        matched; or a single error entry when scanning fails.
    """
    citation_patterns = (
        r"Source:.*?(?:\n|$)",
        r"\[.*?\]",
        r"\(https?://.*?\)",
        r"Reference:.*?(?:\n|$)",
        r"Retrieved from:.*?(?:\n|$)",
    )
    try:
        # None of the patterns has a capturing group, so findall yields the
        # full matched text for every hit.
        found = [
            hit.strip()
            for citation_pattern in citation_patterns
            for hit in re.findall(citation_pattern, text, re.MULTILINE)
        ]
    except Exception as e:
        print(f"Error extracting sources: {str(e)}")
        return ["Error extracting sources"]
    if found:
        return found
    return ["Sources not explicitly mentioned"]
# (result key, heading searched for in the raw text when the key is missing)
_SUMMARY_HEADINGS = (
    ("summary", "Executive Summary"),
    ("market_size", "Market Size"),
    ("growth_rate", "Growth Rate"),
    ("key_players", "Key Players"),
)


def _empty_metrics():
    """Return a fresh, empty metrics structure (never shared between results)."""
    return {"market_size_data": [], "growth_rates": [], "market_shares": {}}


def format_json_output(raw_output):
    """Format CrewOutput into proper JSON structure.

    The raw text is taken from ``raw_output.raw_output`` when that attribute
    exists, otherwise from ``str(raw_output)``. If the text contains a JSON
    object (first ``{`` through last ``}``) that parses successfully, its
    values are used and any missing piece is filled in from the raw text.
    If nothing parses, the whole structure is built from the raw text.

    Args:
        raw_output: The crew result object, or any value convertible to text.

    Returns:
        A dict with the keys ``exec_summary`` (``summary``, ``market_size``,
        ``growth_rate``, ``key_players``), ``detailed_report``, ``sources``
        and ``metrics``. On an unexpected error a safe default structure with
        placeholder values is returned instead of raising.
    """
    raw_text = None
    try:
        # Get raw text from output
        if hasattr(raw_output, 'raw_output'):
            raw_text = str(raw_output.raw_output)
        else:
            raw_text = str(raw_output)
        print("Raw text received:", raw_text[:500])  # Debug print

        # Try to find and parse JSON structure
        parsed_json = None
        match = re.search(r"\{[\s\S]*\}", raw_text)
        if match:
            json_str = match.group()
            try:
                parsed_json = json.loads(json_str)
            except json.JSONDecodeError as e:
                print(f"JSON parsing error: {str(e)}")  # Debug print
            else:
                print("Successfully parsed JSON:", json_str[:500])  # Debug print

        if not isinstance(parsed_json, dict):
            # No usable JSON: every field below falls back to the raw text.
            print("Creating structured format from raw text")  # Debug print
            parsed_json = {}

        # A non-dict "exec_summary" (null, string, list, ...) must not abort
        # the whole report; treat it as absent and extract from the text.
        summary_json = parsed_json.get('exec_summary')
        if not isinstance(summary_json, dict):
            summary_json = {}

        # Text extraction runs only for values the JSON does not provide.
        exec_summary = {
            key: (
                summary_json[key]
                if key in summary_json
                else extract_section(raw_text, heading)
            )
            for key, heading in _SUMMARY_HEADINGS
        }
        sources = (
            parsed_json['sources']
            if 'sources' in parsed_json
            else extract_sources(raw_text)
        )
        return {
            "exec_summary": exec_summary,
            "detailed_report": parsed_json.get('detailed_report', raw_text),
            "sources": sources,
            "metrics": parsed_json.get('metrics', _empty_metrics()),
        }
    except Exception as e:
        print(f"Error in format_json_output: {str(e)}")  # Debug print
        # Return a safe default structure
        return {
            "exec_summary": {
                "summary": "Error processing report",
                "market_size": "Data not available",
                "growth_rate": "Data not available",
                "key_players": "Data not available"
            },
            "detailed_report": raw_text if raw_text is not None else str(raw_output),
            "sources": [],
            "metrics": _empty_metrics()
        }