Spaces:
Sleeping
Sleeping
File size: 5,498 Bytes
f3ed66a 2b474dd f3ed66a 2b474dd f3ed66a 2b474dd e5de79f f3ed66a 2b474dd e5de79f f3ed66a 2b474dd e5de79f 2b474dd e5de79f 2b474dd e5de79f 2b474dd e5de79f 2b474dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
# utils.py
import re
import json
import streamlit as st
def update_progress(container, percentage, message=""):
    """Render an HTML progress bar into ``container``.

    Args:
        container: Any object exposing ``markdown(body, unsafe_allow_html=...)``
            (presumably a Streamlit placeholder; confirm against callers).
        percentage: Fill level of the bar. Numeric values are clamped to the
            0-100 range so the bar never gets a negative width or overflows
            its track; non-numeric values are passed through unchanged.
        message: Text shown inside the bar.
    """
    try:
        # ``percentage`` is the first argument of max/min so that in-range
        # values keep their original type (and therefore their formatting).
        width = min(max(percentage, 0), 100)
    except TypeError:
        # Not comparable with numbers (e.g. a pre-formatted string): keep as is.
        width = percentage

    # NOTE(review): ``message`` is interpolated without HTML escaping and the
    # markup is rendered with unsafe_allow_html=True, so callers must not pass
    # untrusted text here.
    progress_html = (
        "\n"
        '<div class="progress-container">\n'
        f'<div class="progress-bar" style="width: {width}%">{message}</div>\n'
        "</div>\n"
    )
    container.markdown(progress_html, unsafe_allow_html=True)
def extract_section(text, section_name):
    """Extract the text that follows a section heading.

    The heading is matched literally and case-insensitively. Four patterns
    are tried in order and the first one that matches wins:

    1. whatever follows the heading's own line, up to the next blank line
       or the end of the text;
    2. whatever follows a colon or whitespace character after the heading,
       up to the next blank line or the end of the text;
    3. the rest of the line after ``heading:``;
    4. the rest of the line after the heading.

    Because pattern 1 has priority, anything sharing the heading's line
    (e.g. ``Market Size: $5B``) is skipped whenever more lines follow it.

    Args:
        text: The text to search.
        section_name: Heading to look for; treated as plain text, not as a
            regular expression.

    Returns:
        The captured text with surrounding whitespace stripped,
        ``"No <section> information found"`` when nothing matches, or
        ``"Error extracting <section>"`` if matching raises.
    """
    try:
        # Escape the heading so names containing regex metacharacters, such
        # as "Market Size (USD)" or "C++", are matched as written.
        heading = re.escape(section_name)
        # Try multiple patterns to find the section
        patterns = [
            rf"{heading}.*?\n(.*?)(?=\n\n|$)",
            rf"{heading}[:\s](.*?)(?=\n\n|$)",
            rf"{heading}:\s*(.*?)(?=\n|$)",
            rf"{heading}\s*(.*?)(?=\n|$)",
        ]
        for pattern in patterns:
            match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
            if match:
                return match.group(1).strip()
        return f"No {section_name.lower()} information found"
    except Exception as e:
        # Best-effort helper: report the problem and return a placeholder
        # instead of propagating the failure to the caller.
        print(f"Error extracting {section_name}: {str(e)}")
        return f"Error extracting {section_name.lower()}"
def extract_sources(text):
    """Extract source citations from the text.

    Collects, pattern by pattern, every match of:

    * a ``Source:`` line,
    * any ``[...]`` bracketed span on a single line,
    * a parenthesised ``http``/``https`` URL,
    * a ``Reference:`` line,
    * a ``Retrieved from:`` line.

    Results are grouped in the pattern order above (not in document order),
    stripped of surrounding whitespace, and repeated citations are reported
    only once.

    Args:
        text: The text to scan.

    Returns:
        A list of citation strings, ``["Sources not explicitly mentioned"]``
        when nothing matches, or ``["Error extracting sources"]`` if
        scanning raises.
    """
    try:
        patterns = (
            r"Source:.*?(?:\n|$)",
            r"\[.*?\]",
            r"\(https?://.*?\)",
            r"Reference:.*?(?:\n|$)",
            r"Retrieved from:.*?(?:\n|$)",
        )
        found = [
            match.group().strip()
            for pattern in patterns
            for match in re.finditer(pattern, text, re.MULTILINE)
        ]
        # dict.fromkeys drops repeats while keeping first-seen order, so a
        # citation marker used many times in the text is listed once.
        sources = list(dict.fromkeys(found))
        return sources if sources else ["Sources not explicitly mentioned"]
    except Exception as e:
        # Best-effort helper: report the problem and return a placeholder.
        print(f"Error extracting sources: {str(e)}")
        return ["Error extracting sources"]
def format_json_output(raw_output):
    """Format CrewOutput into proper JSON structure.

    The raw text is taken from ``raw_output.raw_output`` when that attribute
    exists, otherwise from ``str(raw_output)``. The span from the first ``{``
    to the last ``}`` is parsed as JSON; keys present in the parsed object
    are used as-is and every missing key is filled from the raw text
    (``extract_section`` for the executive-summary fields,
    ``extract_sources`` for the sources, the full raw text for the detailed
    report, empty containers for the metrics). If no JSON object can be
    parsed, the whole structure is built from the raw text.

    An ``exec_summary`` value that is not a JSON object is treated as if it
    had no fields, so the rest of the parsed payload is still used.

    Args:
        raw_output: The crew result, or any object convertible with ``str``.

    Returns:
        A dict with the keys ``exec_summary`` (``summary``, ``market_size``,
        ``growth_rate``, ``key_players``), ``detailed_report``, ``sources``
        and ``metrics``. On an unexpected error a placeholder structure with
        the same keys is returned.
    """
    # (result key, heading searched for in the raw text when the key is absent)
    summary_fields = (
        ("summary", "Executive Summary"),
        ("market_size", "Market Size"),
        ("growth_rate", "Growth Rate"),
        ("key_players", "Key Players"),
    )
    raw_text = None  # stays None only if obtaining the text itself failed
    try:
        # Get raw text from output
        if hasattr(raw_output, 'raw_output'):
            raw_text = str(raw_output.raw_output)
        else:
            raw_text = str(raw_output)
        print("Raw text received:", raw_text[:500])  # Debug print

        # Try to find and parse JSON structure
        parsed_json = None
        try:
            match = re.search(r"\{[\s\S]*\}", raw_text)
            if match:
                json_str = match.group()
                parsed_json = json.loads(json_str)
                print("Successfully parsed JSON:", json_str[:500])  # Debug print
        except json.JSONDecodeError as e:
            print(f"JSON parsing error: {str(e)}")  # Debug print

        if not isinstance(parsed_json, dict):
            # No usable JSON: an empty mapping makes every field below fall
            # back to extraction from the raw text.
            print("Creating structured format from raw text")  # Debug print
            parsed_json = {}

        exec_summary = parsed_json.get('exec_summary')
        if not isinstance(exec_summary, dict):
            # A string/null/list here must not discard the rest of the payload.
            exec_summary = {}

        return {
            "exec_summary": {
                # Only scan the raw text for fields the JSON did not provide.
                key: (
                    exec_summary[key] if key in exec_summary
                    else extract_section(raw_text, heading)
                )
                for key, heading in summary_fields
            },
            "detailed_report": parsed_json.get('detailed_report', raw_text),
            "sources": (
                parsed_json['sources'] if 'sources' in parsed_json
                else extract_sources(raw_text)
            ),
            "metrics": parsed_json.get('metrics', _empty_metrics()),
        }
    except Exception as e:
        print(f"Error in format_json_output: {str(e)}")  # Debug print
        # Return a safe default structure
        return {
            "exec_summary": {
                "summary": "Error processing report",
                "market_size": "Data not available",
                "growth_rate": "Data not available",
                "key_players": "Data not available",
            },
            "detailed_report": raw_text if raw_text is not None else str(raw_output),
            "sources": [],
            "metrics": _empty_metrics(),
        }


def _empty_metrics():
    """Return a fresh, empty metrics structure."""
    return {
        "market_size_data": [],
        "growth_rates": [],
        "market_shares": {},
    }