cryogenic22 committed on
Commit
e5de79f
·
verified ·
1 Parent(s): f8f58a8

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +81 -37
utils.py CHANGED
@@ -14,49 +14,92 @@ def update_progress(container, percentage, message=""):
14
 
15
  def extract_section(text, section_name):
16
  """Extract a section from the text"""
17
- pattern = f"{section_name}.*?\n(.*?)(?=\n\n|$)"
18
- match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
19
- if match:
20
- return match.group(1).strip()
21
-
22
- pattern2 = f"{section_name}[:\s](.*?)(?=\n\n|$)"
23
- match = re.search(pattern2, text, re.DOTALL | re.IGNORECASE)
24
- return match.group(1).strip() if match else "Information not found"
 
 
 
 
 
 
 
 
 
 
25
 
26
  def extract_sources(text):
27
  """Extract sources from the text"""
28
- sources = []
29
- patterns = [
30
- r"Source:.*?(?:\n|$)",
31
- r"\[.*?\]",
32
- r"\(https?://.*?\)",
33
- r"Reference:.*?(?:\n|$)"
34
- ]
35
-
36
- for pattern in patterns:
37
- matches = re.finditer(pattern, text, re.MULTILINE)
38
- sources.extend([match.group().strip() for match in matches])
39
-
40
- return sources if sources else ["Sources not explicitly mentioned"]
 
 
 
 
 
41
 
42
  def format_json_output(raw_output):
43
  """Format CrewOutput into proper JSON structure"""
44
  try:
 
45
  if hasattr(raw_output, 'raw_output'):
46
  raw_text = str(raw_output.raw_output)
47
  else:
48
  raw_text = str(raw_output)
49
-
50
- # Try to find JSON structure
51
- json_pattern = r"\{[\s\S]*\}"
52
- match = re.search(json_pattern, raw_text)
53
- if match:
54
- try:
55
- return json.loads(match.group())
56
- except:
57
- pass
58
-
59
- # Create structured format
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  return {
61
  "exec_summary": {
62
  "summary": extract_section(raw_text, "Executive Summary"),
@@ -73,13 +116,14 @@ def format_json_output(raw_output):
73
  }
74
  }
75
  except Exception as e:
76
- st.error(f"Error formatting output: {str(e)}")
 
77
  return {
78
  "exec_summary": {
79
- "summary": "Error formatting report",
80
- "market_size": "N/A",
81
- "growth_rate": "N/A",
82
- "key_players": "N/A"
83
  },
84
  "detailed_report": raw_text if 'raw_text' in locals() else str(raw_output),
85
  "sources": [],
 
14
 
15
  def extract_section(text, section_name):
16
  """Extract a section from the text"""
17
+ try:
18
+ # Try multiple patterns to find the section
19
+ patterns = [
20
+ f"{section_name}.*?\n(.*?)(?=\n\n|$)",
21
+ f"{section_name}[:\s](.*?)(?=\n\n|$)",
22
+ f"{section_name}:\s*(.*?)(?=\n|$)",
23
+ f"{section_name}\s*(.*?)(?=\n|$)"
24
+ ]
25
+
26
+ for pattern in patterns:
27
+ match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
28
+ if match:
29
+ return match.group(1).strip()
30
+
31
+ return f"No {section_name.lower()} information found"
32
+ except Exception as e:
33
+ print(f"Error extracting {section_name}: {str(e)}")
34
+ return f"Error extracting {section_name.lower()}"
35
 
36
  def extract_sources(text):
37
  """Extract sources from the text"""
38
+ try:
39
+ sources = []
40
+ patterns = [
41
+ r"Source:.*?(?:\n|$)",
42
+ r"\[.*?\]",
43
+ r"\(https?://.*?\)",
44
+ r"Reference:.*?(?:\n|$)",
45
+ r"Retrieved from:.*?(?:\n|$)"
46
+ ]
47
+
48
+ for pattern in patterns:
49
+ matches = re.finditer(pattern, text, re.MULTILINE)
50
+ sources.extend([match.group().strip() for match in matches])
51
+
52
+ return sources if sources else ["Sources not explicitly mentioned"]
53
+ except Exception as e:
54
+ print(f"Error extracting sources: {str(e)}")
55
+ return ["Error extracting sources"]
56
 
57
  def format_json_output(raw_output):
58
  """Format CrewOutput into proper JSON structure"""
59
  try:
60
+ # Get raw text from output
61
  if hasattr(raw_output, 'raw_output'):
62
  raw_text = str(raw_output.raw_output)
63
  else:
64
  raw_text = str(raw_output)
65
+
66
+ print("Raw text received:", raw_text[:500]) # Debug print
67
+
68
+ # Try to find and parse JSON structure
69
+ try:
70
+ json_pattern = r"\{[\s\S]*\}"
71
+ match = re.search(json_pattern, raw_text)
72
+ if match:
73
+ json_str = match.group()
74
+ parsed_json = json.loads(json_str)
75
+ print("Successfully parsed JSON:", json_str[:500]) # Debug print
76
+
77
+ # Ensure all required fields exist
78
+ if isinstance(parsed_json, dict):
79
+ return {
80
+ "exec_summary": {
81
+ "summary": parsed_json.get('exec_summary', {}).get('summary',
82
+ extract_section(raw_text, "Executive Summary")),
83
+ "market_size": parsed_json.get('exec_summary', {}).get('market_size',
84
+ extract_section(raw_text, "Market Size")),
85
+ "growth_rate": parsed_json.get('exec_summary', {}).get('growth_rate',
86
+ extract_section(raw_text, "Growth Rate")),
87
+ "key_players": parsed_json.get('exec_summary', {}).get('key_players',
88
+ extract_section(raw_text, "Key Players"))
89
+ },
90
+ "detailed_report": parsed_json.get('detailed_report', raw_text),
91
+ "sources": parsed_json.get('sources', extract_sources(raw_text)),
92
+ "metrics": parsed_json.get('metrics', {
93
+ "market_size_data": [],
94
+ "growth_rates": [],
95
+ "market_shares": {}
96
+ })
97
+ }
98
+ except json.JSONDecodeError as e:
99
+ print(f"JSON parsing error: {str(e)}") # Debug print
100
+
101
+ # If JSON parsing fails, create structured format from raw text
102
+ print("Creating structured format from raw text") # Debug print
103
  return {
104
  "exec_summary": {
105
  "summary": extract_section(raw_text, "Executive Summary"),
 
116
  }
117
  }
118
  except Exception as e:
119
+ print(f"Error in format_json_output: {str(e)}") # Debug print
120
+ # Return a safe default structure
121
  return {
122
  "exec_summary": {
123
+ "summary": "Error processing report",
124
+ "market_size": "Data not available",
125
+ "growth_rate": "Data not available",
126
+ "key_players": "Data not available"
127
  },
128
  "detailed_report": raw_text if 'raw_text' in locals() else str(raw_output),
129
  "sources": [],