cryogenic22 committed on
Commit
2b474dd
·
verified ·
1 Parent(s): 848d03e

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +82 -123
utils.py CHANGED
@@ -1,132 +1,91 @@
1
  # utils.py
 
2
  import json
3
- from datetime import datetime
4
  import streamlit as st
5
- import plotly.graph_objects as go
6
- import plotly.express as px
7
- import re
8
- import pdfkit
9
- from jinja2 import Template
10
-
11
class ReportManager:
    """Keep generated market research reports in Streamlit session state.

    Reports are stored in ``st.session_state.reports`` and keyed by an id of
    the form ``"<topic>_<YYYY-MM-DD HH:MM:SS>"``.
    """

    # Factors that bring every matched dollar figure onto one axis (billions),
    # so "$500 million" and "$2 billion" are comparable when plotted together.
    _UNIT_IN_BILLIONS = {"million": 0.001, "billion": 1.0, "trillion": 1000.0}

    def __init__(self):
        """Create the session-level report store if it does not exist yet."""
        if 'reports' not in st.session_state:
            st.session_state.reports = {}

    def save_report(self, topic: str, report_data: dict):
        """Save a new report with a timestamp and return its id.

        Args:
            topic: Report topic; also used as the prefix of the report id.
            report_data: Mapping that may contain ``exec_summary``,
                ``detailed_report``, ``sources`` and ``charts``. Missing keys
                fall back to an empty string / empty list.

        Returns:
            The id under which the report was stored. The timestamp has
            one-second resolution, so saving the same topic twice within the
            same second replaces the earlier entry.
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        report_id = f"{topic}_{timestamp}"

        st.session_state.reports[report_id] = {
            'topic': topic,
            'timestamp': timestamp,
            'exec_summary': report_data.get('exec_summary', ''),
            'detailed_report': report_data.get('detailed_report', ''),
            'sources': report_data.get('sources', []),
            'charts': report_data.get('charts', []),
        }
        return report_id

    def get_report(self, report_id: str):
        """Return the stored report for ``report_id``, or ``None`` if unknown."""
        return st.session_state.reports.get(report_id)

    def get_all_reports(self):
        """Return the mapping of every saved report, keyed by report id."""
        return st.session_state.reports

    def generate_visualizations(self, report_text: str):
        """Build plotly figures from numbers found in the report text.

        Two regex scans are run over ``report_text``: dollar amounts followed
        by ``million``/``billion``/``trillion``, and percentages.

        Args:
            report_text: Free-form report text to scan.

        Returns:
            A list with zero, one or two ``go.Figure`` objects: a bar chart
            of the first and last dollar amounts, and a line chart of all
            percentages in order of appearance.
        """
        charts = []

        # The unit is captured (not just matched) so values can be normalized
        # and labelled correctly instead of assuming everything is billions.
        market_size_pattern = r"\$\s*(\d+(?:\.\d+)?)\s*(billion|million|trillion)"
        growth_rate_pattern = r"(\d+(?:\.\d+)?)\s*%"

        market_sizes = re.findall(market_size_pattern, report_text)
        if market_sizes:
            # Heuristic: the first mention is treated as the current size and
            # the last mention as the projection.
            endpoints = [market_sizes[0], market_sizes[-1]]
            fig = go.Figure(data=[
                go.Bar(
                    x=['Current Market Size', 'Projected Size'],
                    y=[
                        float(amount) * self._UNIT_IN_BILLIONS[unit]
                        for amount, unit in endpoints
                    ],
                    # Exactly one label per bar, showing the figure as written.
                    text=[f'${amount} {unit}' for amount, unit in endpoints],
                    textposition='auto',
                )
            ])
            fig.update_layout(
                title='Market Size Projection',
                yaxis_title='USD (billions)',
            )
            charts.append(fig)

        growth_rates = re.findall(growth_rate_pattern, report_text)
        if growth_rates:
            fig = go.Figure(data=[
                # go.Line is a deprecated alias; go.Scatter with a line mode
                # is the supported way to draw a line trace.
                go.Scatter(
                    x=list(range(len(growth_rates))),
                    y=[float(rate) for rate in growth_rates],
                    mode='lines+markers',
                )
            ])
            fig.update_layout(title='Growth Rate Trends')
            charts.append(fig)

        return charts

    def generate_pdf(self, report_id: str):
        """Generate a PDF version of a saved report.

        Args:
            report_id: Id previously returned by ``save_report``.

        Returns:
            Whatever ``pdfkit.from_string(html, False)`` returns (the rendered
            PDF rather than a file on disk), or ``None`` when the report does
            not exist or PDF generation fails. Failures are reported through
            ``st.error``.
        """
        report = self.get_report(report_id)
        if not report:
            return None

        # HTML template for the PDF. Topic, timestamp and sources are escaped
        # because they are plain text that may contain user input.
        # NOTE(review): exec_summary and detailed_report are inserted as-is,
        # as in the original template; escape them too if they are never
        # expected to carry HTML markup.
        template_str = """
        <!DOCTYPE html>
        <html>
        <head>
        <style>
        body { font-family: Arial, sans-serif; }
        .header { text-align: center; margin-bottom: 30px; }
        .section { margin: 20px 0; }
        .charts { text-align: center; }
        </style>
        </head>
        <body>
        <div class="header">
        <h1>{{ report.topic|e }} - Market Research Report</h1>
        <p>Generated on: {{ report.timestamp|e }}</p>
        </div>

        <div class="section">
        <h2>Executive Summary</h2>
        {{ report.exec_summary }}
        </div>

        <div class="section">
        <h2>Detailed Analysis</h2>
        {{ report.detailed_report }}
        </div>

        <div class="section">
        <h2>Sources</h2>
        <ul>
        {% for source in report.sources %}
        <li>{{ source|e }}</li>
        {% endfor %}
        </ul>
        </div>
        </body>
        </html>
        """

        rendered_html = Template(template_str).render(report=report)

        # Conversion shells out to wkhtmltopdf via pdfkit; any failure is
        # surfaced in the UI instead of crashing the app.
        try:
            return pdfkit.from_string(rendered_html, False)
        except Exception as e:
            st.error(f"Error generating PDF: {str(e)}")
            return None
 
1
  # utils.py
2
+ import re
3
  import json
 
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
def update_progress(container, percentage, message=""):
    """Render a custom HTML progress bar into a container.

    Args:
        container: Object exposing ``markdown(body, unsafe_allow_html=...)``.
        percentage: Completion value used as the bar's CSS width. Numeric
            values are clamped to the 0-100 range so the bar never overflows
            its container or gets a negative width.
        message: Optional label shown inside the bar. It is HTML-escaped
            because the markup is rendered with ``unsafe_allow_html=True``.
    """
    import html  # function-scope import keeps the file's import block untouched

    try:
        width = max(0, min(100, percentage))
    except TypeError:
        # Not comparable with numbers (e.g. a string): pass it through as
        # before, but make sure it cannot break out of the style attribute.
        width = html.escape(str(percentage))

    label = html.escape(str(message))
    progress_html = (
        "\n"
        '<div class="progress-container">\n'
        f'<div class="progress-bar" style="width: {width}%">{label}</div>\n'
        "</div>\n"
    )
    container.markdown(progress_html, unsafe_allow_html=True)
14
 
15
def extract_section(text, section_name):
    """Extract the content of a named section from free-form report text.

    The heading is matched case-insensitively and literally (regex
    metacharacters in ``section_name`` are escaped). For each occurrence of
    the heading, in order, these layouts are tried:

    1. ``Heading: content`` - text after a colon on the heading line, running
       up to the next blank line.
    2. Heading line followed by content on the lines below, up to the next
       blank line.
    3. Heading followed by ``:`` or whitespace and content with no newline
       after the heading (e.g. the heading is on the last line).

    Args:
        text: Text to search. Falsy values are treated as "nothing to search".
        section_name: Heading text to look for.

    Returns:
        The stripped section content, or ``"Information not found"`` when the
        heading is absent or no non-empty content follows it.
    """
    not_found = "Information not found"
    if not text or not section_name:
        return not_found

    body_forms = (
        # Inline value first: otherwise "Heading: value\n\nNext paragraph"
        # would skip the value and return the next paragraph instead.
        re.compile(r"[ \t]*:[ \t]*(\S.*?)(?=\n\n|$)", re.DOTALL),
        re.compile(r"[^\n]*\n(.*?)(?=\n\n|$)", re.DOTALL),
        re.compile(r"[:\s](.*?)(?=\n\n|$)", re.DOTALL),
    )
    heading_re = re.compile(re.escape(section_name), re.IGNORECASE)

    for heading in heading_re.finditer(text):
        for form in body_forms:
            # Anchor each layout right after this heading occurrence so an
            # earlier heading always wins over a later mention.
            match = form.match(text, heading.end())
            if match:
                content = match.group(1).strip()
                if content:
                    return content
    return not_found
 
 
 
 
 
 
 
 
 
 
 
25
 
26
def extract_sources(text):
    """Extract source citations from report text.

    Four kinds of citation are recognized: ``Source: ...`` lines, bracketed
    text such as ``[1]``, parenthesized http(s) URLs, and ``Reference: ...``
    lines. Results are grouped by kind in that order; within the list each
    distinct citation appears once, at the position of its first occurrence.

    Args:
        text: Text to scan. Falsy values are treated as containing no sources.

    Returns:
        A list of citation strings, or ``["Sources not explicitly mentioned"]``
        when nothing was found.
    """
    patterns = (
        r"Source:.*?(?:\n|$)",
        r"\[.*?\]",
        r"\(https?://.*?\)",
        r"Reference:.*?(?:\n|$)",
    )

    # A dict keeps insertion order, which gives order-preserving de-duplication.
    unique_sources = {}
    if text:
        for pattern in patterns:
            for match in re.finditer(pattern, text, re.MULTILINE):
                citation = match.group().strip()
                if citation:
                    unique_sources.setdefault(citation, None)

    return list(unique_sources) if unique_sources else ["Sources not explicitly mentioned"]
41
 
42
def _empty_metrics():
    """Return a fresh, empty ``metrics`` placeholder structure."""
    return {
        "market_size_data": [],
        "growth_rates": [],
        "market_shares": {},
    }


def format_json_output(raw_output):
    """Format crew output into the report dictionary used by the app.

    The output is converted to text (using its ``raw_output`` attribute when
    present). If the text contains a ``{...}`` span that parses as JSON, the
    parsed value is returned as-is. Otherwise a structured report is built
    from the text with ``extract_section`` and ``extract_sources``.

    Args:
        raw_output: Crew result object or any value convertible with ``str``.

    Returns:
        The parsed JSON object, or a dict with the keys ``exec_summary``,
        ``detailed_report``, ``sources`` and ``metrics``. If anything fails,
        the error is shown with ``st.error`` and a placeholder report with
        "N/A" fields is returned.
    """
    # Tracked outside the try so the error path can tell whether the text
    # conversion itself succeeded.
    raw_text = None
    try:
        if hasattr(raw_output, 'raw_output'):
            raw_text = str(raw_output.raw_output)
        else:
            raw_text = str(raw_output)

        # Try to find a JSON structure: from the first "{" to the last "}".
        match = re.search(r"\{[\s\S]*\}", raw_text)
        if match:
            try:
                return json.loads(match.group())
            except (ValueError, RecursionError):
                # Not valid JSON (JSONDecodeError is a ValueError) or too
                # deeply nested: fall back to text extraction below.
                pass

        # Create structured format from the plain text.
        return {
            "exec_summary": {
                "summary": extract_section(raw_text, "Executive Summary"),
                "market_size": extract_section(raw_text, "Market Size"),
                "growth_rate": extract_section(raw_text, "Growth Rate"),
                "key_players": extract_section(raw_text, "Key Players"),
            },
            "detailed_report": raw_text,
            "sources": extract_sources(raw_text),
            "metrics": _empty_metrics(),
        }
    except Exception as e:
        st.error(f"Error formatting output: {str(e)}")
        return {
            "exec_summary": {
                "summary": "Error formatting report",
                "market_size": "N/A",
                "growth_rate": "N/A",
                "key_players": "N/A",
            },
            "detailed_report": raw_text if raw_text is not None else str(raw_output),
            "sources": [],
            "metrics": _empty_metrics(),
        }