DB Chat for pro users only
Browse files- python_code_executor_service.py +110 -56
- requirements.txt +0 -1
python_code_executor_service.py
CHANGED
|
@@ -16,30 +16,33 @@ import seaborn as sns
|
|
| 16 |
import scipy.stats as stats
|
| 17 |
from pydantic import BaseModel
|
| 18 |
from tabulate import tabulate
|
| 19 |
-
from PIL import Image # New import
|
| 20 |
|
| 21 |
from supabase_service import upload_file_to_supabase
|
| 22 |
|
| 23 |
-
# Load environment variables
|
| 24 |
load_dotenv()
|
| 25 |
|
| 26 |
-
|
| 27 |
class CodeResponse(BaseModel):
|
|
|
|
| 28 |
language: str = "python"
|
| 29 |
code: str
|
| 30 |
|
| 31 |
|
| 32 |
class ChartSpecification(BaseModel):
|
|
|
|
| 33 |
image_description: str
|
| 34 |
code: Optional[str] = None
|
|
|
|
| 35 |
|
| 36 |
|
| 37 |
class AnalysisOperation(BaseModel):
|
|
|
|
| 38 |
code: CodeResponse
|
| 39 |
result_var: str
|
| 40 |
|
| 41 |
|
| 42 |
class CsvChatResult(BaseModel):
|
|
|
|
| 43 |
response_type: Literal["casual", "data_analysis", "visualization", "mixed"]
|
| 44 |
casual_response: str
|
| 45 |
analysis_operations: List[AnalysisOperation]
|
|
@@ -47,149 +50,200 @@ class CsvChatResult(BaseModel):
|
|
| 47 |
|
| 48 |
|
| 49 |
class PythonExecutor:
|
|
|
|
|
|
|
| 50 |
def __init__(self, df: pd.DataFrame, charts_folder: str = "generated_charts"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
self.df = df
|
| 52 |
self.charts_folder = Path(charts_folder)
|
| 53 |
self.charts_folder.mkdir(exist_ok=True)
|
| 54 |
self.exec_locals = {}
|
| 55 |
-
|
| 56 |
def execute_code(self, code: str) -> Dict[str, Any]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
output = ""
|
| 58 |
error = None
|
| 59 |
plots = []
|
| 60 |
-
|
|
|
|
| 61 |
stdout = io.StringIO()
|
|
|
|
|
|
|
| 62 |
original_show = plt.show
|
| 63 |
-
|
| 64 |
def custom_show():
|
|
|
|
| 65 |
for i, fig in enumerate(plt.get_fignums()):
|
| 66 |
figure = plt.figure(fig)
|
|
|
|
| 67 |
buf = io.BytesIO()
|
| 68 |
figure.savefig(buf, format='png', bbox_inches='tight')
|
| 69 |
buf.seek(0)
|
| 70 |
plots.append(buf.read())
|
| 71 |
plt.close('all')
|
| 72 |
-
|
| 73 |
try:
|
|
|
|
| 74 |
exec_globals = {
|
|
|
|
| 75 |
'pd': pd,
|
| 76 |
'np': np,
|
| 77 |
'df': self.df,
|
|
|
|
|
|
|
| 78 |
'plt': plt,
|
| 79 |
'sns': sns,
|
| 80 |
'tabulate': tabulate,
|
|
|
|
|
|
|
| 81 |
'stats': stats,
|
|
|
|
|
|
|
| 82 |
'datetime': datetime,
|
| 83 |
'timedelta': timedelta,
|
| 84 |
'time': time,
|
|
|
|
|
|
|
| 85 |
'json': json,
|
| 86 |
'__builtins__': __builtins__,
|
| 87 |
}
|
| 88 |
-
|
|
|
|
| 89 |
plt.show = custom_show
|
| 90 |
-
|
|
|
|
| 91 |
with contextlib.redirect_stdout(stdout):
|
| 92 |
exec(code, exec_globals, self.exec_locals)
|
| 93 |
-
|
| 94 |
output = stdout.getvalue()
|
| 95 |
-
|
| 96 |
except Exception as e:
|
| 97 |
error = {
|
| 98 |
"message": str(e),
|
| 99 |
"traceback": traceback.format_exc()
|
| 100 |
}
|
| 101 |
finally:
|
|
|
|
| 102 |
plt.show = original_show
|
| 103 |
-
|
| 104 |
return {
|
| 105 |
'output': output,
|
| 106 |
'error': error,
|
| 107 |
'plots': plots,
|
| 108 |
'locals': self.exec_locals
|
| 109 |
}
|
| 110 |
-
|
| 111 |
-
def is_blank_image(self, image_bytes: bytes) -> bool:
|
| 112 |
-
"""Check if the image is blank (all pixels same intensity)"""
|
| 113 |
-
try:
|
| 114 |
-
img = Image.open(io.BytesIO(image_bytes)).convert("L") # grayscale
|
| 115 |
-
extrema = img.getextrema()
|
| 116 |
-
return extrema[0] == extrema[1] # all pixels same
|
| 117 |
-
except Exception:
|
| 118 |
-
return False
|
| 119 |
-
|
| 120 |
async def save_plot_to_supabase(self, plot_data: bytes, description: str, chat_id: str) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
filename = f"chart_{uuid.uuid4().hex}.png"
|
| 122 |
filepath = self.charts_folder / filename
|
| 123 |
-
|
|
|
|
| 124 |
with open(filepath, 'wb') as f:
|
| 125 |
f.write(plot_data)
|
| 126 |
-
|
| 127 |
try:
|
|
|
|
| 128 |
public_url = await upload_file_to_supabase(
|
| 129 |
file_path=str(filepath),
|
| 130 |
file_name=filename,
|
| 131 |
chat_id=chat_id
|
| 132 |
)
|
|
|
|
|
|
|
| 133 |
os.remove(filepath)
|
|
|
|
| 134 |
return public_url
|
| 135 |
except Exception as e:
|
|
|
|
| 136 |
if os.path.exists(filepath):
|
| 137 |
os.remove(filepath)
|
| 138 |
raise Exception(f"Failed to upload plot to Supabase: {e}")
|
| 139 |
-
|
| 140 |
def _format_result(self, result: Any) -> str:
|
|
|
|
| 141 |
if isinstance(result, (pd.DataFrame, pd.Series)):
|
| 142 |
-
|
| 143 |
-
return json.dumps(json.loads(json_str), indent=2)
|
| 144 |
elif isinstance(result, (dict, list)):
|
| 145 |
return json.dumps(result, indent=2)
|
| 146 |
return str(result)
|
| 147 |
-
|
| 148 |
async def process_response(self, response: CsvChatResult, chat_id: str) -> str:
|
|
|
|
| 149 |
output_parts = [response.casual_response]
|
| 150 |
-
|
|
|
|
| 151 |
for operation in response.analysis_operations:
|
| 152 |
execution_result = self.execute_code(operation.code.code)
|
| 153 |
result = self.exec_locals.get(operation.result_var)
|
| 154 |
-
|
| 155 |
if execution_result['error']:
|
| 156 |
-
output_parts.append(f"\
|
| 157 |
output_parts.append("```python\n" + execution_result['error']['message'] + "\n```")
|
| 158 |
elif result is not None:
|
| 159 |
if result is None or (hasattr(result, '__len__') and len(result) == 0):
|
| 160 |
-
output_parts.append(f"\
|
| 161 |
else:
|
| 162 |
-
output_parts.append(f"\
|
| 163 |
output_parts.append("```python\n" + self._format_result(result) + "\n```")
|
| 164 |
else:
|
| 165 |
-
|
| 166 |
-
if
|
| 167 |
-
output_parts.append(
|
| 168 |
-
|
| 169 |
-
|
|
|
|
|
|
|
| 170 |
if response.charts:
|
| 171 |
-
output_parts.append("\
|
| 172 |
for chart in response.charts:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
if chart.code:
|
| 174 |
chart_result = self.execute_code(chart.code)
|
| 175 |
if chart_result['plots']:
|
| 176 |
for plot_data in chart_result['plots']:
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
output_parts.append(f"\n⚠️ Values are missing - Error uploading chart: {str(e)}")
|
| 188 |
-
else:
|
| 189 |
-
output_parts.append(f"\n⚠️ Skipped blank chart: '{chart.image_description}'")
|
| 190 |
elif chart_result['error']:
|
| 191 |
-
output_parts.append("```python\n" + f"
|
| 192 |
else:
|
| 193 |
-
output_parts.append(f"\
|
| 194 |
-
|
| 195 |
-
return "\n".join(output_parts)
|
|
|
|
| 16 |
import scipy.stats as stats
|
| 17 |
from pydantic import BaseModel
|
| 18 |
from tabulate import tabulate
|
|
|
|
| 19 |
|
| 20 |
from supabase_service import upload_file_to_supabase
|
| 21 |
|
| 22 |
+
# Load environment variables from .env file
|
| 23 |
load_dotenv()
|
| 24 |
|
|
|
|
| 25 |
class CodeResponse(BaseModel):
|
| 26 |
+
"""Container for code-related responses"""
|
| 27 |
language: str = "python"
|
| 28 |
code: str
|
| 29 |
|
| 30 |
|
| 31 |
class ChartSpecification(BaseModel):
|
| 32 |
+
"""Details about requested charts"""
|
| 33 |
image_description: str
|
| 34 |
code: Optional[str] = None
|
| 35 |
+
media_pointer: Optional[str] = None # Added media pointer field
|
| 36 |
|
| 37 |
|
| 38 |
class AnalysisOperation(BaseModel):
|
| 39 |
+
"""Container for a single analysis operation with its code and result"""
|
| 40 |
code: CodeResponse
|
| 41 |
result_var: str
|
| 42 |
|
| 43 |
|
| 44 |
class CsvChatResult(BaseModel):
|
| 45 |
+
"""Structured response for CSV-related AI interactions"""
|
| 46 |
response_type: Literal["casual", "data_analysis", "visualization", "mixed"]
|
| 47 |
casual_response: str
|
| 48 |
analysis_operations: List[AnalysisOperation]
|
|
|
|
| 50 |
|
| 51 |
|
| 52 |
class PythonExecutor:
|
| 53 |
+
"""Handles execution of Python code with comprehensive data analysis libraries"""
|
| 54 |
+
|
| 55 |
def __init__(self, df: pd.DataFrame, charts_folder: str = "generated_charts"):
|
| 56 |
+
"""
|
| 57 |
+
Initialize the PythonExecutor with a DataFrame
|
| 58 |
+
|
| 59 |
+
Args:
|
| 60 |
+
df (pd.DataFrame): The DataFrame to operate on
|
| 61 |
+
charts_folder (str): Folder to save charts in
|
| 62 |
+
"""
|
| 63 |
self.df = df
|
| 64 |
self.charts_folder = Path(charts_folder)
|
| 65 |
self.charts_folder.mkdir(exist_ok=True)
|
| 66 |
self.exec_locals = {}
|
| 67 |
+
|
| 68 |
def execute_code(self, code: str) -> Dict[str, Any]:
|
| 69 |
+
"""
|
| 70 |
+
Execute Python code with full data analysis context and return results
|
| 71 |
+
|
| 72 |
+
Args:
|
| 73 |
+
code (str): Python code to execute
|
| 74 |
+
|
| 75 |
+
Returns:
|
| 76 |
+
dict: Dictionary containing execution results and any generated plots
|
| 77 |
+
"""
|
| 78 |
output = ""
|
| 79 |
error = None
|
| 80 |
plots = []
|
| 81 |
+
|
| 82 |
+
# Capture stdout
|
| 83 |
stdout = io.StringIO()
|
| 84 |
+
|
| 85 |
+
# Monkey patch plt.show() to save figures
|
| 86 |
original_show = plt.show
|
| 87 |
+
|
| 88 |
def custom_show():
|
| 89 |
+
"""Custom show function that saves plots instead of displaying them"""
|
| 90 |
for i, fig in enumerate(plt.get_fignums()):
|
| 91 |
figure = plt.figure(fig)
|
| 92 |
+
# Save plot to bytes buffer
|
| 93 |
buf = io.BytesIO()
|
| 94 |
figure.savefig(buf, format='png', bbox_inches='tight')
|
| 95 |
buf.seek(0)
|
| 96 |
plots.append(buf.read())
|
| 97 |
plt.close('all')
|
| 98 |
+
|
| 99 |
try:
|
| 100 |
+
# Create comprehensive execution context with data analysis libraries
|
| 101 |
exec_globals = {
|
| 102 |
+
# Core data analysis
|
| 103 |
'pd': pd,
|
| 104 |
'np': np,
|
| 105 |
'df': self.df,
|
| 106 |
+
|
| 107 |
+
# Visualization
|
| 108 |
'plt': plt,
|
| 109 |
'sns': sns,
|
| 110 |
'tabulate': tabulate,
|
| 111 |
+
|
| 112 |
+
# Statistics
|
| 113 |
'stats': stats,
|
| 114 |
+
|
| 115 |
+
# Date/time
|
| 116 |
'datetime': datetime,
|
| 117 |
'timedelta': timedelta,
|
| 118 |
'time': time,
|
| 119 |
+
|
| 120 |
+
# Utilities
|
| 121 |
'json': json,
|
| 122 |
'__builtins__': __builtins__,
|
| 123 |
}
|
| 124 |
+
|
| 125 |
+
# Replace plt.show with custom implementation
|
| 126 |
plt.show = custom_show
|
| 127 |
+
|
| 128 |
+
# Execute code and capture output
|
| 129 |
with contextlib.redirect_stdout(stdout):
|
| 130 |
exec(code, exec_globals, self.exec_locals)
|
| 131 |
+
|
| 132 |
output = stdout.getvalue()
|
| 133 |
+
|
| 134 |
except Exception as e:
|
| 135 |
error = {
|
| 136 |
"message": str(e),
|
| 137 |
"traceback": traceback.format_exc()
|
| 138 |
}
|
| 139 |
finally:
|
| 140 |
+
# Restore original plt.show
|
| 141 |
plt.show = original_show
|
| 142 |
+
|
| 143 |
return {
|
| 144 |
'output': output,
|
| 145 |
'error': error,
|
| 146 |
'plots': plots,
|
| 147 |
'locals': self.exec_locals
|
| 148 |
}
|
| 149 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
async def save_plot_to_supabase(self, plot_data: bytes, description: str, chat_id: str) -> str:
|
| 151 |
+
"""
|
| 152 |
+
Save plot to Supabase storage and return the public URL
|
| 153 |
+
|
| 154 |
+
Args:
|
| 155 |
+
plot_data (bytes): Image data in bytes
|
| 156 |
+
description (str): Description of the plot
|
| 157 |
+
chat_id (str): ID of the chat session
|
| 158 |
+
|
| 159 |
+
Returns:
|
| 160 |
+
str: Public URL of the uploaded chart
|
| 161 |
+
"""
|
| 162 |
+
# Generate unique filename
|
| 163 |
filename = f"chart_{uuid.uuid4().hex}.png"
|
| 164 |
filepath = self.charts_folder / filename
|
| 165 |
+
|
| 166 |
+
# Save the plot locally first
|
| 167 |
with open(filepath, 'wb') as f:
|
| 168 |
f.write(plot_data)
|
| 169 |
+
|
| 170 |
try:
|
| 171 |
+
# Upload to Supabase
|
| 172 |
public_url = await upload_file_to_supabase(
|
| 173 |
file_path=str(filepath),
|
| 174 |
file_name=filename,
|
| 175 |
chat_id=chat_id
|
| 176 |
)
|
| 177 |
+
|
| 178 |
+
# Remove the local file after upload
|
| 179 |
os.remove(filepath)
|
| 180 |
+
|
| 181 |
return public_url
|
| 182 |
except Exception as e:
|
| 183 |
+
# Clean up local file if upload fails
|
| 184 |
if os.path.exists(filepath):
|
| 185 |
os.remove(filepath)
|
| 186 |
raise Exception(f"Failed to upload plot to Supabase: {e}")
|
| 187 |
+
|
| 188 |
def _format_result(self, result: Any) -> str:
|
| 189 |
+
"""Format the result for display"""
|
| 190 |
if isinstance(result, (pd.DataFrame, pd.Series)):
|
| 191 |
+
return result.to_string()
|
|
|
|
| 192 |
elif isinstance(result, (dict, list)):
|
| 193 |
return json.dumps(result, indent=2)
|
| 194 |
return str(result)
|
| 195 |
+
|
| 196 |
async def process_response(self, response: CsvChatResult, chat_id: str) -> str:
|
| 197 |
+
"""Process the response with proper variable handling"""
|
| 198 |
output_parts = [response.casual_response]
|
| 199 |
+
|
| 200 |
+
# Process analysis operations first
|
| 201 |
for operation in response.analysis_operations:
|
| 202 |
execution_result = self.execute_code(operation.code.code)
|
| 203 |
result = self.exec_locals.get(operation.result_var)
|
| 204 |
+
|
| 205 |
if execution_result['error']:
|
| 206 |
+
output_parts.append(f"\nError in operation '{operation.result_var}':")
|
| 207 |
output_parts.append("```python\n" + execution_result['error']['message'] + "\n```")
|
| 208 |
elif result is not None:
|
| 209 |
if result is None or (hasattr(result, '__len__') and len(result) == 0):
|
| 210 |
+
output_parts.append(f"\nNo data - Operation '{operation.result_var}' returned empty results")
|
| 211 |
else:
|
| 212 |
+
output_parts.append(f"\nResult for '{operation.result_var}':")
|
| 213 |
output_parts.append("```python\n" + self._format_result(result) + "\n```")
|
| 214 |
else:
|
| 215 |
+
output_parts.append(f"\nOperation '{operation.result_var}' output:")
|
| 216 |
+
if execution_result['output'].strip():
|
| 217 |
+
output_parts.append("```\n" + execution_result['output'].strip() + "\n```")
|
| 218 |
+
else:
|
| 219 |
+
output_parts.append("No output - Operation didn't produce any results")
|
| 220 |
+
|
| 221 |
+
# Process charts after all operations
|
| 222 |
if response.charts:
|
| 223 |
+
output_parts.append("\nVisualizations:")
|
| 224 |
for chart in response.charts:
|
| 225 |
+
# Skip processing if media pointer indicates blank chart
|
| 226 |
+
if chart.media_pointer and "blank" in chart.media_pointer.lower():
|
| 227 |
+
output_parts.append(f"\nSkipped blank chart: {chart.image_description}")
|
| 228 |
+
continue
|
| 229 |
+
|
| 230 |
if chart.code:
|
| 231 |
chart_result = self.execute_code(chart.code)
|
| 232 |
if chart_result['plots']:
|
| 233 |
for plot_data in chart_result['plots']:
|
| 234 |
+
try:
|
| 235 |
+
public_url = await self.save_plot_to_supabase(
|
| 236 |
+
plot_data=plot_data,
|
| 237 |
+
description=chart.image_description,
|
| 238 |
+
chat_id=chat_id
|
| 239 |
+
)
|
| 240 |
+
output_parts.append(f"\n{chart.image_description}")
|
| 241 |
+
output_parts.append(f"")
|
| 242 |
+
except Exception as e:
|
| 243 |
+
output_parts.append(f"\nChart error - Couldn't upload: {str(e)}")
|
|
|
|
|
|
|
|
|
|
| 244 |
elif chart_result['error']:
|
| 245 |
+
output_parts.append("```python\n" + f"Chart error: {chart_result['error']['message']}" + "\n```")
|
| 246 |
else:
|
| 247 |
+
output_parts.append(f"\nNo chart - '{chart.image_description}' couldn't be generated")
|
| 248 |
+
|
| 249 |
+
return "\n".join(output_parts)
|
requirements.txt
CHANGED
|
@@ -16,4 +16,3 @@ langchain-google-genai==2.0.7
|
|
| 16 |
supabase==2.13.0
|
| 17 |
langchain_openai==0.3.11
|
| 18 |
tabulate==0.9.0
|
| 19 |
-
pillow==11.2.1
|
|
|
|
| 16 |
supabase==2.13.0
|
| 17 |
langchain_openai==0.3.11
|
| 18 |
tabulate==0.9.0
|
|
|