Spaces:
Running
Running
Pulastya B
committed on
Commit
·
7d775b3
1
Parent(s):
2797314
Fix Phase 1 errors: schema extraction and message type handling
Browse files
FIXES:
1. Schema extraction NoneType comparison error
- Added explicit None check before comparing unique_count
- Prevents the TypeError raised when '<' compares None with an int
2. ChatCompletionMessage AttributeError
- Messages list contains mix of dicts and Pydantic objects
- Updated token estimation to handle both types
- Uses isinstance() check and getattr() fallback
Both errors prevented the workflow from completing after the first tool execution.
- src/orchestrator.py +4 -1
- src/utils/schema_extraction.py +5 -1
src/orchestrator.py
CHANGED
|
@@ -1717,7 +1717,10 @@ You are a DOER. Complete workflows based on user intent."""
|
|
| 1717 |
print(f"✂️ Pruned conversation (keeping last 4 exchanges, ~4K tokens saved)")
|
| 1718 |
|
| 1719 |
# 🔍 Token estimation and warning
|
| 1720 |
-
estimated_tokens = sum(
|
|
|
|
|
|
|
|
|
|
| 1721 |
if estimated_tokens > 8000:
|
| 1722 |
# Emergency pruning - keep only last 2 exchanges
|
| 1723 |
messages = [messages[0], messages[1]] + messages[-4:]
|
|
|
|
| 1717 |
print(f"✂️ Pruned conversation (keeping last 4 exchanges, ~4K tokens saved)")
|
| 1718 |
|
| 1719 |
# 🔍 Token estimation and warning
|
| 1720 |
+
estimated_tokens = sum(
|
| 1721 |
+
len(str(m.get('content', '') if isinstance(m, dict) else getattr(m, 'content', ''))) // 4
|
| 1722 |
+
for m in messages
|
| 1723 |
+
)
|
| 1724 |
if estimated_tokens > 8000:
|
| 1725 |
# Emergency pruning - keep only last 2 exchanges
|
| 1726 |
messages = [messages[0], messages[1]] + messages[-4:]
|
src/utils/schema_extraction.py
CHANGED
|
@@ -74,7 +74,11 @@ def extract_schema_local(file_path: str, sample_rows: int = 5) -> Dict[str, Any]
|
|
| 74 |
]
|
| 75 |
schema_info['categorical_columns'] = [
|
| 76 |
col for col, info in schema_info['columns'].items()
|
| 77 |
-
if info['dtype'] in ['Utf8', 'String'] or (
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
]
|
| 79 |
schema_info['datetime_columns'] = [
|
| 80 |
col for col, info in schema_info['columns'].items()
|
|
|
|
| 74 |
]
|
| 75 |
schema_info['categorical_columns'] = [
|
| 76 |
col for col, info in schema_info['columns'].items()
|
| 77 |
+
if info['dtype'] in ['Utf8', 'String'] or (
|
| 78 |
+
info.get('unique_count') is not None and
|
| 79 |
+
info.get('unique_count') < 50 and
|
| 80 |
+
col not in schema_info['numeric_columns']
|
| 81 |
+
)
|
| 82 |
]
|
| 83 |
schema_info['datetime_columns'] = [
|
| 84 |
col for col, info in schema_info['columns'].items()
|