Lavlu118557 committed on
Commit
cb42bfd
·
verified ·
1 Parent(s): 5da618c

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +183 -0
utils.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ import json
3
+ import markdown
4
+ from fuzzywuzzy import fuzz, process
5
+ from typing import Dict, List, Any
6
+ import logging
7
+
8
def parse_yaml(yaml_text: str) -> Dict[str, Any]:
    """Parse YAML text with the safe loader and return the result.

    Args:
        yaml_text: The YAML document as a string.

    Returns:
        The object produced by ``yaml.safe_load``. For a mapping document
        this is a dict. NOTE(review): the return annotation is not enforced
        here; ``safe_load`` can yield other types (for example a list, a
        scalar, or ``None`` for an empty document), so callers should check.

    Raises:
        yaml.YAMLError: If the text is not valid YAML. The error is logged
            before being re-raised.
    """
    try:
        return yaml.safe_load(yaml_text)
    except yaml.YAMLError as e:
        logging.error("YAML parsing error: %s", e)
        # Bare ``raise`` re-raises the active exception unchanged.
        raise
15
+
16
def fuzzy_search(query: str, data: Dict[str, Any], threshold: int = 60) -> List[Dict[str, Any]]:
    """Fuzzy-match a query against the keys and scalar values of a dict.

    Only top-level entries whose value is a ``str``, ``int`` or ``float`` are
    considered; nested containers are skipped. Each entry can contribute up
    to two matches: one for its key and one for its value.

    Args:
        query: Text to search for (compared case-insensitively).
        data: Mapping to search. Anything that is not a dict yields no matches.
        threshold: Minimum ``fuzz.partial_ratio`` score for a match to be kept.

    Returns:
        A list of dicts with the keys ``'type'`` (``'key'`` or ``'value'``),
        ``'field'`` (the original key), ``'value'`` (the value as a string)
        and ``'score'``, sorted by score in descending order.
    """
    # A missing query or a non-dict payload cannot match anything.
    if not isinstance(data, dict) or query is None:
        return []

    # Lower-case the query once instead of twice per entry.
    needle = str(query).lower()
    matches: List[Dict[str, Any]] = []

    for key, value in data.items():
        if not isinstance(value, (str, int, float)):
            continue

        value_str = str(value)

        # Keys of parsed YAML/JSON are not guaranteed to be strings
        # (e.g. integer keys), so stringify before lower-casing.
        key_score = fuzz.partial_ratio(needle, str(key).lower())
        if key_score >= threshold:
            matches.append({
                'type': 'key',
                'field': key,
                'value': value_str,
                'score': key_score,
            })

        value_score = fuzz.partial_ratio(needle, value_str.lower())
        if value_score >= threshold:
            matches.append({
                'type': 'value',
                'field': key,
                'value': value_str,
                'score': value_score,
            })

    # Best matches first; the sort is stable, so ties keep insertion order.
    matches.sort(key=lambda match: match['score'], reverse=True)
    return matches
50
+
51
def render_markdown(text: str) -> str:
    """Render Markdown text to HTML and expand a small set of emoji codes.

    The text is converted with the ``extra`` and ``codehilite`` Markdown
    extensions, then every known ``:shortcode:`` in the resulting HTML is
    replaced by its emoji.

    Args:
        text: Markdown source.

    Returns:
        The rendered HTML. If rendering fails, the error is logged and the
        input text is returned unchanged.
        NOTE(review): that fallback is raw, unescaped text; callers that
        embed the result in a page should confirm this is acceptable.
    """
    # Written as Unicode escapes so the table survives any re-encoding of
    # this source file.
    emoji_map = {
        ':smile:': '\U0001F60A',       # smiling face with smiling eyes
        ':heart:': '\u2764\uFE0F',     # red heart
        ':thumbsup:': '\U0001F44D',    # thumbs up
        ':thumbsdown:': '\U0001F44E',  # thumbs down
        ':fire:': '\U0001F525',        # fire
        ':rocket:': '\U0001F680',      # rocket
        ':star:': '\u2B50',            # star
        ':check:': '\u2705',           # check mark button
        ':x:': '\u274C',               # cross mark
        ':warning:': '\u26A0\uFE0F',   # warning sign
        ':info:': '\u2139\uFE0F',      # information
        ':bulb:': '\U0001F4A1',        # light bulb
        ':tada:': '\U0001F389',        # party popper
    }

    try:
        md = markdown.Markdown(extensions=['extra', 'codehilite'])
        html = md.convert(text)

        # NOTE(review): replacement runs over the whole rendered HTML, so a
        # shortcode inside a code block or an attribute is replaced too.
        for code, emoji in emoji_map.items():
            html = html.replace(code, emoji)

        return html
    except Exception as e:
        # Best-effort: never let a rendering failure propagate to the caller.
        logging.error("Markdown rendering error: %s", e)
        return text
81
+
82
def create_dynamic_table(table_name: str, schema: Dict[str, Any]) -> bool:
    """Placeholder for dynamic table creation; currently only logs the request.

    No table is created. The function records the requested name and schema
    at INFO level and reports success. It can later be expanded to create
    real database tables; for now storage relies on the generic DataRecord
    model with JSON storage.

    Args:
        table_name: Name of the table that would be created.
        schema: Schema description for the table.

    Returns:
        Always ``True``.
    """
    # Lazy %-style arguments are formatted inside the logging machinery, so
    # nothing here can raise and no try/except wrapper is needed.
    logging.info("Creating dynamic table: %s with schema: %s", table_name, schema)
    return True
92
+
93
def validate_schema(schema: Dict[str, Any]) -> bool:
    """Check that a table schema has the expected structure.

    A valid schema is a dict with a ``'fields'`` entry that is a list, where
    every element is a dict containing both a ``'name'`` and a ``'type'``
    key. The values stored under those keys are not inspected, and an empty
    field list is accepted.

    Args:
        schema: Candidate schema object.

    Returns:
        ``True`` if the structure is valid, ``False`` otherwise.
    """
    if not isinstance(schema, dict):
        return False

    # A missing 'fields' key yields None, which fails the list check below.
    fields = schema.get('fields')
    if not isinstance(fields, list):
        return False

    return all(
        isinstance(field, dict) and 'name' in field and 'type' in field
        for field in fields
    )
111
+
112
def process_pipeline_data(pipeline_config: Dict[str, Any], source_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Run records through the transformations listed in a pipeline config.

    Transformations are read from ``pipeline_config['transformations']`` and
    applied in order. Supported ``'type'`` values:

    * ``'filter'`` - keep records for which ``eval_condition`` is true for
      the transformation's ``'condition'``.
    * ``'map'`` - rename fields of every record via ``apply_mapping`` using
      the transformation's ``'mapping'``.
    * ``'sort'`` - sort by ``'field'`` (missing values sort as ``''``),
      descending when ``'reverse'`` is true.

    Transformations of any other type are ignored.

    Args:
        pipeline_config: Pipeline description.
        source_data: Input records. Neither the list nor its records are
            modified.

    Returns:
        A new list of processed records. If any step raises, the error is
        logged and ``source_data`` itself is returned.
    """
    # Copy each record, not just the list: 'map' renames fields in place and
    # must not leak into the caller's data (or into the error fallback).
    processed_data = [
        dict(item) if isinstance(item, dict) else item
        for item in source_data
    ]

    try:
        # ``or []`` also tolerates an explicit null 'transformations' entry.
        transformations = pipeline_config.get('transformations') or []

        for transformation in transformations:
            transform_type = transformation.get('type')

            if transform_type == 'filter':
                condition = transformation.get('condition')
                processed_data = [
                    item for item in processed_data
                    if eval_condition(item, condition)
                ]

            elif transform_type == 'map':
                mapping = transformation.get('mapping')
                for item in processed_data:
                    apply_mapping(item, mapping)

            elif transform_type == 'sort':
                field = transformation.get('field')
                reverse = transformation.get('reverse', False)
                processed_data.sort(key=lambda x: x.get(field, ''), reverse=reverse)

        return processed_data

    except Exception as e:
        # Best-effort: a broken pipeline falls back to the untouched input.
        logging.error("Pipeline processing error: %s", e)
        return source_data
142
+
143
def eval_condition(data: Dict[str, Any], condition: Dict[str, Any]) -> bool:
    """Evaluate a single filter condition against one record.

    The condition is a dict with ``'field'``, ``'operator'`` and ``'value'``.
    Supported operators:

    * ``'equals'`` - ``data[field] == value``.
    * ``'contains'`` - case-insensitive substring test on the string forms;
      false when either side is ``None``.
    * ``'gt'`` / ``'lt'`` - numeric comparison after ``float()`` conversion.
      Falsy operands (``None``, ``''``, ``0``) are treated as ``0``; operands
      that cannot be converted make the condition false.

    The function is deliberately permissive: a missing or non-dict condition,
    a condition without a field or operator, an unknown operator, or any
    unexpected error all evaluate to ``True`` so the record is kept.

    Args:
        data: Record to test.
        condition: Condition description.

    Returns:
        ``True`` if the record satisfies the condition (or the condition
        cannot be evaluated), ``False`` otherwise.
    """
    # No usable condition means "no filtering".
    if not isinstance(condition, dict):
        return True

    try:
        field = condition.get('field')
        op_name = condition.get('operator')
        expected = condition.get('value')

        if not field or not op_name:
            return True

        actual = data.get(field)

        if op_name == 'equals':
            return actual == expected

        if op_name == 'contains':
            if actual is None or expected is None:
                return False
            return str(expected).lower() in str(actual).lower()

        if op_name in ('gt', 'lt'):
            try:
                left = float(actual or 0)
                right = float(expected or 0)
            except (ValueError, TypeError):
                return False
            return left > right if op_name == 'gt' else left < right

        # Unknown operators do not filter anything out.
        return True
    except Exception as e:
        # Keep the record, but leave a trace instead of failing silently.
        logging.warning("Condition evaluation error (record kept): %s", e)
        return True
175
+
176
def apply_mapping(data: Dict[str, Any], mapping: Dict[str, str]) -> None:
    """Rename fields of a record in place according to a mapping.

    For every ``old_field -> new_field`` pair whose ``old_field`` exists in
    ``data``, the value is moved to ``new_field``. Pairs whose ``old_field``
    is absent are ignored. All renames are based on the record's state
    before the call, so overlapping mappings such as a swap
    (``{'a': 'b', 'b': 'a'}``) or a chain (``{'a': 'b', 'b': 'c'}``) behave
    as expected.

    Args:
        data: Record to modify in place.
        mapping: Old field name to new field name. An empty or ``None``
            mapping is a no-op.

    Returns:
        None. Errors are logged rather than raised.
    """
    if not mapping:
        return

    try:
        # Pass 1: pull every mapped value out before writing anything, so a
        # freshly written new_field is never mistaken for a later old_field.
        moved = [
            (new_field, data.pop(old_field))
            for old_field, new_field in mapping.items()
            if old_field in data
        ]
        # Pass 2: store the values under their new names.
        for new_field, value in moved:
            data[new_field] = value
    except Exception as e:
        # Best-effort: a bad mapping must not abort the caller.
        logging.error("Mapping error: %s", e)