Update app.py
Browse files
app.py
CHANGED
|
@@ -163,58 +163,58 @@ class EventScraper:
|
|
| 163 |
Only return the json. nothing else. no comments."""
|
| 164 |
return prompt
|
| 165 |
|
| 166 |
-
def parse_llm_response(self, response):
|
| 167 |
-
|
| 168 |
-
try:
|
| 169 |
-
# Clean the response and handle nested lists
|
| 170 |
-
response = response.strip()
|
| 171 |
-
|
| 172 |
-
# Try parsing as JSON, handling potential nested structures
|
| 173 |
-
def flatten_events(data):
|
| 174 |
-
if isinstance(data, list):
|
| 175 |
-
flattened = []
|
| 176 |
-
for item in data:
|
| 177 |
-
if isinstance(item, list):
|
| 178 |
-
flattened.extend(flatten_events(item))
|
| 179 |
-
elif isinstance(item, dict):
|
| 180 |
-
flattened.append(item)
|
| 181 |
-
return flattened
|
| 182 |
-
return []
|
| 183 |
-
|
| 184 |
try:
|
| 185 |
-
#
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
events = []
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
gr.Warning(f"Parsing error: {str(e)}")
|
| 217 |
-
return []
|
| 218 |
|
| 219 |
def scrape_events(self):
|
| 220 |
"""Main method to scrape events from all URLs"""
|
|
|
|
| 163 |
Only return the json. nothing else. no comments."""
|
| 164 |
return prompt
|
| 165 |
|
| 166 |
+
def parse_llm_response(self, response):
    """Parse the LLM's text response into a list of structured events.

    The response is expected to be JSON: a list of event objects, possibly
    nested inside further lists, or a single event object. When the text as
    a whole is not valid JSON (for example the model wrapped the payload in
    prose or a Markdown code fence), the first embedded JSON array that
    contains at least one object is used instead.

    Args:
        response: Raw text returned by the LLM.

    Returns:
        A list of event dicts. Only events with a truthy ``'name'`` are
        kept, and each kept event is guaranteed to have the keys
        ``'date'``, ``'time'``, ``'location'`` and ``'description'``
        (set to ``''`` when absent or ``null``). Returns ``[]`` when
        nothing usable is found or an unexpected error occurs; in the
        latter case a ``gr.Warning`` is raised for the UI.
    """
    optional_fields = ('date', 'time', 'location', 'description')

    def flatten_events(data):
        """Collect the dicts found in arbitrarily nested lists."""
        if isinstance(data, dict):
            # A lone JSON object is treated as a single event.
            return [data]
        if not isinstance(data, list):
            return []
        flattened = []
        for item in data:
            if isinstance(item, list):
                flattened.extend(flatten_events(item))
            elif isinstance(item, dict):
                flattened.append(item)
        return flattened

    def extract_embedded_events(text):
        """Return events from the first decodable JSON array in text."""
        decoder = json.JSONDecoder()
        start = text.find('[')
        while start != -1:
            try:
                # raw_decode parses one JSON value starting at `start` and
                # ignores trailing text, so stray brackets in surrounding
                # prose cannot corrupt the payload (a greedy regex from the
                # first '[' to the last ']' can).
                parsed, _ = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                parsed = None
            found = flatten_events(parsed)
            if found:
                return found
            # Either not JSON here or an array without objects (e.g. "[2]"
            # in a sentence): try the next opening bracket.
            start = text.find('[', start + 1)
        return []

    try:
        response = response.strip()

        try:
            # First, attempt direct JSON parsing of the whole response.
            events = flatten_events(json.loads(response))
        except json.JSONDecodeError:
            # Otherwise look for a JSON array embedded in other text.
            events = extract_embedded_events(response)

        # Clean and validate events.
        cleaned_events = []
        for event in events:
            # Ensure each event has at least a name.
            if not event.get('name'):
                continue
            for field in optional_fields:
                # Treat JSON null the same as a missing key.
                if event.get(field) is None:
                    event[field] = ''
            cleaned_events.append(event)

        return cleaned_events

    except Exception as e:
        # Boundary handler: surface the problem in the UI instead of
        # aborting the whole scrape on one malformed response.
        gr.Warning(f"Parsing error: {str(e)}")
        return []
|
|
|
|
|
|
|
| 218 |
|
| 219 |
def scrape_events(self):
|
| 220 |
"""Main method to scrape events from all URLs"""
|