Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| from eventbrite_scrapper import Eventbrite | |
| from sentence_transformers import SentenceTransformer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import numpy as np | |
| from datetime import datetime | |
| from dataclasses import dataclass, field, replace | |
| from typing import List, Any | |
# Dataclasses for event structure
#
# Every class below is decorated with @dataclass: the bodies rely on
# dataclasses.field(default_factory=...), which is only interpreted by the
# decorator. Without it the "defaults" would be raw Field objects, the
# classes would accept no constructor arguments, and dataclasses.replace()
# would reject their instances.


@dataclass
class EventAddress:
    """Address fields of a venue; every field defaults to None."""

    latitude: float = None
    longitude: float = None
    region: str = None
    postal_code: str = None
    address_1: str = None


@dataclass
class EventVenue:
    """Venue of an event, carrying its own EventAddress."""

    id: str = None
    name: str = None
    url: str = None
    # default_factory gives each venue its own EventAddress instance.
    address: EventAddress = field(default_factory=lambda: EventAddress())


@dataclass
class EventImage:
    """Image attached to an event, identified by URL."""

    url: str = None


@dataclass
class EventTag:
    """A single tag; only its text is stored."""

    text: str = None


@dataclass
class Event:
    """One event record with venue, image, scheduling and tag data.

    Hashing is based on ``id`` when it is truthy, otherwise on a tuple of
    name, online flag, start time and venue name (see ``__hash__``).
    """

    id: str = None
    name: str = None
    url: str = None
    is_online_event: bool = False
    short_description: str = None
    published_datetime: datetime = None
    start_datetime: datetime = None
    end_datetime: datetime = None
    timezone: str = None
    hide_start_date: bool = False
    hide_end_date: bool = False
    parent_event_url: str = None
    series_id: str = None
    primary_venue: EventVenue = field(default_factory=lambda: EventVenue())
    tickets_url: str = None
    checkout_flow: str = None
    language: str = None
    image: EventImage = field(default_factory=lambda: EventImage())
    # Tag collections default to empty tuples.
    tags_categories: tuple = field(default_factory=tuple)
    tags_formats: tuple = field(default_factory=tuple)
    tags_by_organizer: tuple = field(default_factory=tuple)

    # An explicitly defined __hash__ is left in place by @dataclass, so
    # instances stay hashable even though an __eq__ is generated.
    def __hash__(self):
        if self.id:
            return hash(self.id)
        return hash((
            self.name,
            self.is_online_event,
            self.start_datetime,
            self.primary_venue.name,
        ))
# Event Retrieval Pipeline
class EventbriteRAGPipeline:
    """Embed a list of events once and rank them against free-text queries.

    Events are turned into a single text string each, encoded with a
    SentenceTransformer model, and compared to the encoded query with
    cosine similarity.
    """

    def __init__(self, events: List["Event"], embedding_model: str = 'all-MiniLM-L6-v2'):
        """Copy the events, load the embedding model and embed every event.

        Args:
            events: Dataclass event instances (they are passed to
                ``dataclasses.replace``).
            embedding_model: Model name handed to ``SentenceTransformer``.
        """
        # replace() builds a copy of each event, so the caller's objects are
        # not modified; the three tag collections are coerced to tuples.
        # NOTE(review): presumably done so the copies stay hashable - confirm.
        self.events = [
            replace(
                event,
                tags_categories=tuple(event.tags_categories),
                tags_formats=tuple(event.tags_formats),
                tags_by_organizer=tuple(event.tags_by_organizer),
            )
            for event in events
        ]
        self.model = SentenceTransformer(embedding_model)
        self.event_embeddings = self._compute_embeddings()

    @staticmethod
    def _event_to_text(event) -> str:
        """Join the searchable fields of one event into a single string."""

        def tag_text(tags) -> str:
            # Skip tags without text so str.join never receives None.
            return ' '.join(tag.text for tag in tags if tag.text)

        text_parts = [
            event.name or '',
            event.short_description or '',
            tag_text(event.tags_categories),
            tag_text(event.tags_formats),
            tag_text(event.tags_by_organizer),
            event.primary_venue.name or '',
            event.primary_venue.address.region or '',
            event.language or '',
        ]
        # Drop empty parts so the text has no runs of blank separators.
        return ' '.join(filter(bool, text_parts))

    def _compute_embeddings(self) -> np.ndarray:
        """Encode every stored event; row i corresponds to ``self.events[i]``."""
        return self.model.encode([self._event_to_text(event) for event in self.events])

    def query_events(self, query: str, top_k: int = 5) -> List["Event"]:
        """Return up to ``top_k`` distinct events, most similar first.

        Args:
            query: Free-text search string.
            top_k: Maximum number of events to return. Values <= 0 yield an
                empty list.

        Returns:
            Events ordered by descending cosine similarity to the query,
            de-duplicated by ``event.id``. Events without an id are never
            treated as duplicates of each other.
        """
        # Nothing to rank (or nothing requested): skip encoding entirely.
        # A non-positive top_k would otherwise slice the whole ranking.
        if top_k <= 0 or not self.events:
            return []

        query_embedding = self.model.encode(query).reshape(1, -1)
        similarities = cosine_similarity(query_embedding, self.event_embeddings)[0]

        # Walk the full ranking rather than a fixed-size window, so heavy
        # duplication cannot leave the result short of top_k.
        unique_events = {}
        for idx in similarities.argsort()[::-1]:
            event = self.events[idx]
            # Id-less events get a per-position key instead of all sharing None.
            key = event.id or ("no-id", int(idx))
            if key in unique_events:
                continue
            unique_events[key] = event
            if len(unique_events) == top_k:
                break
        return list(unique_events.values())
# Event Evaluator
class EventEvaluator:
    """Run queries through a retrieval pipeline and flatten hits into rows."""

    def __init__(self, pipeline):
        """Store the pipeline; it must provide ``query_events(query, top_k=...)``."""
        self.pipeline = pipeline

    def evaluate_query(self, query, top_k: int = 5):
        """Evaluate a single query and return results.

        Args:
            query: Search string forwarded to the pipeline.
            top_k: Maximum number of events requested from the pipeline.

        Returns:
            A list of dicts, one per distinct event, in the order the
            pipeline returned them.
        """
        top_events = self.pipeline.query_events(query, top_k=top_k)
        results = []
        seen = set()
        for position, event in enumerate(top_events):
            # De-duplicate on id; an event without an id gets a positional
            # key so that id-less events are not collapsed into one row.
            key = event.id or ("no-id", position)
            if key in seen:
                continue
            seen.add(key)
            results.append({
                "Event Name": event.name,
                "Online Event": event.is_online_event,
                "Start Time": event.start_datetime,
                "Venue Address": event.primary_venue.address.address_1,
                "Venue Name": event.primary_venue.name,
                "Description": event.short_description,
                "Tickets URL": event.tickets_url,
                "Language": event.language,
                "Categories": [tag.text for tag in event.tags_categories],
            })
        return results
# Fetch events from Eventbrite API
#
# Streamlit re-executes this script on every user interaction. The fetch and
# the embedding-model load are wrapped in st.cache_resource so they are not
# repeated on every rerun.
@st.cache_resource
def _build_evaluator():
    """Fetch events, embed them and return a ready-to-use EventEvaluator."""
    client = Eventbrite()
    # NOTE: region and date window are fixed values; adjust them here.
    events = client.search_events.get_results(
        region="ca--los-angeles",
        dt_start="2025-02-26",
        dt_end="2025-02-28",
        max_pages=6,
    )
    return EventEvaluator(EventbriteRAGPipeline(events))


# Initialize pipeline and evaluator
evaluator = _build_evaluator()
rag_pipeline = evaluator.pipeline

# Streamlit UI
st.title("🎟️ Event Search App")
st.write("Find events based on your interests!")
query = st.text_input("🔎 Enter your search query:")

if query:
    print(f"🔍 Processing query: {query} ")  # Debugging query input
    results = evaluator.evaluate_query(query)
    if results:
        df = pd.DataFrame(results)
        st.dataframe(df)  # Display results as a formatted table
    else:
        st.warning("No results found.")