Spaces:

droov
/

polygrapher

Sleeping

App Files Community

dhruv575 committed on Dec 18, 2025

Commit

471541d

1 Parent(s): 35213a7

optional image

Browse files

Files changed (4) hide show

app/email_new_converter.py +8 -4
app/post_process.py +10 -5
app/templateify_new_service.py +6 -1
email3.py +123 -90

app/email_new_converter.py CHANGED Viewed

@@ -271,6 +271,10 @@ class EmailNewConverter:
             elif img_alt == "top news image":
                 is_top_news_image = True
                 logger.info(f"Identified top news image by alt text (should preserve aspect ratio): {src[:50]}...")
             else:
                 # Check if image is in footer section by looking for footer comment in HTML structure
                 # Get the HTML string representation of parent elements to check for footer comments
@@ -289,8 +293,8 @@ class EmailNewConverter:
                     parent = parent.find_parent()
                     depth += 1
-            # Combine checks - if it's footer, top comment, or top news, don't make square
-            is_non_square_image = is_footer_image or is_top_comment_image or is_top_news_image
             # Check if image is already on Cloudinary
             is_cloudinary = "res.cloudinary.com" in src
@@ -342,9 +346,9 @@ class EmailNewConverter:
                         logger.warning(f"Cloudinary upload succeeded but no URL returned for {image_url[:50]}...")
                         continue
-                # For non-square images (footer, top comment, top news), preserve aspect ratio - no transformations
                 if is_non_square_image:
-                    image_type = "footer" if is_footer_image else ("top comment" if is_top_comment_image else "top news")
                     logger.info(f"{image_type.capitalize()} image processed (preserving aspect ratio): {src[:50]}...")
                     img["src"] = cloudinary_url
                 else:

             elif img_alt == "top news image":
                 is_top_news_image = True
                 logger.info(f"Identified top news image by alt text (should preserve aspect ratio): {src[:50]}...")
+            # Last word images can be identified by alt text
+            elif img_alt == "last word image":
+                is_last_word_image = True
+                logger.info(f"Identified last word image by alt text (should preserve aspect ratio): {src[:50]}...")
             else:
                 # Check if image is in footer section by looking for footer comment in HTML structure
                 # Get the HTML string representation of parent elements to check for footer comments
                     parent = parent.find_parent()
                     depth += 1
+            # Combine checks - if it's footer, top comment, top news, or last word, don't make square
+            is_non_square_image = is_footer_image or is_top_comment_image or is_top_news_image or is_last_word_image
             # Check if image is already on Cloudinary
             is_cloudinary = "res.cloudinary.com" in src
                         logger.warning(f"Cloudinary upload succeeded but no URL returned for {image_url[:50]}...")
                         continue
+                # For non-square images (footer, top comment, top news, last word), preserve aspect ratio - no transformations
                 if is_non_square_image:
+                    image_type = "footer" if is_footer_image else ("top comment" if is_top_comment_image else ("last word" if is_last_word_image else "top news"))
                     logger.info(f"{image_type.capitalize()} image processed (preserving aspect ratio): {src[:50]}...")
                     img["src"] = cloudinary_url
                 else:

app/post_process.py CHANGED Viewed

@@ -302,14 +302,19 @@ def fix_cloudinary_image_transformations(html_content: str) -> str:
         # Check if it's a top news image (inside top_news_box or alt="Top news image")
         if 'alt="Top news image"' in full_tag or 'alt=\'Top news image\'' in full_tag:
             is_footer_or_top_news = True
-        else:
-            # Check if it's inside a top_news_box by looking for the class in nearby HTML
             img_pos = html_content.find(full_tag)
             if img_pos != -1:
-                # Check if there's a top_news_box before this img tag (within 1000 chars)
                 before_img = html_content[max(0, img_pos - 1000):img_pos]
-                # Look for top_news_box class (handle both quoted and unquoted, with/without spaces)
-                if re.search(r'class\s*=\s*["\']?[^"\'>]*top_news_box', before_img, re.IGNORECASE):
                     is_footer_or_top_news = True
         # Only add transformations to market images (48px square images)

         # Check if it's a top news image (inside top_news_box or alt="Top news image")
         if 'alt="Top news image"' in full_tag or 'alt=\'Top news image\'' in full_tag:
             is_footer_or_top_news = True
+        # Check if it's a last word image (inside last_word_box or alt="Last word image")
+        if 'alt="Last word image"' in full_tag or 'alt=\'Last word image\'' in full_tag:
+            is_footer_or_top_news = True
+        if not is_footer_or_top_news:
+            # Check if it's inside a top_news_box or last_word_box by looking for the class in nearby HTML
             img_pos = html_content.find(full_tag)
             if img_pos != -1:
+                # Check if there's a top_news_box or last_word_box before this img tag (within 1000 chars)
                 before_img = html_content[max(0, img_pos - 1000):img_pos]
+                # Look for top_news_box or last_word_box class (handle both quoted and unquoted, with/without spaces)
+                if re.search(r'class\s*=\s*["\']?[^"\'>]*(top_news_box|last_word_box)', before_img, re.IGNORECASE):
                     is_footer_or_top_news = True
         # Only add transformations to market images (48px square images)

app/templateify_new_service.py CHANGED Viewed

@@ -805,9 +805,14 @@ class TemplateifyNewService:
                 if "market-link" not in parent_link["class"]:
                     parent_link["class"].append("market-link")
-            # Find and tokenize the image
             img = node.find("img")
             if img:
                 img["src"] = "{{LAST_WORD_IMAGE}}"
                 register("{{LAST_WORD_IMAGE}}", "Last word image URL")

                 if "market-link" not in parent_link["class"]:
                     parent_link["class"].append("market-link")
+            # Find and tokenize the image, wrap in conditional
             img = node.find("img")
             if img:
+                # Wrap image in conditional block so it only shows if image URL is provided
+                opening_tag = NavigableString("{{#LAST_WORD_IMAGE}}")
+                closing_tag = NavigableString("{{/LAST_WORD_IMAGE}}")
+                img.insert_before(opening_tag)
+                img.insert_after(closing_tag)
                 img["src"] = "{{LAST_WORD_IMAGE}}"
                 register("{{LAST_WORD_IMAGE}}", "Last word image URL")

email3.py CHANGED Viewed

@@ -1050,35 +1050,33 @@ class PolymarketEmailGenerator:
             return []
     def fetch_whale_moves(self) -> List[Dict[str, Any]]:
-        """Fetch whale moves/insider positions from ClickHouse"""
         print("Fetching whale moves...")
-        # ClickHouse configuration
-        query_id = os.getenv('CLICKHOUSE_QUERY_ID')
-        user = os.getenv('CLICKHOUSE_USER')
-        password = os.getenv('CLICKHOUSE_PASSWORD')
-        if not all([query_id, user, password]):
-            print("   Warning: ClickHouse credentials not configured")
-            print("   Falling back to placeholder whale move data")
-            return self._placeholder_whale_moves()
-        url = f'https://queries.clickhouse.cloud/run/{query_id}?format=JSONEachRow'
         max_retries = 3
-        timeout = 60
         for attempt in range(max_retries):
             try:
                 if attempt > 0:
                     wait_time = 2 ** attempt
                     print(f"   Retry {attempt}/{max_retries} after {wait_time}s...")
-                    import time
                     time.sleep(wait_time)
                 response = requests.get(
                     url,
-                    auth=(user, password),
                     headers={'Content-Type': 'application/json'},
                     timeout=timeout
                 )
@@ -1086,77 +1084,102 @@ class PolymarketEmailGenerator:
                 break
             except requests.exceptions.RequestException as e:
-                error_msg = str(e)
-                # Check if it's a permissions error
-                if "Not enough privileges" in error_msg or "dim_users" in error_msg:
-                    print(f"   ⚠️  ClickHouse permissions error - the query needs access to core.dim_users table")
-                    print(f"   This may have changed on the ClickHouse side. Contact your admin to fix permissions.")
-                    print(f"   Skipping whale moves section...")
                     return self._placeholder_whale_moves()
-                else:
-                    print(f"   Warning: ClickHouse query attempt {attempt + 1}/{max_retries} failed: {e}")
-                    if attempt == max_retries - 1:
-                        print(f"   Error: Failed after {max_retries} attempts")
-                        return self._placeholder_whale_moves()
         # Process the successful response
         whale_moves = []
-        for line in response.text.strip().split('\n'):
-            if not line:
-                continue
             try:
-                data = json.loads(line)
-                total_value = float(data.get('total_open_value', 0))
-                if total_value >= 25000:  # Lowered threshold to show more whale moves
-                    open_values = [float(v) for v in data.get('open_values', [])]
-                    market_titles = data.get('open_market_titles', [])
-                    event_titles = data.get('open_event_titles', [])
-                    outcome_names = data.get('open_outcome_names', [])
-                    if open_values:
-                        max_idx = open_values.index(max(open_values))
-                        market_title = market_titles[max_idx] if max_idx < len(market_titles) else ""
-                        event_title = event_titles[max_idx] if max_idx < len(event_titles) else ""
-                        outcome = outcome_names[max_idx] if max_idx < len(outcome_names) else ""
-                        # Use event title if market title is empty
-                        display_market = market_title if market_title else event_title
-                        if not display_market:
-                            display_market = "Unknown Market"
-                        slug = event_title.lower().replace(' ', '-').replace('?', '')[:50] if event_title else ""
-                        outcome_str = f" on {outcome}" if outcome else ""
-                        title = f"${total_value:,.0f} position{outcome_str}"
-                        user_name = data.get('user_name', 'Anonymous')
-                        whale_moves.append({
-                            "title": title,
-                            "market": display_market,
-                            "event": event_title,
-                            "slug": slug,
-                            "amount": total_value,
-                            "user_name": user_name,
-                            "user_profile_url": user_name,
-                            "distinct_positions": int(data.get('distinct_positions', 1)),
-                            "timestamp": datetime.now().isoformat()
-                        })
-            except (json.JSONDecodeError, KeyError, ValueError) as e:
-                print(f"   Warning: Failed to parse insider data: {e}")
                 continue
         whale_moves.sort(key=lambda x: x['amount'], reverse=True)
         if not whale_moves:
-            print("   No whale positions found, injecting placeholder data")
             return self._placeholder_whale_moves()
-        print(f"   Found {len(whale_moves)} whale positions")
         # Enrich with market images and user images
         whale_moves = self._enrich_whale_moves_with_images(whale_moves)
@@ -1166,34 +1189,44 @@ class PolymarketEmailGenerator:
     def _enrich_whale_moves_with_images(self, whale_moves: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """Enrich whale moves with market images and user profile images."""
         print("   Fetching market images for whale moves...")
-        user_image_url = "https://i.ibb.co/23VYpRcK/polywhale.png"
         for move in whale_moves:
-            # Add user image (static for now)
-            move['user_image'] = user_image_url
-            # Fetch market image using slug
-            slug = move.get('slug', '')
-            if slug:
-                try:
-                    details = self.fetch_market_details(slug)
-                    market_image = details.get('image', '')
-                    move['market_image'] = market_image
-                    if market_image:
-                        print(f"      ✓ Found image for {slug[:50]}")
-                    else:
-                        print(f"      ⚠ No image found for {slug[:50]}")
-                except Exception as e:
-                    print(f"      ⚠ Error fetching image for {slug[:50]}: {e}")
-                    move['market_image'] = ''
             else:
-                print(f"      ⚠ No slug available for market: {move.get('market', 'Unknown')}")
-                move['market_image'] = ''
         return whale_moves
     def _placeholder_whale_moves(self) -> List[Dict[str, Any]]:
-        """Return fallback whale move data when ClickHouse is unavailable."""
         placeholder_moves = _fresh_placeholder_whales()
         print(f"   Using {len(placeholder_moves)} placeholder whale entries")
         # Enrich placeholder moves with images too

             return []
     def fetch_whale_moves(self) -> List[Dict[str, Any]]:
+        """Fetch whale moves from Polymarket trades API"""
         print("Fetching whale moves...")
+        import time
+        # Polymarket trades API endpoint
+        url = 'https://data-api.polymarket.com/trades'
+        params = {
+            'limit': 100,
+            'takerOnly': 'true',
+            'filterType': 'CASH',
+            'filterAmount': 50000
+        }
         max_retries = 3
+        timeout = 30
         for attempt in range(max_retries):
             try:
                 if attempt > 0:
                     wait_time = 2 ** attempt
                     print(f"   Retry {attempt}/{max_retries} after {wait_time}s...")
                     time.sleep(wait_time)
                 response = requests.get(
                     url,
+                    params=params,
                     headers={'Content-Type': 'application/json'},
                     timeout=timeout
                 )
                 break
             except requests.exceptions.RequestException as e:
+                print(f"   Warning: API request attempt {attempt + 1}/{max_retries} failed: {e}")
+                if attempt == max_retries - 1:
+                    print(f"   Error: Failed after {max_retries} attempts")
+                    print("   Falling back to placeholder whale move data")
                     return self._placeholder_whale_moves()
         # Process the successful response
+        try:
+            trades_data = response.json()
+        except json.JSONDecodeError as e:
+            print(f"   Error: Failed to parse API response: {e}")
+            print("   Falling back to placeholder whale move data")
+            return self._placeholder_whale_moves()
+        if not isinstance(trades_data, list):
+            print(f"   Error: Expected array response, got {type(trades_data)}")
+            print("   Falling back to placeholder whale move data")
+            return self._placeholder_whale_moves()
+        # Get current Unix timestamp
+        current_timestamp = int(time.time())
+        # 24 hours in seconds
+        twenty_four_hours_ago = current_timestamp - (24 * 60 * 60)
+        # Sports slugs to filter out
+        sports_keywords = ['nba', 'nhl', 'epl', 'ucl', 'nfl']
+        # Filter and process trades
         whale_moves = []
+        for trade in trades_data:
             try:
+                # Filter out trades older than 24 hours
+                trade_timestamp = trade.get('timestamp', 0)
+                if trade_timestamp < twenty_four_hours_ago:
+                    continue
+                # Filter out sports-related slugs
+                slug = trade.get('slug', '').lower()
+                if any(keyword in slug for keyword in sports_keywords):
+                    continue
+                # Extract data
+                size = float(trade.get('size', 0))
+                price = float(trade.get('price', 0))
+                amount = size * price  # Calculate total amount
+                title_text = trade.get('title', 'Unknown Market')
+                outcome = trade.get('outcome', '')
+                event_slug = trade.get('eventSlug', '')
+                # Build title with outcome
+                outcome_str = f" on {outcome}" if outcome else ""
+                title = f"${amount:,.0f} position{outcome_str}"
+                # Use slug from trade, or generate from title
+                if event_slug:
+                    slug_for_display = event_slug
+                else:
+                    slug_for_display = title_text.lower().replace(' ', '-').replace('?', '').replace("'", '')[:50]
+                # Get user information
+                user_name = trade.get('name', trade.get('pseudonym', 'Anonymous'))
+                profile_image = trade.get('profileImageOptimized') or trade.get('profileImage', '')
+                # Get market icon if available
+                market_icon = trade.get('icon', '')
+                whale_moves.append({
+                    "title": title,
+                    "market": title_text,
+                    "event": title_text,
+                    "slug": slug_for_display,
+                    "amount": amount,
+                    "user_name": user_name,
+                    "user_profile_url": user_name,
+                    "distinct_positions": 1,
+                    "timestamp": datetime.now().isoformat(),
+                    "profile_image": profile_image,  # Store for enrichment
+                    "market_icon": market_icon  # Store for enrichment
+                })
+            except (KeyError, ValueError, TypeError) as e:
+                print(f"   Warning: Failed to process trade data: {e}")
                 continue
+        # Sort by amount (descending) and limit to top 10
         whale_moves.sort(key=lambda x: x['amount'], reverse=True)
+        whale_moves = whale_moves[:10]
         if not whale_moves:
+            print("   No whale moves found after filtering")
+            print("   Falling back to placeholder whale move data")
             return self._placeholder_whale_moves()
+        print(f"   Found {len(whale_moves)} whale moves after filtering")
         # Enrich with market images and user images
         whale_moves = self._enrich_whale_moves_with_images(whale_moves)
     def _enrich_whale_moves_with_images(self, whale_moves: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """Enrich whale moves with market images and user profile images."""
         print("   Fetching market images for whale moves...")
+        default_user_image = "https://res.cloudinary.com/db1zelfhi/image/upload/v1765869030/polygraph/images/jhvsxfndu0boigqz3kjw.png"
         for move in whale_moves:
+            # Use profile image from API if available, otherwise use default
+            profile_image = move.pop('profile_image', '')  # Remove from dict after getting
+            if profile_image and profile_image.strip():
+                move['user_image'] = profile_image
+            else:
+                move['user_image'] = default_user_image
+            # Use market icon from API if available, otherwise fetch using slug
+            market_icon = move.pop('market_icon', '')  # Remove from dict after getting
+            if market_icon and market_icon.strip():
+                move['market_image'] = market_icon
+                print(f"      ✓ Using icon from API for {move.get('market', 'Unknown')[:50]}")
             else:
+                # Fetch market image using slug as fallback
+                slug = move.get('slug', '')
+                if slug:
+                    try:
+                        details = self.fetch_market_details(slug)
+                        market_image = details.get('image', '')
+                        move['market_image'] = market_image
+                        if market_image:
+                            print(f"      ✓ Found image for {slug[:50]}")
+                        else:
+                            print(f"      ⚠ No image found for {slug[:50]}")
+                    except Exception as e:
+                        print(f"      ⚠ Error fetching image for {slug[:50]}: {e}")
+                        move['market_image'] = ''
+                else:
+                    print(f"      ⚠ No slug available for market: {move.get('market', 'Unknown')}")
+                    move['market_image'] = ''
         return whale_moves
     def _placeholder_whale_moves(self) -> List[Dict[str, Any]]:
+        """Return fallback whale move data when Polymarket API is unavailable."""
         placeholder_moves = _fresh_placeholder_whales()
         print(f"   Using {len(placeholder_moves)} placeholder whale entries")
         # Enrich placeholder moves with images too