Spaces:
Sleeping
Sleeping
import glob
import json
import os
import random
from typing import Dict, List, Optional, Tuple

import pandas as pd
# CSV that maps fossil file names to image paths (and families); the path is
# resolved relative to the directory containing this module.
FOSSILS_CSV_PATH = os.path.join(
    os.path.dirname(__file__),
    "fossils_paths.csv",
)
# Parsed CSV keyed by (path, mtime, size): repeated lookups reuse the parsed
# table, while a file that changed on disk is re-read on the next call.
_FOSSILS_CSV_CACHE: dict = {}

# (marker, cluster path prefix, public URL prefix) for each image collection.
_PUBLIC_URL_PREFIXES = (
    (
        'Florissant_Fossil/512/full/jpg/',
        '/gpfs/data/tserre/irodri15/Fossils/new_data/leavesdb-v1_1/images/Fossil/Florissant_Fossil/512/full/jpg/',
        'https://storage.googleapis.com/serrelab/prj_fossils/2024/Florissant_Fossil_v2.0/',
    ),
    (
        'General_Fossil/512/full/jpg/',
        '/gpfs/data/tserre/irodri15/Fossils/new_data/leavesdb-v1_1/images/Fossil/General_Fossil/512/full/jpg/',
        'https://storage.googleapis.com/serrelab/prj_fossils/2024/General_Fossil_v2.0/',
    ),
)


def _load_fossils_csv():
    """Return the fossils CSV as a DataFrame, or None when the file is absent."""
    if not os.path.exists(FOSSILS_CSV_PATH):
        return None
    stat = os.stat(FOSSILS_CSV_PATH)
    key = (FOSSILS_CSV_PATH, stat.st_mtime_ns, stat.st_size)
    if key not in _FOSSILS_CSV_CACHE:
        # Keep a single entry so stale versions of the table are released.
        _FOSSILS_CSV_CACHE.clear()
        _FOSSILS_CSV_CACHE[key] = pd.read_csv(FOSSILS_CSV_PATH)
    return _FOSSILS_CSV_CACHE[key]


def get_fossil_url_from_csv(fossil_name: str) -> Tuple[Optional[str], Optional[str]]:
    """
    Search fossils_paths.csv for the public image URL and family of a fossil.

    The name is matched (case-insensitive substring) against the CSV's
    ``file_name`` column, trying in order: the full name, the name with the
    "FLFO_" / "CU_" markers removed, and the part after the last underscore.
    The first pattern that matches any row wins and only its first row is used.

    Args:
        fossil_name: Name of the fossil (e.g., "FLFO_002787A", "CU_0387cu")

    Returns:
        Tuple of (URL to the fossil image, family name), or (None, None) when
        the name is empty, the CSV is missing, nothing matches, the matched
        path belongs to neither known image collection, or the lookup fails.
        A missing/blank family cell is reported as 'Unknown'.
    """
    # An empty pattern is a substring of every file name, so a blank name would
    # otherwise "match" the first CSV row.
    if not isinstance(fossil_name, str) or not fossil_name.strip():
        return None, None

    try:
        df = _load_fossils_csv()
        if df is None:
            return None, None

        # CSV filenames may not include the full prefix
        # (e.g., "FLFO_002787A" -> "002787A"), so try progressively looser forms.
        candidates = [
            fossil_name,  # Full name
            fossil_name.replace("FLFO_", "").replace("CU_", ""),  # Without prefix
            fossil_name.split("_")[-1],  # Last part after underscore
        ]
        # Drop duplicates (order preserved) and empty patterns such as the
        # remainder of "FLFO_", which would match every row.
        patterns = [p for p in dict.fromkeys(candidates) if p]

        file_names = df['file_name']
        matching_rows = None
        for pattern in patterns:
            mask = file_names.str.contains(pattern, case=False, na=False, regex=False)
            if mask.any():
                matching_rows = df[mask]
                break

        if matching_rows is None:
            return None, None

        # Get the first match
        row = matching_rows.iloc[0]
        file_path = row['file_name']
        family = row.get('family', 'Unknown')
        if pd.isna(family):
            # Blank CSV cells are parsed as NaN; report them like a missing column.
            family = 'Unknown'

        # Convert the cluster path to its public URL
        for marker, cluster_prefix, public_prefix in _PUBLIC_URL_PREFIXES:
            if marker in file_path:
                return file_path.replace(cluster_prefix, public_prefix), family

        return None, None
    except Exception as e:
        # Best-effort lookup: any CSV/parse problem degrades to "not found".
        print(f"Error searching CSV for {fossil_name}: {e}")
        return None, None
def load_plausible_fossils(json_dir: Optional[str] = None) -> List[Dict]:
    """
    Load the JSON response files and extract entries marked as 'Plausible'.

    Files named ``unidentified_fossil_responses*.json`` in ``json_dir`` are
    read in sorted filename order, so when the same fossil appears more than
    once the entry kept is deterministic (the first one encountered).

    Args:
        json_dir: Directory containing the JSON response files
            (defaults to the "Unknown" directory next to this module's parent
            directory).

    Returns:
        List of dictionaries with keys "Serial Number", "Fossil Name" and
        "Source File", one per distinct fossil name. Files that cannot be read
        or parsed, or whose top level is not a list, are reported and skipped;
        entries that are not objects or lack a non-empty string "Fossil Name"
        are skipped individually.
    """
    if json_dir is None:
        # Default to ../Unknown relative to the directory holding this module
        json_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "Unknown")

    pattern = os.path.join(json_dir, "unidentified_fossil_responses*.json")

    unique_fossils = []
    seen = set()

    # glob returns files in filesystem order; sort so that de-duplication
    # always keeps the same entry for a given set of files.
    for json_file in sorted(glob.glob(pattern)):
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error loading {json_file}: {e}")
            continue

        if not isinstance(data, list):
            print(f"Error loading {json_file}: expected a list of entries")
            continue

        source_file = os.path.basename(json_file)
        for entry in data:
            # A malformed entry must not discard the rest of the file.
            if not isinstance(entry, dict):
                continue
            if entry.get("User Selection") != "Plausible":
                continue

            fossil_name = entry.get("Fossil Name")
            # Without a usable name the fossil can be neither de-duplicated
            # nor looked up later.
            if not isinstance(fossil_name, str) or not fossil_name:
                continue
            if fossil_name in seen:
                continue

            seen.add(fossil_name)
            unique_fossils.append({
                "Serial Number": entry.get("Serial Number"),
                "Fossil Name": fossil_name,
                "Source File": source_file,
            })

    return unique_fossils
def get_fossil_image_url(fossil_name: str) -> Tuple[str, str]:
    """
    Resolve a fossil's image URL and family, falling back to a placeholder.

    Args:
        fossil_name: Name of the fossil (e.g., "FLFO_002787A", "CU_0387cu")

    Returns:
        Tuple of (URL to the fossil image, family name) when the CSV lookup
        yields a URL; otherwise (placeholder URL, "Unknown").
    """
    csv_url, csv_family = get_fossil_url_from_csv(fossil_name)

    if not csv_url:
        # No usable CSV hit: build a best-guess URL under the Florissant
        # collection (may not work for all fossils).
        florissant_root = "https://storage.googleapis.com/serrelab/prj_fossils/2024/Florissant_Fossil_v2.0/"
        placeholder = f"{florissant_root}{fossil_name}/image.jpg"
        return placeholder, "Unknown"

    return csv_url, csv_family
def get_random_plausible_fossils(count: int = 10, json_dir: Optional[str] = None) -> List[Dict]:
    """
    Get a random selection of plausible fossils.

    Args:
        count: Maximum number of fossils to return. Zero or a negative value
            yields an empty selection.
        json_dir: Directory containing JSON files (passed through to
            load_plausible_fossils)

    Returns:
        List of fossil dictionaries, each extended with "Image URL" and
        "Family". When no more than ``count`` fossils are available, all of
        them are returned in their loaded order (no shuffling).
    """
    all_plausible = load_plausible_fossils(json_dir)

    # random.sample raises ValueError for a negative sample size; treat a
    # negative request as "none" instead of crashing the caller.
    wanted = max(count, 0)

    if len(all_plausible) <= wanted:
        selected = all_plausible
    else:
        selected = random.sample(all_plausible, wanted)

    # Add image URLs and families
    for fossil in selected:
        url, family = get_fossil_image_url(fossil["Fossil Name"])
        fossil["Image URL"] = url
        fossil["Family"] = family

    return selected
def get_all_plausible_fossils(json_dir: str = None) -> List[Dict]:
    """
    Return every plausible fossil, annotated with its image URL and family.

    Args:
        json_dir: Directory containing JSON files (passed through to
            load_plausible_fossils)

    Returns:
        The list produced by load_plausible_fossils, where each dictionary
        additionally carries "Image URL" and "Family".
    """
    fossils = load_plausible_fossils(json_dir)

    for record in fossils:
        # The lookup runs first, then both keys are filled from its result.
        record["Image URL"], record["Family"] = get_fossil_image_url(record["Fossil Name"])

    return fossils
def format_fossil_html(fossil: Dict) -> str:
    """
    Format a fossil entry as an HTML card with a link to its image.

    All values taken from ``fossil`` are HTML-escaped before being inserted,
    so names, families or URLs containing markup characters cannot break out
    of the card or of the ``href`` attribute.

    Args:
        fossil: Dictionary containing fossil information. Reads the keys
            "Fossil Name", "Image URL", "Serial Number" and "Family"; missing
            keys fall back to "Unknown" (name, family) or an empty string.

    Returns:
        HTML string for the fossil. The "View Image" link is rendered only
        when the image URL is a non-empty string that is not the
        ".../image.jpg" placeholder; otherwise a "not available" note is shown.
    """
    # Imported locally so this function does not depend on a top-level import.
    from html import escape

    fossil_name = escape(str(fossil.get("Fossil Name", "Unknown")))
    serial_num = escape(str(fossil.get("Serial Number", "")))
    family = escape(str(fossil.get("Family", "Unknown")))
    image_url = fossil.get("Image URL", "")

    # Check if URL is valid (not a placeholder). The isinstance guard keeps a
    # non-string value (e.g. None) from raising on the substring test.
    has_valid_url = (
        isinstance(image_url, str)
        and bool(image_url)
        and "image.jpg" not in image_url
    )

    if has_valid_url:
        # escape() also encodes quotes, so the URL cannot close the attribute.
        safe_url = escape(image_url)
        link_html = f"""
        <p style='margin: 10px 0 0 0;'>
            <a href='{safe_url}' target='_blank' style='color: #0066cc; text-decoration: none; font-weight: bold; padding: 8px 15px; background-color: #e3f2fd; border-radius: 4px; display: inline-block;'>
                🔗 View Image →
            </a>
        </p>
        """
    else:
        link_html = """
        <p style='margin: 10px 0 0 0; color: #666; font-style: italic;'>
            Image URL not available in database
        </p>
        """

    card_html = f"""
    <div style='border: 1px solid #ddd; padding: 15px; margin: 10px; border-radius: 8px; background-color: #fafafa; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
        <h3 style='margin-top: 0; margin-bottom: 10px; color: #333;'>{fossil_name}</h3>
        <p style='margin: 5px 0;'><strong>Serial Number:</strong> {serial_num}</p>
        <p style='margin: 5px 0;'><strong>Family:</strong> {family}</p>
        {link_html}
    </div>
    """
    return card_html