| |
|
| | import os |
| | import sys |
| | import time |
| | import warnings |
| | from dotenv import load_dotenv |
| | from appwrite.client import Client |
| | from appwrite.services.databases import Databases |
| |
|
| | |
# Silence every Python warning for the lifetime of this process.
warnings.filterwarnings("ignore")

# Load settings from a .env file into the process environment.
load_dotenv()

APPWRITE_ENDPOINT = os.getenv("APPWRITE_ENDPOINT")
APPWRITE_PROJECT_ID = os.getenv("APPWRITE_PROJECT_ID")
APPWRITE_API_KEY = os.getenv("APPWRITE_API_KEY")
APPWRITE_DATABASE_ID = os.getenv("APPWRITE_DATABASE_ID")

# Fail fast and name exactly which settings are unset or empty, so the
# operator does not have to guess which line of the .env file is wrong.
_missing_env = [
    name
    for name, value in (
        ("APPWRITE_ENDPOINT", APPWRITE_ENDPOINT),
        ("APPWRITE_PROJECT_ID", APPWRITE_PROJECT_ID),
        ("APPWRITE_API_KEY", APPWRITE_API_KEY),
        ("APPWRITE_DATABASE_ID", APPWRITE_DATABASE_ID),
    )
    if not value
]
if _missing_env:
    print(f"❌ Missing environment variables: {', '.join(_missing_env)}. Check .env file.")
    sys.exit(1)

# Shared Appwrite client, configured from the environment values above.
client = Client()
client.set_endpoint(APPWRITE_ENDPOINT)
client.set_project(APPWRITE_PROJECT_ID)
client.set_key(APPWRITE_API_KEY)

# Databases service handle used by every function in this script.
databases = Databases(client)
| |
|
def wait_for_attribute(collection_id, key, max_retries=60, interval=2):
    """Poll Appwrite until the attribute ``key`` reaches a final status.

    The attribute list of ``collection_id`` (in ``APPWRITE_DATABASE_ID``) is
    fetched once per attempt and searched for an entry whose ``key`` matches.

    Args:
        collection_id: ID of the collection that owns the attribute.
        key: Key of the attribute to wait for.
        max_retries: Maximum number of polling attempts (default 60).
        interval: Seconds to sleep between attempts (default 2).

    Returns:
        True once the attribute status is ``'available'``. False when the
        status is ``'failed'`` or ``'stuck'``, or when ``max_retries``
        attempts pass without the attribute becoming available.
    """
    print(f" ⏳ Waiting for attribute '{key}' to be available...", end="", flush=True)

    for _ in range(max_retries):
        try:
            response = databases.list_attributes(APPWRITE_DATABASE_ID, collection_id)
            attrs = response.get("attributes", [])
            target = next((a for a in attrs if a["key"] == key), None)
            # None means "not listed yet"; keep polling in that case.
            status = target["status"] if target else None
        except Exception as e:
            # Best-effort polling: a failed request or an unexpected payload
            # costs one attempt but does not abort the wait.
            print(f" Error: {e}")
        else:
            if status == "available":
                print(" ✅ Ready.")
                return True
            if status in ("failed", "stuck"):
                # Neither status recovers by waiting, so stop immediately
                # instead of polling until the timeout.
                print(" ❌ Failed." if status == "failed" else " ❌ Stuck.")
                reason = target.get("error")
                if reason:
                    print(f" Reason: {reason}")
                return False
            print(".", end="", flush=True)
        time.sleep(interval)

    print(" ❌ Timeout.")
    return False
| |
|
def wait_for_index(collection_id, key, max_retries=60, interval=2):
    """Poll Appwrite until the index ``key`` reaches a final status.

    The index list of ``collection_id`` (in ``APPWRITE_DATABASE_ID``) is
    fetched once per attempt and searched for an entry whose ``key`` matches.

    Args:
        collection_id: ID of the collection that owns the index.
        key: Key of the index to wait for.
        max_retries: Maximum number of polling attempts (default 60).
        interval: Seconds to sleep between attempts (default 2).

    Returns:
        True once the index status is ``'available'``. False when the status
        is ``'failed'`` or ``'stuck'``, or when ``max_retries`` attempts pass
        without the index becoming available.
    """
    print(f" ⏳ Waiting for index '{key}' to be available...", end="", flush=True)

    for _ in range(max_retries):
        try:
            response = databases.list_indexes(APPWRITE_DATABASE_ID, collection_id)
            indexes = response.get("indexes", [])
            target = next((i for i in indexes if i["key"] == key), None)
            # None means "not listed yet"; keep polling in that case.
            status = target["status"] if target else None
        except Exception as e:
            # Best-effort polling: a failed request or an unexpected payload
            # costs one attempt but does not abort the wait.
            print(f" Error: {e}")
        else:
            if status == "available":
                print(" ✅ Ready.")
                return True
            if status in ("failed", "stuck"):
                # Neither status recovers by waiting, so stop immediately
                # instead of polling until the timeout.
                print(" ❌ Failed." if status == "failed" else " ❌ Stuck.")
                reason = target.get("error")
                if reason:
                    print(f" Reason: {reason}")
                return False
            print(".", end="", flush=True)
        time.sleep(interval)

    print(" ❌ Timeout.")
    return False
| |
|
# Attribute schema: (key, Databases method name, positional args after the key).
# String attributes take (size, required); datetime and URL attributes take
# (required,); integer attributes take (required, min, max, default).
_ATTRIBUTE_SPECS = (
    ("paper_id", "create_string_attribute", (100, True)),
    ("title", "create_string_attribute", (500, True)),
    ("summary", "create_string_attribute", (5000, True)),
    ("authors", "create_string_attribute", (5000, False)),
    ("published_at", "create_datetime_attribute", (True,)),
    ("pdf_url", "create_url_attribute", (True,)),
    ("category", "create_string_attribute", (50, True)),
    ("likes", "create_integer_attribute", (False, 0, 2147483647, 0)),
    ("dislikes", "create_integer_attribute", (False, 0, 2147483647, 0)),
    ("views", "create_integer_attribute", (False, 0, 2147483647, 0)),
)

# Index schema: (key, index type, indexed attributes, sort orders).
_INDEX_SPECS = (
    ("unique_paper_id", "unique", ["paper_id"], ["ASC"]),
    ("idx_published_at", "key", ["published_at"], ["DESC"]),
    ("idx_category", "key", ["category"], ["ASC"]),
)


def _create_then_wait(collection_id, key, create, args, wait):
    """Create one schema resource and wait for it; return True on success.

    An "already exists" error from ``create`` is treated as success-so-far
    (the script is safe to re-run) and the wait still happens. Any other
    creation error is printed and reported as failure without waiting, since
    polling for a resource that was never created can only time out.
    """
    print(f" -> {key}")
    try:
        create(APPWRITE_DATABASE_ID, collection_id, key, *args)
    except Exception as e:
        if "already exists" not in str(e):
            print(f"Error: {e}")
            return False
    return bool(wait(collection_id, key))


def setup_v2(collection_id):
    """Create the attributes and indexes on ``collection_id``.

    Attributes are created one at a time, each followed by a wait until
    Appwrite reports it available. Indexes are created afterwards; an index
    is skipped when one of the attributes it covers did not become available.

    The closing "Verification Complete" message is printed only when every
    attribute and index became available; otherwise the items that failed
    are listed.

    Args:
        collection_id: ID of the target collection in ``APPWRITE_DATABASE_ID``.

    Returns:
        None.
    """
    print(f"🚀 Starting Robust Schema Setup for: {collection_id}")

    failed_attributes = []
    failed_indexes = []

    print("\n📦 Creating Attributes (Synchronous Mode)...")

    try:
        for key, method_name, args in _ATTRIBUTE_SPECS:
            create = getattr(databases, method_name)
            if not _create_then_wait(collection_id, key, create, args, wait_for_attribute):
                failed_attributes.append(key)
    except Exception as e:
        # Anything that escapes the per-attribute handling is unexpected;
        # stop before touching indexes.
        print(f"❌ Critical Error during attribute creation: {e}")
        return

    print("\n🗂️ Creating Indexes (Now that attributes are ready)...")

    for key, index_type, attributes, orders in _INDEX_SPECS:
        blocked_by = [a for a in attributes if a in failed_attributes]
        if blocked_by:
            # The index cannot be built on an attribute that is not available.
            print(f" -> {key}")
            print(f" Skipped: attribute(s) not available: {', '.join(blocked_by)}")
            failed_indexes.append(key)
            continue
        if not _create_then_wait(
            collection_id,
            key,
            databases.create_index,
            (index_type, attributes, orders),
            wait_for_index,
        ):
            failed_indexes.append(key)

    if failed_attributes or failed_indexes:
        print("\n❌ Setup incomplete. The following did not become available:")
        for key in failed_attributes:
            print(f" - attribute '{key}'")
        for key in failed_indexes:
            print(f" - index '{key}'")
        return

    print("\n✅ Verification Complete: All attributes and indexes are AVAILABLE.")
    print(" You may now run the ingestion script.")
| |
|
if __name__ == "__main__":
    # The first command-line argument is the ID of the collection to set up;
    # without it there is nothing to do, so show usage and exit non-zero.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python scripts/setup_research_v2.py <NEW_COLLECTION_ID>")
        sys.exit(1)

    setup_v2(cli_args[0])
| |
|