|
|
| import os |
| import sys |
| import time |
| import warnings |
| from dotenv import load_dotenv |
| from appwrite.client import Client |
| from appwrite.services.databases import Databases |
|
|
| |
# Silence every warning category for the lifetime of this one-shot script so
# that library warnings do not interleave with the progress output.
warnings.filterwarnings("ignore")

# Load settings from a local .env file (when present) into the environment.
load_dotenv()

# Connection settings; each one must be set and non-empty.
APPWRITE_ENDPOINT = os.getenv("APPWRITE_ENDPOINT")
APPWRITE_PROJECT_ID = os.getenv("APPWRITE_PROJECT_ID")
APPWRITE_API_KEY = os.getenv("APPWRITE_API_KEY")
APPWRITE_DATABASE_ID = os.getenv("APPWRITE_DATABASE_ID")

# Abort early, naming exactly which settings are absent or empty, instead of
# failing later with an opaque error from the first API call.
_missing = [
    name
    for name, value in (
        ("APPWRITE_ENDPOINT", APPWRITE_ENDPOINT),
        ("APPWRITE_PROJECT_ID", APPWRITE_PROJECT_ID),
        ("APPWRITE_API_KEY", APPWRITE_API_KEY),
        ("APPWRITE_DATABASE_ID", APPWRITE_DATABASE_ID),
    )
    if not value
]
if _missing:
    print(f"❌ Missing environment variables: {', '.join(_missing)}. Check .env file.")
    sys.exit(1)
|
|
# Build the API client from the validated environment settings. The setters
# are chained, which relies on each one returning the client instance.
client = (
    Client()
    .set_endpoint(APPWRITE_ENDPOINT)
    .set_project(APPWRITE_PROJECT_ID)
    .set_key(APPWRITE_API_KEY)
)

# Databases service handle shared by every function in this script.
databases = Databases(client)
|
|
def wait_for_attribute(collection_id, key, max_retries=60, poll_interval=2):
    """Poll Appwrite until the attribute ``key`` reports status 'available'.

    Args:
        collection_id: ID of the collection that owns the attribute.
        key: Key of the attribute to wait for.
        max_retries: Maximum number of polling attempts before giving up.
        poll_interval: Seconds to sleep between two attempts.

    Returns:
        True as soon as the attribute status is 'available'; False when the
        status becomes 'failed' or every attempt has been used up.
    """
    print(f" ⏳ Waiting for attribute '{key}' to be available...", end="", flush=True)

    for _ in range(max_retries):
        try:
            response = databases.list_attributes(APPWRITE_DATABASE_ID, collection_id)
            # NOTE(review): if this listing is paginated, a key beyond the
            # first page would never be found here - confirm the page size.
            target = next(
                (a for a in response.get("attributes", []) if a["key"] == key),
                None,
            )
            status = target["status"] if target else None
        except Exception as e:
            # Deliberately broad: polling is best-effort, so any error from a
            # single attempt is reported and simply costs one retry.
            print(f" Error: {e}")
            time.sleep(poll_interval)
            continue

        if status == "available":
            print(" ✅ Ready.")
            return True
        if status == "failed":
            print(" ❌ Failed.")
            return False

        # Attribute not listed yet, or still in an intermediate state.
        time.sleep(poll_interval)
        print(".", end="", flush=True)

    print(" ❌ Timeout.")
    return False
|
|
def wait_for_index(collection_id, key, max_retries=60, poll_interval=2):
    """Poll Appwrite until the index ``key`` reports status 'available'.

    Args:
        collection_id: ID of the collection that owns the index.
        key: Key of the index to wait for.
        max_retries: Maximum number of polling attempts before giving up.
        poll_interval: Seconds to sleep between two attempts.

    Returns:
        True as soon as the index status is 'available'; False when the
        status becomes 'failed' or every attempt has been used up.
    """
    print(f" ⏳ Waiting for index '{key}' to be available...", end="", flush=True)

    for _ in range(max_retries):
        try:
            response = databases.list_indexes(APPWRITE_DATABASE_ID, collection_id)
            # NOTE(review): if this listing is paginated, a key beyond the
            # first page would never be found here - confirm the page size.
            target = next(
                (i for i in response.get("indexes", []) if i["key"] == key),
                None,
            )
            status = target["status"] if target else None
        except Exception as e:
            # Deliberately broad: polling is best-effort, so any error from a
            # single attempt is reported and simply costs one retry.
            print(f" Error: {e}")
            time.sleep(poll_interval)
            continue

        if status == "available":
            print(" ✅ Ready.")
            return True
        if status == "failed":
            print(" ❌ Failed.")
            return False

        # Index not listed yet, or still in an intermediate state.
        time.sleep(poll_interval)
        print(".", end="", flush=True)

    print(" ❌ Timeout.")
    return False
|
|
def setup_v2(collection_id):
    """Create the research-paper attributes and indexes on a collection.

    Each attribute is created and then awaited before the next one starts;
    indexes are created only after every attribute is available. A create
    call that fails with an "already exists" error is treated as success, so
    the function can be re-run on a partially configured collection.

    The success message is printed only when every attribute and index was
    confirmed available. Otherwise the items that failed or timed out are
    listed and the function returns early.

    Args:
        collection_id: ID of the target collection inside APPWRITE_DATABASE_ID.

    Returns:
        None.
    """
    print(f"🚀 Starting Robust Schema Setup for: {collection_id}")

    # (key, Databases method name, positional arguments after the key).
    # The trailing arguments are presumably (size, required) for strings,
    # (required,) for datetime/url and (required, min, max, default) for
    # integers - confirm against the installed SDK version.
    attribute_specs = [
        ("paper_id", "create_string_attribute", (100, True)),
        ("title", "create_string_attribute", (500, True)),
        ("summary", "create_string_attribute", (5000, True)),
        ("authors", "create_string_attribute", (5000, False)),
        ("published_at", "create_datetime_attribute", (True,)),
        ("pdf_url", "create_url_attribute", (True,)),
        ("category", "create_string_attribute", (50, True)),
        ("likes", "create_integer_attribute", (False, 0, 2147483647, 0)),
        ("dislikes", "create_integer_attribute", (False, 0, 2147483647, 0)),
        ("views", "create_integer_attribute", (False, 0, 2147483647, 0)),
    ]

    # (index key, index type, indexed attributes, sort orders).
    index_specs = [
        ("unique_paper_id", "unique", ["paper_id"], ["ASC"]),
        ("idx_published_at", "key", ["published_at"], ["DESC"]),
        ("idx_category", "key", ["category"], ["ASC"]),
    ]

    print("\n📦 Creating Attributes (Synchronous Mode)...")
    unavailable_attributes = []
    for key, method_name, extra_args in attribute_specs:
        print(f" -> {key}")
        try:
            # Resolved inside the try so that a method missing from the SDK
            # is reported like any other creation error.
            create = getattr(databases, method_name)
            create(APPWRITE_DATABASE_ID, collection_id, key, *extra_args)
        except Exception as e:
            if "already exists" not in str(e):
                print(f"Error: {e}")
        # Wait even after an error: the attribute may already exist and
        # still be processing from an earlier run.
        if not wait_for_attribute(collection_id, key):
            unavailable_attributes.append(key)

    if unavailable_attributes:
        # The indexes reference these attributes, so creating them now could
        # only fail; stop here rather than report a misleading success.
        print(f"\n❌ Attributes not available: {', '.join(unavailable_attributes)}.")
        print(" Skipping index creation.")
        return

    print("\n🗂️ Creating Indexes (Now that attributes are ready)...")
    unavailable_indexes = []
    for key, index_type, attributes, orders in index_specs:
        print(f" -> {key}")
        try:
            databases.create_index(
                APPWRITE_DATABASE_ID, collection_id, key, index_type, attributes, orders
            )
        except Exception as e:
            if "already exists" not in str(e):
                print(f"Error: {e}")
        if not wait_for_index(collection_id, key):
            unavailable_indexes.append(key)

    if unavailable_indexes:
        print(f"\n❌ Indexes not available: {', '.join(unavailable_indexes)}.")
        return

    print("\n✅ Verification Complete: All attributes and indexes are AVAILABLE.")
    print(" You may now run the ingestion script.")
|
|
if __name__ == "__main__":
    # The only positional argument is the ID of the collection to set up.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python scripts/setup_research_v2.py <NEW_COLLECTION_ID>")
        sys.exit(1)

    setup_v2(cli_args[0])
|
|