import asyncio
import json
from cbh.core.config import settings
from langchain_core.prompts import ChatPromptTemplate
from cbh.api.platforms.models import PlatformModel

# System-message template for the structuring LLM call. It lists, field by
# field, how the raw tool data should be mapped onto the PlatformModel schema.
# `{raw_data}` is the only template placeholder; upload_data() fills it with
# the JSON-serialized record.
system_prompt = """You are a data structuring assistant. Your task is to convert raw AI tool data into a strictly typed structured format.

Given the raw tool data below, return a single JSON object matching the PlatformModel schema exactly.

Rules:
- "name": extract the tool name as-is.
- "category": map to an integer enum:
  1=Web apps/SaaS/MVP, 2=Websites/Landing pages, 3=Mobile apps, 4=UI/UX Design,
  5=AI Coding tools, 6=Automation/AI agents, 7=Video/Creative, 8=SEO/GEO,
  9=Growth/Social/Reddit, 10=Research/Analytics.
- "subcategory": use the subcategory string as-is.
- "oneLinePos": use the "One-line positioning" value.
- "description": use the "Detailed description" value.
- "userQueries": split "User query covered" into a list of distinct user intents/queries. If there is only one, return a single-element list.
- "idealCases": use the "Best if client wants" value.
- "personas": split "Recommended persona" by commas into a list of individual personas.
- "level": map skill level to an integer enum: 1=Low, 2=Low-to-Medium, 3=Medium, 4=Medium-to-High, 5=High.
- "toolType": map to an integer enum: 1=No-code, 2=Hybrid, 3=Dev.
- "focus": map "Platform focus" to a list of integer enums: 1=Web, 2=Mobile, 3=Desktop, 4=Multi-platform, 5=Mobile design, 6=Developer workflow, 7=Desktop/Multi-platform dev.
- "productStage": split "Best product stage" by commas into a list.
- "keyStrengths": split "Key strengths" by semicolons into a list. Trim whitespace.
- "caveats": split "Main caveats" by semicolons into a list. If only one caveat, return a single-element list. Trim whitespace.
- "monetizationPriority": map to an integer enum: 1=Low, 2=Medium, 3=High.
- "website": use the "Website" URL as-is.
- "internalNotes": use the "Internal notes" value.

Raw tool data:
{raw_data}"""

async def upload_data(item: dict) -> PlatformModel:
    """Run one raw tool record through the LLM chain and return the result.

    The record is serialized to JSON (non-ASCII characters are kept, not
    escaped) and substituted for the ``raw_data`` placeholder of
    ``system_prompt``. The model comes from ``settings.get_llm`` with
    ``PlatformModel`` passed as ``schema``.

    NOTE(review): ``schema=PlatformModel`` presumably makes the chain return
    a ``PlatformModel`` instance - confirm against ``settings.get_llm``.

    Args:
        item: Raw tool data for a single platform.

    Returns:
        Whatever the chain produces for this record (annotated as
        ``PlatformModel``); its ``name`` attribute is printed as progress.
    """
    template = ChatPromptTemplate.from_messages([("system", system_prompt)])
    llm = settings.get_llm(model="gpt-5.4", schema=PlatformModel)
    pipeline = template | llm

    payload = {"raw_data": json.dumps(item, ensure_ascii=False)}
    platform = await pipeline.ainvoke(payload)

    print(f"Processed: {platform.name}")
    return platform


async def main():
    """Structure every record in ``ai_tools.json`` and store the results.

    Reads the JSON file from the current working directory, converts the
    records with ``upload_data`` in batches of 10 (records within a batch are
    awaited concurrently; batches run one after another), then inserts the
    ``to_mongo()`` form of every result into ``settings.DB_CLIENT.platforms``
    in a single ``insert_many`` call.

    If the file contains no records, nothing is inserted and the function
    returns after printing a notice.

    Raises:
        Any exception raised by ``upload_data`` for a record propagates out
        of ``asyncio.gather`` and aborts the run before anything is inserted.
    """
    # Decode explicitly as UTF-8 instead of the locale default, so files with
    # non-ASCII text load the same way on every platform.
    with open("ai_tools.json", "r", encoding="utf-8") as f:
        data = json.load(f)

    results = []
    batch_size = 10  # upper bound on concurrent LLM calls
    for start in range(0, len(data), batch_size):
        batch = data[start:start + batch_size]
        platforms = await asyncio.gather(*(upload_data(item) for item in batch))
        results.extend(platforms)

    # PyMongo-style drivers reject an empty document list, so skip the insert
    # when there was nothing to process.
    if not results:
        print("No platforms to insert")
        return

    await settings.DB_CLIENT.platforms.insert_many(
        [platform.to_mongo() for platform in results]
    )


# Script entry point: run the import pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())