"""Chainlit RAG app: builds a Chroma index from local PDFs, then generates
brand identity reports with Llama 3 (via Ollama) grounded in that index."""

import os

import chainlit as cl
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_ollama import ChatOllama
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Silence the HuggingFace tokenizers fork-safety warning.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# --- Ingestion: load every PDF from the data folder -------------------------
pdf_folder_path = "/teamspace/studios/this_studio/data_personlity"
pdf_files = [
    os.path.join(pdf_folder_path, f)
    for f in os.listdir(pdf_folder_path)
    if f.endswith(".pdf")
]

documents = []
loaded_count = 0  # number of PDFs that actually parsed (excludes skipped ones)
for pdf_file in pdf_files:
    try:
        loader = PyPDFLoader(pdf_file)
        documents.extend(loader.load())
        loaded_count += 1
    except Exception as e:
        # Best-effort ingestion: one corrupt PDF must not abort startup.
        print(f"⚠️ Skipping {pdf_file} due to error: {e}")

# BUG FIX: previously reported len(pdf_files), which counted skipped files too.
print(f"✅ Loaded {loaded_count} valid PDF files successfully!")

# --- Chunking: split documents for retrieval-friendly granularity -----------
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
all_splits = text_splitter.split_documents(documents)
print(f"✅ Split text into {len(all_splits)} chunks!")

# --- Indexing: embed chunks and persist them in ChromaDB --------------------
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = Chroma.from_documents(
    documents=all_splits,
    collection_name="rag-chroma",
    embedding=embeddings,
    persist_directory="/teamspace/studios/this_studio/chromadb",
)
retriever = vectorstore.as_retriever()
print("✅ Data stored in ChromaDB successfully!")

# --- LLM: local Llama 3 served by Ollama ------------------------------------
ollama_llm = "llama3"
model_local = ChatOllama(model=ollama_llm)


@cl.on_message
async def generate_report(message: cl.Message) -> None:
    """Handle one chat message and reply with a brand identity report.

    Expects the message content as 8 pipe-separated fields:
    Brand Name | Industry | Core Values | Target Audience | Competitors |
    Vision | Tone | Visuals. Retrieves branding context from ChromaDB and
    asks Llama 3 to produce the report (RAG).
    """
    user_inputs = message.content.split("|")
    if len(user_inputs) < 8:
        await cl.Message(
            content="❌ Please enter all brand details separated by | (Brand Name | Industry | Core Values | Target Audience | Competitors | Vision | Tone | Visuals)"
        ).send()
        return

    # Map the positional fields into a structured record.
    brand_data = {
        "brand_name": user_inputs[0],
        "industry": user_inputs[1],
        "core_values": user_inputs[2],
        "target_audience": user_inputs[3],
        "competitors": user_inputs[4],
        "vision": user_inputs[5],
        "tone": user_inputs[6],
        "visuals": user_inputs[7],
    }

    # Retrieve branding-related chunks from the vector store.
    # NOTE: retriever.invoke() replaces the deprecated get_relevant_documents().
    query = "Brand personality, brand voice, visual identity, core values, marketing strategy"
    docs = retriever.invoke(query)
    retrieved_content = "\n".join(doc.page_content for doc in docs)
    brand_data["context"] = retrieved_content

    # Build the grounded prompt for Llama 3.
    prompt = f"""
You are a brand strategist and marketing expert. Your task is to generate a comprehensive brand identity report based on the provided inputs. The report should include: 1) A clear brand persona, 2) A compelling brand story, 3) Defined brand voice and tone, 4) Visual identity recommendations, 5) Competitive analysis, 6) Marketing and messaging strategy. Use the following details to create a unique and engaging brand identity: - **Brand Name:** {brand_data['brand_name']} - **Industry:** {brand_data['industry']} - **Core Values:** {brand_data['core_values']} - **Target Audience:** {brand_data['target_audience']} - **Competitors & Market Positioning:** {brand_data['competitors']} - **Brand Vision & Mission:** {brand_data['vision']} - **Preferred Brand Tone of Voice:** {brand_data['tone']} - **Visual Preferences (Colors, Typography, Style):** {brand_data['visuals']} Use only the following context for reference: {brand_data['context']} Generate a structured and well-organized brand identity report.
"""

    # Invoke Llama 3 to generate the report.
    response = model_local.invoke(prompt)

    # BUG FIX: ChatOllama.invoke() returns an AIMessage; interpolating the
    # message object directly would render its repr, not the generated text.
    await cl.Message(
        content=f"🔹 **Brand Identity Report for {brand_data['brand_name']}**\n\n{response.content}"
    ).send()