"""Chainlit RAG app: builds a Chroma index from local PDFs, then generates
brand identity reports with Llama 3 (via Ollama) grounded in that index."""

import os

import chainlit as cl
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_ollama import ChatOllama
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Silence the HuggingFace tokenizers fork-safety warning.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# --- Ingestion: load every PDF from the data folder -------------------------
pdf_folder_path = "/teamspace/studios/this_studio/data_personlity"
pdf_files = [
    os.path.join(pdf_folder_path, f)
    for f in os.listdir(pdf_folder_path)
    if f.endswith(".pdf")
]

documents = []
loaded_count = 0  # number of PDFs that actually parsed (excludes skipped ones)
for pdf_file in pdf_files:
    try:
        loader = PyPDFLoader(pdf_file)
        documents.extend(loader.load())
        loaded_count += 1
    except Exception as e:
        # Best-effort ingestion: one corrupt PDF must not abort startup.
        print(f"⚠️ Skipping {pdf_file} due to error: {e}")

# BUG FIX: previously reported len(pdf_files), which counted skipped files too.
print(f"✅ Loaded {loaded_count} valid PDF files successfully!")

# --- Chunking: split documents for retrieval-friendly granularity -----------
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
all_splits = text_splitter.split_documents(documents)
print(f"✅ Split text into {len(all_splits)} chunks!")

# --- Indexing: embed chunks and persist them in ChromaDB --------------------
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = Chroma.from_documents(
    documents=all_splits,
    collection_name="rag-chroma",
    embedding=embeddings,
    persist_directory="/teamspace/studios/this_studio/chromadb",
)
retriever = vectorstore.as_retriever()
print("✅ Data stored in ChromaDB successfully!")

# --- LLM: local Llama 3 served by Ollama ------------------------------------
ollama_llm = "llama3"
model_local = ChatOllama(model=ollama_llm)


@cl.on_message
async def generate_report(message: cl.Message) -> None:
    """Handle one chat message and reply with a brand identity report.

    Expects the message content as 8 pipe-separated fields:
    Brand Name | Industry | Core Values | Target Audience | Competitors |
    Vision | Tone | Visuals. Retrieves branding context from ChromaDB and
    asks Llama 3 to produce the report (RAG).
    """
    user_inputs = message.content.split("|")
    if len(user_inputs) < 8:
        await cl.Message(
            content="❌ Please enter all brand details separated by | (Brand Name | Industry | Core Values | Target Audience | Competitors | Vision | Tone | Visuals)"
        ).send()
        return

    # Map the positional fields into a structured record.
    brand_data = {
        "brand_name": user_inputs[0],
        "industry": user_inputs[1],
        "core_values": user_inputs[2],
        "target_audience": user_inputs[3],
        "competitors": user_inputs[4],
        "vision": user_inputs[5],
        "tone": user_inputs[6],
        "visuals": user_inputs[7],
    }

    # Retrieve branding-related chunks from the vector store.
    # NOTE: retriever.invoke() replaces the deprecated get_relevant_documents().
    query = "Brand personality, brand voice, visual identity, core values, marketing strategy"
    docs = retriever.invoke(query)
    retrieved_content = "\n".join(doc.page_content for doc in docs)
    brand_data["context"] = retrieved_content

    # Build the grounded prompt for Llama 3.
    prompt = f"""
You are a brand strategist and marketing expert. Your task is to generate a comprehensive brand identity report based on the provided inputs. The report should include: 1) A clear brand persona, 2) A compelling brand story, 3) Defined brand voice and tone, 4) Visual identity recommendations, 5) Competitive analysis, 6) Marketing and messaging strategy. Use the following details to create a unique and engaging brand identity: - **Brand Name:** {brand_data['brand_name']} - **Industry:** {brand_data['industry']} - **Core Values:** {brand_data['core_values']} - **Target Audience:** {brand_data['target_audience']} - **Competitors & Market Positioning:** {brand_data['competitors']} - **Brand Vision & Mission:** {brand_data['vision']} - **Preferred Brand Tone of Voice:** {brand_data['tone']} - **Visual Preferences (Colors, Typography, Style):** {brand_data['visuals']} Use only the following context for reference: {brand_data['context']} Generate a structured and well-organized brand identity report.
"""

    # Invoke Llama 3 to generate the report.
    response = model_local.invoke(prompt)

    # BUG FIX: ChatOllama.invoke() returns an AIMessage; interpolating the
    # message object directly would render its repr, not the generated text.
    await cl.Message(
        content=f"🔹 **Brand Identity Report for {brand_data['brand_name']}**\n\n{response.content}"
    ).send()