# PersonaGPT / app.py
# (Uploaded by Rdyh — "Upload 16 files", commit 334d556, verified)
import os
import chainlit as cl
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_ollama import ChatOllama
# Silence the HuggingFace tokenizers fork-parallelism warning emitted when
# the embedding model is used from a forked process.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# --- Build the RAG index at startup ----------------------------------------
# Load every PDF in the corpus directory; skip unreadable files instead of
# letting one bad document crash the whole app.
pdf_folder_path = "/teamspace/studios/this_studio/data_personlity"
pdf_files = [os.path.join(pdf_folder_path, f) for f in os.listdir(pdf_folder_path) if f.endswith(".pdf")]

documents = []
loaded_count = 0  # files that actually loaded, not merely those found on disk
for pdf_file in pdf_files:
    try:
        loader = PyPDFLoader(pdf_file)
        documents.extend(loader.load())
        loaded_count += 1
    except Exception as e:
        print(f"⚠️ Skipping {pdf_file} due to error: {e}")
# BUG FIX: previously reported len(pdf_files), which counted skipped files too.
print(f"✅ Loaded {loaded_count} valid PDF files successfully!")

# Split text into overlapping ~500-char chunks for finer-grained retrieval.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
all_splits = text_splitter.split_documents(documents)
print(f"✅ Split text into {len(all_splits)} chunks!")

# Embed the chunks and persist them in a local Chroma collection so the
# index survives restarts.
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = Chroma.from_documents(
    documents=all_splits,
    collection_name="rag-chroma",
    embedding=embeddings,
    persist_directory="/teamspace/studios/this_studio/chromadb",
)
retriever = vectorstore.as_retriever()
print("✅ Data stored in ChromaDB successfully!")

# Local Llama 3, served through Ollama, generates the final report.
ollama_llm = "llama3"
model_local = ChatOllama(model=ollama_llm)
@cl.on_message
async def generate_report(message: cl.Message):
    """
    Handle user input and generate a brand identity report using RAG and Llama 3.

    Expects eight pipe-separated fields in the message:
    Brand Name | Industry | Core Values | Target Audience | Competitors | Vision | Tone | Visuals

    Sends the generated report (or a usage error) back through Chainlit.
    """
    # Strip whitespace so "Acme | Tech" parses the same as "Acme|Tech".
    user_inputs = [field.strip() for field in message.content.split("|")]
    if len(user_inputs) < 8:
        await cl.Message(content="❌ Please enter all brand details separated by | (Brand Name | Industry | Core Values | Target Audience | Competitors | Vision | Tone | Visuals)").send()
        return

    # Extract user input into structured brand data
    brand_data = {
        "brand_name": user_inputs[0],
        "industry": user_inputs[1],
        "core_values": user_inputs[2],
        "target_audience": user_inputs[3],
        "competitors": user_inputs[4],
        "vision": user_inputs[5],
        "tone": user_inputs[6],
        "visuals": user_inputs[7],
    }

    # Retrieve relevant documents from ChromaDB.
    # NOTE: retriever.invoke() replaces the deprecated get_relevant_documents().
    query = "Brand personality, brand voice, visual identity, core values, marketing strategy"
    docs = retriever.invoke(query)
    retrieved_content = "\n".join(doc.page_content for doc in docs)
    brand_data["context"] = retrieved_content

    # Construct prompt for Llama 3 (kept unindented so the string content is
    # exactly what the model receives).
    prompt = f"""
You are a brand strategist and marketing expert. Your task is to generate a comprehensive brand identity report based on the provided inputs.
The report should include:
1) A clear brand persona,
2) A compelling brand story,
3) Defined brand voice and tone,
4) Visual identity recommendations,
5) Competitive analysis,
6) Marketing and messaging strategy.
Use the following details to create a unique and engaging brand identity:
- **Brand Name:** {brand_data['brand_name']}
- **Industry:** {brand_data['industry']}
- **Core Values:** {brand_data['core_values']}
- **Target Audience:** {brand_data['target_audience']}
- **Competitors & Market Positioning:** {brand_data['competitors']}
- **Brand Vision & Mission:** {brand_data['vision']}
- **Preferred Brand Tone of Voice:** {brand_data['tone']}
- **Visual Preferences (Colors, Typography, Style):** {brand_data['visuals']}
Use only the following context for reference:
{brand_data['context']}
Generate a structured and well-organized brand identity report.
"""

    # Invoke Llama 3 to generate the report.
    response = model_local.invoke(prompt)
    # BUG FIX: ChatOllama.invoke returns an AIMessage; interpolating the object
    # itself would embed its repr (content='...' additional_kwargs=...) in the
    # reply. Use the message text instead.
    report_text = getattr(response, "content", response)

    # Send the generated report back to the user.
    await cl.Message(content=f"🔹 **Brand Identity Report for {brand_data['brand_name']}**\n\n{report_text}").send()