|
|
import os
|
|
|
import json
|
|
|
from sqlalchemy.orm import Session
|
|
|
from models import Product, CustomerProfile, Conversation, Message
|
|
|
from typing import List
|
|
|
from PyPDF2 import PdfReader
|
|
|
|
|
|
|
|
|
|
|
|
def _read_pdf_text(file_path: str) -> str:
|
|
|
try:
|
|
|
reader = PdfReader(file_path)
|
|
|
return "\n".join(page.extract_text() or "" for page in reader.pages)
|
|
|
except Exception:
|
|
|
return ""
|
|
|
|
|
|
def extract_and_upsert_products_from_llm(db: Session, public_pdf_path: str, private_pdf_path: str):
|
|
|
"""Extracts product info from PDFs using an LLM and saves to DB."""
|
|
|
public_text = _read_pdf_text(public_pdf_path)
|
|
|
private_text = _read_pdf_text(private_pdf_path)
|
|
|
|
|
|
if not public_text and not private_text:
|
|
|
|
|
|
demo_products = [
|
|
|
Product(name="Demo Basic", description="Standard features for small teams.", notes="High churn risk.", price=9.0),
|
|
|
Product(name="Demo Pro", description="Advanced features and priority support.", notes="Stable customer base.", price=39.0),
|
|
|
Product(name="Demo Enterprise", description="Dedicated support and custom integrations.", notes="Potential for expansion.", price=199.0),
|
|
|
]
|
|
|
for p in demo_products:
|
|
|
db.merge(p)
|
|
|
db.commit()
|
|
|
return
|
|
|
|
|
|
|
|
|
from main import llm_reply
|
|
|
|
|
|
system_prompt = """
|
|
|
You are an expert data extractor. Your task is to analyze two documents, a public offering and a private notes document, and extract product information.
|
|
|
|
|
|
Respond with a single JSON array of objects. Each object should represent a product and have the following fields:
|
|
|
- "product": The name of the product.
|
|
|
- "description": The description from the public offering document.
|
|
|
- "notes": Internal notes from the private notes document.
|
|
|
- "price": The price as a numeric value (float), if available.
|
|
|
|
|
|
If you find information that does not belong to a specific product, assign it to a product named "general".
|
|
|
Ensure your output is a valid JSON array.
|
|
|
"""
|
|
|
|
|
|
user_prompt = f"""
|
|
|
Here is the content from the public offering document:
|
|
|
--- PUBLIC OFFERING ---
|
|
|
{public_text}
|
|
|
|
|
|
Here is the content from the private notes document:
|
|
|
--- PRIVATE NOTES ---
|
|
|
{private_text}
|
|
|
|
|
|
Please extract the product information as a JSON array.
|
|
|
"""
|
|
|
|
|
|
|
|
|
import asyncio
|
|
|
llm_response = asyncio.run(llm_reply(system_prompt, [], user_prompt))
|
|
|
|
|
|
try:
|
|
|
|
|
|
json_str = llm_response[llm_response.find('['):llm_response.rfind(']')+1]
|
|
|
extracted_data = json.loads(json_str)
|
|
|
|
|
|
for item in extracted_data:
|
|
|
product = Product(
|
|
|
name=item.get("product", "general"),
|
|
|
description=item.get("description"),
|
|
|
notes=item.get("notes"),
|
|
|
price=float(item["price"]) if item.get("price") else None
|
|
|
)
|
|
|
|
|
|
db.merge(product)
|
|
|
db.commit()
|
|
|
|
|
|
except (json.JSONDecodeError, TypeError, KeyError) as e:
|
|
|
|
|
|
|
|
|
print(f"Error parsing LLM response: {e}")
|
|
|
placeholder = Product(name="Parsing Error", description="Could not parse data from documents.", notes=str(llm_response))
|
|
|
db.merge(placeholder)
|
|
|
db.commit()
|
|
|
|
|
|
|
|
|
|
|
|
def ensure_default_customers(db: Session):
|
|
|
defaults = [
|
|
|
("random", "Synthetic profile with randomized traits", 0.0, 0),
|
|
|
("SMB buyer", "Budget-conscious, quick decisions", 1200.0, 85),
|
|
|
("Enterprise buyer", "Long sales cycle, security-focused", 24000.0, 12),
|
|
|
]
|
|
|
for name, attrs, w, n in defaults:
|
|
|
row = db.query(CustomerProfile).filter_by(name=name).first()
|
|
|
if not row:
|
|
|
db.add(CustomerProfile(name=name, attributes=attrs, wcltv=w, n=n))
|
|
|
db.commit()
|
|
|
|
|
|
|
|
|
|
|
|
def get_or_create_conversation(db: Session, profile_name: str) -> Conversation:
|
|
|
convo = (
|
|
|
db.query(Conversation)
|
|
|
.filter_by(profile_name=profile_name)
|
|
|
.order_by(Conversation.id.desc())
|
|
|
.first()
|
|
|
)
|
|
|
if not convo:
|
|
|
convo = Conversation(profile_name=profile_name)
|
|
|
db.add(convo)
|
|
|
db.commit()
|
|
|
db.refresh(convo)
|
|
|
return convo
|
|
|
|
|
|
|
|
|
def add_message(db: Session, conversation_id: int, sender: str, text: str):
|
|
|
msg = Message(conversation_id=conversation_id, sender=sender, text=text)
|
|
|
db.add(msg)
|
|
|
db.commit()
|
|
|
|
|
|
|
|
|
def get_history(db: Session, conversation_id: int):
|
|
|
convo = db.query(Conversation).filter_by(id=conversation_id).first()
|
|
|
if not convo:
|
|
|
return []
|
|
|
return [{"sender": m.sender, "text": m.text} for m in convo.messages]
|
|
|
|