import os
import json
from sqlalchemy.orm import Session
from models import Product, CustomerProfile, Conversation, Message
from typing import List
from PyPDF2 import PdfReader
# Product services
def _read_pdf_text(file_path: str) -> str:
    """Best-effort extraction of all page text from a PDF.

    Returns the page texts joined with newlines, or "" when the file is
    missing, unreadable, or any page fails to extract.
    """
    try:
        texts = []
        for page in PdfReader(file_path).pages:
            # extract_text() may return None for image-only pages.
            texts.append(page.extract_text() or "")
        return "\n".join(texts)
    except Exception:
        # Deliberate best-effort: callers treat "" as "no content".
        return ""
def _upsert_demo_products(db: Session) -> None:
    """Insert/refresh the three demo products used when no PDF text is available."""
    demo_products = [
        Product(name="Demo Basic", description="Standard features for small teams.", notes="High churn risk.", price=9.0),
        Product(name="Demo Pro", description="Advanced features and priority support.", notes="Stable customer base.", price=39.0),
        Product(name="Demo Enterprise", description="Dedicated support and custom integrations.", notes="Potential for expansion.", price=199.0),
    ]
    for p in demo_products:
        db.merge(p)
    db.commit()
def _parse_price(raw):
    """Coerce an LLM-supplied price to float.

    Returns None when the value is absent or not numeric. Unlike a plain
    truthiness check, a legitimate price of 0/0.0 is preserved.
    """
    if raw is None or raw == "":
        return None
    try:
        return float(raw)
    except (TypeError, ValueError):
        return None
def _extract_json_array(text: str):
    """Locate and parse the outermost JSON array embedded in *text*.

    Raises ValueError when no bracketed span exists, or
    json.JSONDecodeError when the span is not valid JSON.
    """
    start = text.find('[')
    end = text.rfind(']')
    if start == -1 or end == -1 or end < start:
        raise ValueError("no JSON array found in LLM response")
    return json.loads(text[start:end + 1])
def extract_and_upsert_products_from_llm(db: Session, public_pdf_path: str, private_pdf_path: str):
    """Extracts product info from PDFs using an LLM and saves to DB.

    Reads both PDFs, asks the LLM to emit a JSON array of products, and
    upserts each row via Session.merge. Falls back to demo rows when both
    PDFs are empty/unreadable, and to a "Parsing Error" placeholder row
    when the LLM response cannot be parsed.
    """
    public_text = _read_pdf_text(public_pdf_path)
    private_text = _read_pdf_text(private_pdf_path)
    if not public_text and not private_text:
        # Fallback for demo if PDFs are empty or unreadable
        _upsert_demo_products(db)
        return
    # Local import avoids a circular dependency with main.py.
    from main import llm_reply
    system_prompt = """
You are an expert data extractor. Your task is to analyze two documents, a public offering and a private notes document, and extract product information.
Respond with a single JSON array of objects. Each object should represent a product and have the following fields:
- "product": The name of the product.
- "description": The description from the public offering document.
- "notes": Internal notes from the private notes document.
- "price": The price as a numeric value (float), if available.
If you find information that does not belong to a specific product, assign it to a product named "general".
Ensure your output is a valid JSON array.
"""
    user_prompt = f"""
Here is the content from the public offering document:
--- PUBLIC OFFERING ---
{public_text}
Here is the content from the private notes document:
--- PRIVATE NOTES ---
{private_text}
Please extract the product information as a JSON array.
"""
    # This is a blocking call, so we don't need async here
    import asyncio
    llm_response = asyncio.run(llm_reply(system_prompt, [], user_prompt))
    try:
        extracted_data = _extract_json_array(llm_response)
        for item in extracted_data:
            product = Product(
                name=item.get("product", "general"),
                description=item.get("description"),
                notes=item.get("notes"),
                # _parse_price also shields against non-numeric strings,
                # which previously raised an uncaught ValueError.
                price=_parse_price(item.get("price")),
            )
            # Use merge to insert or update based on the primary key (name)
            db.merge(product)
        db.commit()
    except (json.JSONDecodeError, TypeError, KeyError, ValueError) as e:
        # Handle cases where LLM output is not as expected
        # For demo, we can log the error and maybe insert a placeholder
        print(f"Error parsing LLM response: {e}")
        placeholder = Product(name="Parsing Error", description="Could not parse data from documents.", notes=str(llm_response))
        db.merge(placeholder)
        db.commit()
# Customer services
def ensure_default_customers(db: Session):
    """Seed the three stock customer profiles, skipping any that already exist."""
    seed_rows = (
        ("random", "Synthetic profile with randomized traits", 0.0, 0),
        ("SMB buyer", "Budget-conscious, quick decisions", 1200.0, 85),
        ("Enterprise buyer", "Long sales cycle, security-focused", 24000.0, 12),
    )
    for profile_name, attributes, weighted_cltv, count in seed_rows:
        existing = db.query(CustomerProfile).filter_by(name=profile_name).first()
        if existing is None:
            db.add(
                CustomerProfile(
                    name=profile_name,
                    attributes=attributes,
                    wcltv=weighted_cltv,
                    n=count,
                )
            )
    db.commit()
# Chat services
def get_or_create_conversation(db: Session, profile_name: str) -> Conversation:
    """Return the most recent conversation for *profile_name*, creating one if none exists."""
    latest = (
        db.query(Conversation)
        .filter_by(profile_name=profile_name)
        .order_by(Conversation.id.desc())
        .first()
    )
    if latest is not None:
        return latest
    # No prior conversation for this profile: start a fresh one.
    latest = Conversation(profile_name=profile_name)
    db.add(latest)
    db.commit()
    db.refresh(latest)
    return latest
def add_message(db: Session, conversation_id: int, sender: str, text: str):
    """Persist one chat message belonging to the given conversation."""
    db.add(Message(conversation_id=conversation_id, sender=sender, text=text))
    db.commit()
def get_history(db: Session, conversation_id: int):
    """Return a conversation's messages as dicts with "sender" and "text" keys.

    An unknown conversation_id yields an empty list.
    """
    convo = db.query(Conversation).filter_by(id=conversation_id).first()
    if convo is None:
        return []
    history = []
    for message in convo.messages:
        history.append({"sender": message.sender, "text": message.text})
    return history