# Summarizer — app.py
# Author: ClergeF ("Create app.py", commit a4a22e3, verified)
from fastapi import FastAPI
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import pandas as pd
import numpy as np
# Load embedding model (retriever): maps text to dense vectors used for
# cosine-similarity search over the check-in corpus.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# Load summarizer/generator: seq2seq model that phrases the final answer
# from the retrieved context (called in the /ask handler below).
summarizer = pipeline("text2text-generation", model="google/flan-t5-base")
# Load your check-in data
# NOTE(review): path is relative to the working directory at startup;
# the file is assumed to sit next to app.py — confirm in deployment.
checkins = pd.read_csv("all_checkins.csv") # Must have Timestamp, Sender, Msg
# Coerce to str so NaN/numeric cells can't break encoding or formatting.
messages = checkins["Msg"].astype(str).tolist()
# Precompute embeddings once at startup; row i of `embeddings` corresponds
# to row i of `checkins`, which the /ask handler relies on for lookup.
embeddings = embedder.encode(messages, convert_to_numpy=True)
# FastAPI setup
app = FastAPI()
class Query(BaseModel):
    """Request body for POST /ask."""
    # The user's free-text question about the check-ins.
    question: str
@app.post("/ask")
def ask(query: Query):
    """Answer a question via retrieval-augmented generation over the check-ins.

    Steps: embed the question, rank every check-in message by cosine
    similarity against the precomputed corpus embeddings, then prompt the
    seq2seq model to answer using only the top matches as context.

    Returns a dict with the original question, the generated answer, and
    the retrieved check-in lines used as evidence.
    """
    # Step 1: Encode the query into the same vector space as the corpus.
    q_vec = embedder.encode(query.question, convert_to_numpy=True)

    # Cosine similarity against all precomputed embeddings. The tiny
    # epsilon guards against division by zero (NaN scores) in the
    # degenerate case of an all-zero embedding vector.
    eps = 1e-12
    scores = np.dot(embeddings, q_vec) / (
        np.linalg.norm(embeddings, axis=1) * np.linalg.norm(q_vec) + eps
    )

    # Step 2: Grab the top 5 matching check-ins (fewer if the corpus is
    # smaller — slicing past the end is safe).
    top_idx = np.argsort(scores)[::-1][:5]
    retrieved = [
        f"- {checkins.iloc[i]['Sender']}: {checkins.iloc[i]['Msg']}"
        for i in top_idx
    ]
    context = "\n".join(retrieved)

    # Step 3: Have the generator phrase a natural answer grounded only in
    # the retrieved context.
    prompt = f"Based only on the following check-ins, answer the question.\n\nCheck-ins:\n{context}\n\nQuestion: {query.question}\nAnswer:"
    summary = summarizer(prompt, max_length=200, do_sample=False)[0]["generated_text"]
    return {
        "question": query.question,
        "answer": summary,
        "evidence": retrieved,
    }