# app.py — Davidic Sermon Intelligence API (FastAPI service, runs as a Hugging Face Space)
import torch
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer, CrossEncoder
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import os
# Initialize FastAPI
app = FastAPI(title="Davidic Sermon Intelligence API")

# Add CORS Middleware
# NOTE(review): wildcard origins together with allow_credentials=True is permissive;
# fine for a public demo Space, but confirm before exposing anything sensitive.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load Models — all three are loaded eagerly at import time, so the first
# startup blocks until the weights are downloaded/cached.
print("Loading Embedding model...")
# Bi-encoder used by /embed to turn text into a dense vector.
embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
print("Loading Reranker model...")
# Cross-encoder used by /rerank to score (query, document) pairs.
reranker_model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
print("Loading Tiny LLM (TinyLlama-1.1B)...")
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# float32 on CPU (no GPU assumed); low_cpu_mem_usage lowers peak RAM while loading.
llm_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True
)
# Pipeline WITHOUT generation config to avoid warnings; generation
# parameters are passed explicitly at call time in /insight instead.
llm_pipeline = pipeline(
    "text-generation",
    model=llm_model,
    tokenizer=tokenizer
)
print("All models loaded Ready.")
class EmbedRequest(BaseModel):
    """Request body for /embed: a single text to encode into a vector."""
    text: str
class RerankRequest(BaseModel):
    """Request body for /rerank: a query plus candidate documents to score."""
    query: str
    documents: list[str]
class InsightRequest(BaseModel):
    """Request body for /insight: a user question and retrieved transcript context."""
    query: str
    context: str
@app.get("/")
def health_check():
    """Liveness probe: confirms the service is up and models finished loading."""
    return dict(status="running")
@app.post("/embed")
def embed(request: EmbedRequest):
    """Encode the request text with the MiniLM bi-encoder.

    Returns the embedding as a plain JSON list of floats; any failure is
    surfaced as an HTTP 500 with the exception message as detail.
    """
    try:
        vector = embedding_model.encode(request.text)
        # encode() yields a numpy array — convert for JSON serialization.
        return vector.tolist()
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
@app.post("/rerank")
def rerank(request: RerankRequest):
    """Score each document against the query with the cross-encoder.

    Returns one relevance score per document, in input order; any failure
    is surfaced as an HTTP 500 with the exception message as detail.
    """
    try:
        # Cross-encoder expects (query, document) pairs, one per candidate.
        scored = reranker_model.predict(
            [[request.query, candidate] for candidate in request.documents]
        )
        return scored.tolist()
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
@app.post("/insight")
def generate_insight(request: InsightRequest):
    """Generate a long-form spiritual insight grounded in the supplied context.

    Builds a chat prompt for TinyLlama-1.1B-Chat and samples up to 512 new
    tokens. Returns {"insight": <generated text>}; failures are surfaced as
    HTTP 500 with the exception message as detail.

    Fixes vs. the previous version:
    - The prompt was hand-built in Zephyr style but omitted the `</s>`
      turn separators TinyLlama-1.1B-Chat-v1.0 expects; we now delegate to
      `tokenizer.apply_chat_template`, which emits the model's exact format.
    - `return_full_text=False` makes the pipeline return only the newly
      generated text, removing the fragile `"<|assistant|>"` string split.
    """
    try:
        print(f"Generating insight for: {request.query}")
        messages = [
            {
                "role": "system",
                "content": (
                    "You are a helpful spiritual assistant for Davidic Generation Church. "
                    "Explain the spiritual context of the videos below based on their transcripts.\n"
                    "RULES:\n"
                    "1. Refer to videos like this: 'In [Video 1], Pastor explains...'.\n"
                    "2. Summarize WHY this moment is relevant to the question.\n"
                    "3. Do NOT just repeat the transcript. Explain the meaning.\n"
                    "4. Be thorough and long-form."
                ),
            },
            {
                "role": "user",
                "content": f"CONTEXT:\n{request.context}\n\nQUESTION: {request.query}",
            },
        ]
        # Let the tokenizer render the model's own chat template (adds the
        # <|system|>/<|user|>/<|assistant|> markers AND the </s> separators).
        prompt = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        # Explicitly set ALL generation parameters here.
        output = llm_pipeline(
            prompt,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            top_k=50,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            return_full_text=False,  # only the completion, not the prompt echo
        )
        insight = output[0]['generated_text'].strip()
        return {"insight": insight}
    except Exception as e:
        print(f"Error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
# Script entry point: run the API with uvicorn on all interfaces,
# port 7860 (the port Hugging Face Spaces expects a web app to bind).
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)