| import torch |
| from fastapi import FastAPI, HTTPException |
| from fastapi.middleware.cors import CORSMiddleware |
| from pydantic import BaseModel |
| from sentence_transformers import SentenceTransformer, CrossEncoder |
| from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline |
| import os |
|
|
| |
app = FastAPI(title="Davidic Sermon Intelligence API")

# CORS: fully open (any origin, method, header).
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is very
# permissive — confirm this is intended before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# All models are loaded once at import time so every request reuses them.
print("Loading Embedding model...")
embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

print("Loading Reranker model...")
reranker_model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

print("Loading Tiny LLM (TinyLlama-1.1B)...")
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_id)
llm_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float32,  # full precision; presumably CPU inference — confirm
    low_cpu_mem_usage=True
)

# Shared text-generation pipeline used by the /insight endpoint.
llm_pipeline = pipeline(
    "text-generation",
    model=llm_model,
    tokenizer=tokenizer
)
print("All models loaded. Ready.")  # fixed message: was "All models loaded Ready."
|
|
class EmbedRequest(BaseModel):
    """Request body for POST /embed."""
    # Raw text to be converted into an embedding vector.
    text: str
|
|
class RerankRequest(BaseModel):
    """Request body for POST /rerank."""
    # Search query to score each document against.
    query: str
    # Candidate documents to be scored by the cross-encoder.
    documents: list[str]
|
|
class InsightRequest(BaseModel):
    """Request body for POST /insight."""
    # User's question about the sermon content.
    query: str
    # Retrieved transcript excerpts the LLM should ground its answer in.
    context: str
|
|
@app.get("/")
def health_check():
    """Liveness probe: reports that the API process is up."""
    payload = {"status": "running"}
    return payload
|
|
@app.post("/embed")
def embed(request: EmbedRequest):
    """Encode request.text with the sentence-transformer.

    Returns the embedding as a plain list of floats (JSON-serializable).
    Raises HTTPException(500) on any model failure.
    """
    try:
        return embedding_model.encode(request.text).tolist()
    except Exception as e:
        # Chain the original exception so the traceback is not lost in logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
@app.post("/rerank")
def rerank(request: RerankRequest):
    """Score each document against the query with the cross-encoder.

    Returns a list of relevance scores, one per document, in input order.
    Raises HTTPException(500) on any model failure.
    """
    try:
        # Cross-encoder expects (query, document) pairs.
        pairs = [[request.query, doc] for doc in request.documents]
        return reranker_model.predict(pairs).tolist()
    except Exception as e:
        # Chain the original exception so the traceback is not lost in logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
@app.post("/insight")
def generate_insight(request: InsightRequest):
    """Generate a long-form answer grounded in the supplied transcript context.

    Builds a TinyLlama chat-format prompt from the query and context, samples
    up to 512 new tokens, and returns only the assistant portion of the output
    as {"insight": str}. Raises HTTPException(500) on any failure.
    """
    try:
        print(f"Generating insight for: {request.query}")
        # TinyLlama chat template markers: <|system|>, <|user|>, <|assistant|>.
        prompt = (
            f"<|system|>\n"
            f"You are a helpful spiritual assistant for Davidic Generation Church. "
            f"Explain the spiritual context of the videos below based on their transcripts.\n"
            f"RULES:\n"
            f"1. Refer to videos like this: 'In [Video 1], Pastor explains...'.\n"
            f"2. Summarize WHY this moment is relevant to the question.\n"
            f"3. Do NOT just repeat the transcript. Explain the meaning.\n"
            f"4. Be thorough and long-form.\n"
            f"<|user|>\n"
            f"CONTEXT:\n{request.context}\n\n"
            f"QUESTION: {request.query}\n"
            f"<|assistant|>\n"
        )

        output = llm_pipeline(
            prompt,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            top_k=50,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )

        result = output[0]['generated_text']
        # The pipeline echoes the prompt; keep only the text after the
        # assistant marker, falling back to slicing off the prompt prefix.
        if "<|assistant|>" in result:
            insight = result.split("<|assistant|>")[-1].strip()
        else:
            insight = result[len(prompt):].strip()

        return {"insight": insight}
    except Exception as e:
        print(f"Error: {e}")
        # Chain the original exception so the traceback is not lost in logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
if __name__ == "__main__":
    # Local/dev entry point; serve on all interfaces.
    import uvicorn

    bind_host, bind_port = "0.0.0.0", 7860
    uvicorn.run(app, host=bind_host, port=bind_port)
|
|