# app.py — Davidic Sermon Intelligence API (FastAPI service, runs as a Hugging Face Space)
import torch
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer, CrossEncoder
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import os
# Initialize FastAPI
app = FastAPI(title="Davidic Sermon Intelligence API")

# Add CORS Middleware
# NOTE(review): wildcard origins together with allow_credentials=True is permissive;
# fine for a public demo Space, but confirm before exposing anything sensitive.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load Models — all three are loaded eagerly at import time, so the first
# startup blocks until the weights are downloaded/cached.
print("Loading Embedding model...")
# Bi-encoder used by /embed to turn text into a dense vector.
embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
print("Loading Reranker model...")
# Cross-encoder used by /rerank to score (query, document) pairs.
reranker_model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
print("Loading Tiny LLM (TinyLlama-1.1B)...")
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# float32 on CPU (no GPU assumed); low_cpu_mem_usage lowers peak RAM while loading.
llm_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True
)
# Pipeline WITHOUT generation config to avoid warnings; generation
# parameters are passed explicitly at call time in /insight instead.
llm_pipeline = pipeline(
    "text-generation",
    model=llm_model,
    tokenizer=tokenizer
)
print("All models loaded Ready.")
class EmbedRequest(BaseModel):
    """Request body for /embed: a single text to encode into a vector."""
    text: str
class RerankRequest(BaseModel):
    """Request body for /rerank: a query plus candidate documents to score."""
    query: str
    documents: list[str]
class InsightRequest(BaseModel):
    """Request body for /insight: a user question and retrieved transcript context."""
    query: str
    context: str
@app.get("/")
def health_check():
    """Liveness probe: confirms the service is up and models finished loading."""
    return dict(status="running")
@app.post("/embed")
def embed(request: EmbedRequest):
    """Encode the request text with the MiniLM bi-encoder.

    Returns the embedding as a plain JSON list of floats; any failure is
    surfaced as an HTTP 500 with the exception message as detail.
    """
    try:
        vector = embedding_model.encode(request.text)
        # encode() yields a numpy array — convert for JSON serialization.
        return vector.tolist()
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
@app.post("/rerank")
def rerank(request: RerankRequest):
    """Score each document against the query with the cross-encoder.

    Returns one relevance score per document, in input order; any failure
    is surfaced as an HTTP 500 with the exception message as detail.
    """
    try:
        # Cross-encoder expects (query, document) pairs, one per candidate.
        scored = reranker_model.predict(
            [[request.query, candidate] for candidate in request.documents]
        )
        return scored.tolist()
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
@app.post("/insight")
def generate_insight(request: InsightRequest):
    """Generate a long-form spiritual insight grounded in the supplied context.

    Builds a chat prompt for TinyLlama-1.1B-Chat and samples up to 512 new
    tokens. Returns {"insight": <generated text>}; failures are surfaced as
    HTTP 500 with the exception message as detail.

    Fixes vs. the previous version:
    - The prompt was hand-built in Zephyr style but omitted the `</s>`
      turn separators TinyLlama-1.1B-Chat-v1.0 expects; we now delegate to
      `tokenizer.apply_chat_template`, which emits the model's exact format.
    - `return_full_text=False` makes the pipeline return only the newly
      generated text, removing the fragile `"<|assistant|>"` string split.
    """
    try:
        print(f"Generating insight for: {request.query}")
        messages = [
            {
                "role": "system",
                "content": (
                    "You are a helpful spiritual assistant for Davidic Generation Church. "
                    "Explain the spiritual context of the videos below based on their transcripts.\n"
                    "RULES:\n"
                    "1. Refer to videos like this: 'In [Video 1], Pastor explains...'.\n"
                    "2. Summarize WHY this moment is relevant to the question.\n"
                    "3. Do NOT just repeat the transcript. Explain the meaning.\n"
                    "4. Be thorough and long-form."
                ),
            },
            {
                "role": "user",
                "content": f"CONTEXT:\n{request.context}\n\nQUESTION: {request.query}",
            },
        ]
        # Let the tokenizer render the model's own chat template (adds the
        # <|system|>/<|user|>/<|assistant|> markers AND the </s> separators).
        prompt = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        # Explicitly set ALL generation parameters here.
        output = llm_pipeline(
            prompt,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            top_k=50,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            return_full_text=False,  # only the completion, not the prompt echo
        )
        insight = output[0]['generated_text'].strip()
        return {"insight": insight}
    except Exception as e:
        print(f"Error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
# Script entry point: run the API with uvicorn on all interfaces,
# port 7860 (the port Hugging Face Spaces expects a web app to bind).
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)