Update app.py
app.py CHANGED
@@ -9,7 +9,7 @@ import os
 # Initialize FastAPI
 app = FastAPI(title="Davidic Sermon Intelligence API")
 
-# Add CORS Middleware
+# Add CORS Middleware
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -23,26 +23,25 @@ print("Loading Embedding model...")
 embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
 
 print("Loading Reranker model...")
-# Minimal reranker that fits on CPU well
 reranker_model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
 
 print("Loading Tiny LLM (TinyLlama-1.1B)...")
 model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-# Load on CPU, ensure it stays light
 llm_model = AutoModelForCausalLM.from_pretrained(
     model_id,
     torch_dtype=torch.float32,
     low_cpu_mem_usage=True
 )
+
+# Pipeline WITHOUT generation config to avoid warnings
 llm_pipeline = pipeline(
     "text-generation",
     model=llm_model,
     tokenizer=tokenizer
 )
-print("All models loaded.")
+print("All models loaded. Ready.")
 
-# Request Schemas
 class EmbedRequest(BaseModel):
     text: str
 
@@ -56,51 +55,46 @@ class InsightRequest(BaseModel):
 
 @app.get("/")
 def health_check():
-    return {
-        "status": "running",
-        "models": ["all-MiniLM-L6-v2", "ms-marco-MiniLM-L-6-v2", "TinyLlama-1.1B"]
-    }
+    return {"status": "running"}
 
 @app.post("/embed")
 def embed(request: EmbedRequest):
     try:
-        embedding = embedding_model.encode(request.text).tolist()
-        return embedding
+        return embedding_model.encode(request.text).tolist()
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
 @app.post("/rerank")
 def rerank(request: RerankRequest):
     try:
-        # Cross-encoder takes pairs of (query, document)
         pairs = [[request.query, doc] for doc in request.documents]
-        scores = reranker_model.predict(pairs).tolist()
-        return scores
+        return reranker_model.predict(pairs).tolist()
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
 @app.post("/insight")
 def generate_insight(request: InsightRequest):
     try:
-        print(f"Generating insight for …
+        print(f"Generating insight for: {request.query}")
         prompt = (
             f"<|system|>\n"
             f"You are a helpful spiritual assistant for Davidic Generation Church. "
-            f"…
+            f"Explain the spiritual context of the videos below based on their transcripts.\n"
             f"RULES:\n"
-            f"1. …
-            f"2. …
-            f"3. …
+            f"1. Refer to videos like this: 'In [Video 1], Pastor explains...'.\n"
+            f"2. Summarize WHY this moment is relevant to the question.\n"
+            f"3. Do NOT just repeat the transcript. Explain the meaning.\n"
+            f"4. Be thorough and long-form.\n"
             f"<|user|>\n"
             f"CONTEXT:\n{request.context}\n\n"
-            f"…
+            f"QUESTION: {request.query}\n"
             f"<|assistant|>\n"
         )
 
-        # …
+        # Explicitly set ALL parameters here
         output = llm_pipeline(
             prompt,
-            max_new_tokens=512,
+            max_new_tokens=512,
             temperature=0.7,
             do_sample=True,
             top_k=50,
@@ -108,17 +102,16 @@ def generate_insight(request: InsightRequest):
             pad_token_id=tokenizer.eos_token_id,
             eos_token_id=tokenizer.eos_token_id
         )
-        generated_text = output[0]['generated_text']
 
-        …
-        if "<|assistant|>" in …
-            insight = …
+        result = output[0]['generated_text']
+        if "<|assistant|>" in result:
+            insight = result.split("<|assistant|>")[-1].strip()
         else:
-            insight = …
+            insight = result[len(prompt):].strip()
 
         return {"insight": insight}
     except Exception as e:
-        print(f"…
+        print(f"Error: {e}")
         raise HTTPException(status_code=500, detail=str(e))
 
 if __name__ == "__main__":
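For reference, a minimal sketch of how a client might call the three endpoints this commit defines. The base URL and example payload strings are assumptions (a local uvicorn instance with placeholder data); the field names (text, query, documents, context) follow the request schemas and handler code in the diff above.

import requests

BASE_URL = "http://localhost:8000"  # assumed local instance; adjust for the deployed service
QUESTION = "What does Pastor teach about perseverance?"

# /embed returns the sentence embedding as a JSON list of floats
vector = requests.post(f"{BASE_URL}/embed", json={"text": QUESTION}).json()

# /rerank returns one cross-encoder relevance score per document, in input order
docs = ["transcript chunk A", "transcript chunk B"]
scores = requests.post(
    f"{BASE_URL}/rerank",
    json={"query": QUESTION, "documents": docs},
).json()

# /insight returns {"insight": "..."} generated by TinyLlama from the supplied context
context = "[Video 1] ...top-ranked transcript excerpt..."
insight = requests.post(
    f"{BASE_URL}/insight",
    json={"context": context, "query": QUESTION},
).json()["insight"]

print(len(vector), scores, insight[:200])

In a typical retrieval flow, the caller would embed the question via /embed, look up candidate transcript chunks in its own vector store, score them with /rerank, and pass the top-ranked chunks as the CONTEXT block that the /insight prompt expects.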