Spaces:

babaTEEpe
/

davidic

Sleeping

App Files Files Community

babaTEEpe committed on Feb 11

Commit

97d5afe

verified ·

1 Parent(s): e41db88

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -11

app.py CHANGED Viewed

@@ -39,7 +39,7 @@ llm_pipeline = pipeline(
     "text-generation",
     model=llm_model,
     tokenizer=tokenizer,
-    max_new_tokens=150,
     temperature=0.7,
     do_sample=True,
     top_k=50,
@@ -87,29 +87,35 @@ def rerank(request: RerankRequest):
 @app.post("/insight")
 def generate_insight(request: InsightRequest):
     try:
-        # Create a deep-context spiritual assistant prompt
         prompt = (
             f"<|system|>\n"
-            f"You are a spiritual assistant for Davidic Generation Church. "
-            f"Explain how the pastors in the following videos discuss the requested topic. "
-            f"Refer to each result by its label (e.g., 'In Video 1, Pastor explains...'). "
-            f"Be encouraging and summarize the key spiritual takeaways. "
-            f"Answer based ONLY on the provided context.\n"
-            f"<|user|>\nContext:\n{request.context}\n\nQuestion: {request.query}\n"
             f"<|assistant|>\n"
         )
-        output = llm_pipeline(prompt)
         generated_text = output[0]['generated_text']
-        # Clean up the output to only show the assistant's part
         if "<|assistant|>" in generated_text:
             insight = generated_text.split("<|assistant|>")[-1].strip()
         else:
-            insight = generated_text
         return {"insight": insight}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":

     "text-generation",
     model=llm_model,
     tokenizer=tokenizer,
+    max_length=512,  # Set max_length to avoid conflict with max_new_tokens
     temperature=0.7,
     do_sample=True,
     top_k=50,
 @app.post("/insight")
 def generate_insight(request: InsightRequest):
     try:
+        print(f"Generating insight for query: {request.query}")
+        # Optimized prompt for TinyLlama
         prompt = (
             f"<|system|>\n"
+            f"You are a helpful and spiritual AI assistant for Davidic Generation Church. "
+            f"Review the sermon transcripts below and explain how they answer the user's question. "
+            f"Always refer to the videos by their labels (e.g., 'In Video 1...'). "
+            f"Be encouraging and provide deep spiritual context from the pastor's words.\n"
+            f"<|user|>\n"
+            f"CONTEXT:\n{request.context}\n\n"
+            f"QUESTION: {request.query}\n"
             f"<|assistant|>\n"
         )
+        # Explicitly set max_new_tokens here
+        output = llm_pipeline(prompt, max_new_tokens=200)
         generated_text = output[0]['generated_text']
+        # Clean up the output
         if "<|assistant|>" in generated_text:
             insight = generated_text.split("<|assistant|>")[-1].strip()
         else:
+            # Fallback cleanup for different generation styles
+            insight = generated_text.replace(prompt, "").strip()
+        print(f"Generated insight length: {len(insight)}")
         return {"insight": insight}
     except Exception as e:
+        print(f"Insight Error: {e}")
         raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":