Update app.py
Browse files
app.py
CHANGED
|
@@ -39,7 +39,7 @@ llm_pipeline = pipeline(
|
|
| 39 |
"text-generation",
|
| 40 |
model=llm_model,
|
| 41 |
tokenizer=tokenizer,
|
| 42 |
-
|
| 43 |
temperature=0.7,
|
| 44 |
do_sample=True,
|
| 45 |
top_k=50,
|
|
@@ -87,29 +87,35 @@ def rerank(request: RerankRequest):
|
|
| 87 |
@app.post("/insight")
|
| 88 |
def generate_insight(request: InsightRequest):
|
| 89 |
try:
|
| 90 |
-
|
|
|
|
| 91 |
prompt = (
|
| 92 |
f"<|system|>\n"
|
| 93 |
-
f"You are a spiritual assistant for Davidic Generation Church. "
|
| 94 |
-
f"
|
| 95 |
-
f"
|
| 96 |
-
f"Be encouraging and
|
| 97 |
-
f"
|
| 98 |
-
f"
|
|
|
|
| 99 |
f"<|assistant|>\n"
|
| 100 |
)
|
| 101 |
|
| 102 |
-
|
|
|
|
| 103 |
generated_text = output[0]['generated_text']
|
| 104 |
|
| 105 |
-
# Clean up the output
|
| 106 |
if "<|assistant|>" in generated_text:
|
| 107 |
insight = generated_text.split("<|assistant|>")[-1].strip()
|
| 108 |
else:
|
| 109 |
-
|
|
|
|
| 110 |
|
|
|
|
| 111 |
return {"insight": insight}
|
| 112 |
except Exception as e:
|
|
|
|
| 113 |
raise HTTPException(status_code=500, detail=str(e))
|
| 114 |
|
| 115 |
if __name__ == "__main__":
|
|
|
|
| 39 |
"text-generation",
|
| 40 |
model=llm_model,
|
| 41 |
tokenizer=tokenizer,
|
| 42 |
+
max_length=512, # Set max_length to avoid conflict with max_new_tokens
|
| 43 |
temperature=0.7,
|
| 44 |
do_sample=True,
|
| 45 |
top_k=50,
|
|
|
|
| 87 |
def _extract_completion(generated_text, prompt):
    """Return only the assistant's answer from a text-generation result.

    Args:
        generated_text: The ``generated_text`` string returned by the pipeline.
        prompt: The exact prompt string that was sent to the pipeline.

    Returns:
        The answer text with the echoed prompt, any redundant leading
        ``<|assistant|>`` tag, and any follow-up chat turn removed.
    """
    assistant_tag = "<|assistant|>"
    role_tags = ("<|system|>", "<|user|>", assistant_tag)

    if generated_text.startswith(prompt):
        # Text-generation pipelines echo the prompt ahead of the completion by
        # default (return_full_text=True), so slicing the prefix off is exact.
        # Splitting on the *last* assistant tag instead would throw away the
        # real answer whenever the model itself emits that tag.
        completion = generated_text[len(prompt):]
    elif assistant_tag in generated_text:
        # Prompt was not echoed verbatim: fall back to the tag-based split.
        completion = generated_text.split(assistant_tag)[-1]
    else:
        # Fallback cleanup for different generation styles.
        completion = generated_text.replace(prompt, "")

    completion = completion.strip()

    # The model sometimes repeats the assistant tag before answering.
    while completion.startswith(assistant_tag):
        completion = completion[len(assistant_tag):].lstrip()

    # Any later role tag means the model started inventing another turn;
    # keep only the first answer.
    cut = len(completion)
    for tag in role_tags:
        position = completion.find(tag)
        if position != -1:
            cut = min(cut, position)
    return completion[:cut].strip()


@app.post("/insight")
def generate_insight(request: InsightRequest):
    """Generate an insight that answers the user's question from sermon context.

    Builds a chat-formatted prompt (system / user / assistant tags) from
    ``request.context`` and ``request.query``, runs it through ``llm_pipeline``
    with ``max_new_tokens=200``, and returns the cleaned-up completion.

    Args:
        request: Request body providing ``context`` and ``query``.

    Returns:
        A dict of the form ``{"insight": <str>}``.

    Raises:
        HTTPException: With status 500 and the error text as ``detail`` if
            anything fails while building the prompt or generating.
    """
    try:
        print(f"Generating insight for query: {request.query}")
        # Optimized prompt for TinyLlama
        prompt = (
            "<|system|>\n"
            "You are a helpful and spiritual AI assistant for Davidic Generation Church. "
            "Review the sermon transcripts below and explain how they answer the user's question. "
            "Always refer to the videos by their labels (e.g., 'In Video 1...'). "
            "Be encouraging and provide deep spiritual context from the pastor's words.\n"
            "<|user|>\n"
            f"CONTEXT:\n{request.context}\n\n"
            f"QUESTION: {request.query}\n"
            "<|assistant|>\n"
        )

        # Explicitly set max_new_tokens here
        output = llm_pipeline(prompt, max_new_tokens=200)
        generated_text = output[0]['generated_text']

        # Clean up the output
        insight = _extract_completion(generated_text, prompt)

        print(f"Generated insight length: {len(insight)}")
        return {"insight": insight}
    except Exception as e:
        print(f"Insight Error: {e}")
        # Chain the cause so the original traceback stays attached.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
| 120 |
|
| 121 |
if __name__ == "__main__":
|