babaTEEpe committed on
Commit
e7065b3
·
verified ·
1 Parent(s): 1d722eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -28
app.py CHANGED
@@ -9,7 +9,7 @@ import os
9
  # Initialize FastAPI
10
  app = FastAPI(title="Davidic Sermon Intelligence API")
11
 
12
- # Add CORS Middleware to allow requests from Vercel
13
  app.add_middleware(
14
  CORSMiddleware,
15
  allow_origins=["*"],
@@ -23,26 +23,25 @@ print("Loading Embedding model...")
23
  embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
24
 
25
  print("Loading Reranker model...")
26
- # Minimal reranker that fits on CPU well
27
  reranker_model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
28
 
29
  print("Loading Tiny LLM (TinyLlama-1.1B)...")
30
  model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
31
  tokenizer = AutoTokenizer.from_pretrained(model_id)
32
- # Load on CPU, ensure it stays light
33
  llm_model = AutoModelForCausalLM.from_pretrained(
34
  model_id,
35
  torch_dtype=torch.float32,
36
  low_cpu_mem_usage=True
37
  )
 
 
38
  llm_pipeline = pipeline(
39
  "text-generation",
40
  model=llm_model,
41
  tokenizer=tokenizer
42
  )
43
- print("All models loaded.")
44
 
45
- # Request Schemas
46
  class EmbedRequest(BaseModel):
47
  text: str
48
 
@@ -56,51 +55,46 @@ class InsightRequest(BaseModel):
56
 
57
  @app.get("/")
58
  def health_check():
59
- return {
60
- "status": "running",
61
- "models": ["all-MiniLM-L6-v2", "ms-marco-MiniLM-L-6-v2", "TinyLlama-1.1B"]
62
- }
63
 
64
  @app.post("/embed")
65
  def embed(request: EmbedRequest):
66
  try:
67
- embedding = embedding_model.encode(request.text).tolist()
68
- return embedding
69
  except Exception as e:
70
  raise HTTPException(status_code=500, detail=str(e))
71
 
72
  @app.post("/rerank")
73
  def rerank(request: RerankRequest):
74
  try:
75
- # Cross-encoder takes pairs of (query, document)
76
  pairs = [[request.query, doc] for doc in request.documents]
77
- scores = reranker_model.predict(pairs).tolist()
78
- return scores
79
  except Exception as e:
80
  raise HTTPException(status_code=500, detail=str(e))
81
 
82
  @app.post("/insight")
83
  def generate_insight(request: InsightRequest):
84
  try:
85
- print(f"Generating insight for query: {request.query}")
86
  prompt = (
87
  f"<|system|>\n"
88
  f"You are a helpful spiritual assistant for Davidic Generation Church. "
89
- f"Your goal is to provide detailed and comprehensive explainations for the sermon videos below.\n"
90
  f"RULES:\n"
91
- f"1. Provide a thorough summary for each video (e.g., 'In [Video 1], Pastor goes deep into...').\n"
92
- f"2. Explain the spiritual context and practical applications discussed.\n"
93
- f"3. Write as much relevant detail as possible based on the transcripts.\n"
 
94
  f"<|user|>\n"
95
  f"CONTEXT:\n{request.context}\n\n"
96
- f"SEARCH QUERY: {request.query}\n"
97
  f"<|assistant|>\n"
98
  )
99
 
100
- # Pass ALL generation parameters here, and NONE in the pipeline init
101
  output = llm_pipeline(
102
  prompt,
103
- max_new_tokens=512, # Increased for longer insights
104
  temperature=0.7,
105
  do_sample=True,
106
  top_k=50,
@@ -108,17 +102,16 @@ def generate_insight(request: InsightRequest):
108
  pad_token_id=tokenizer.eos_token_id,
109
  eos_token_id=tokenizer.eos_token_id
110
  )
111
- generated_text = output[0]['generated_text']
112
 
113
- # Cleanly extract only the assistant's part
114
- if "<|assistant|>" in generated_text:
115
- insight = generated_text.split("<|assistant|>")[-1].strip()
116
  else:
117
- insight = generated_text[len(prompt):].strip()
118
 
119
  return {"insight": insight}
120
  except Exception as e:
121
- print(f"Insight Error: {e}")
122
  raise HTTPException(status_code=500, detail=str(e))
123
 
124
  if __name__ == "__main__":
 
9
  # Initialize FastAPI
10
  app = FastAPI(title="Davidic Sermon Intelligence API")
11
 
12
+ # Add CORS Middleware
13
  app.add_middleware(
14
  CORSMiddleware,
15
  allow_origins=["*"],
 
# --- Model loading (runs once at process startup) ---

# Sentence embedder backing the /embed endpoint.
embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

print("Loading Reranker model...")
# Cross-encoder reranker backing /rerank; small enough to run on CPU.
reranker_model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

print("Loading Tiny LLM (TinyLlama-1.1B)...")
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# float32 on CPU; low_cpu_mem_usage streams weights to keep peak RSS down.
llm_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True
)

# Pipeline WITHOUT generation config to avoid warnings; all generation
# parameters are supplied per-call by the /insight endpoint instead.
llm_pipeline = pipeline(
    "text-generation",
    model=llm_model,
    tokenizer=tokenizer
)
# Fixed garbled startup message ("All models loaded Ready.").
print("All models loaded. Ready.")
44
 
 
class EmbedRequest(BaseModel):
    """Request body for /embed: a single text string to encode."""

    text: str
47
 
 
55
 
@app.get("/")
def health_check():
    """Liveness probe: report that the service is up."""
    return {"status": "running"}
 
 
 
59
 
@app.post("/embed")
def embed(request: EmbedRequest):
    """Encode the request text into a dense vector (returned as a plain list)."""
    try:
        vector = embedding_model.encode(request.text)
        return vector.tolist()
    except Exception as e:
        # Surface any model failure as a 500 with the underlying message.
        raise HTTPException(status_code=500, detail=str(e))
66
 
@app.post("/rerank")
def rerank(request: RerankRequest):
    """Score each candidate document against the query with the cross-encoder."""
    try:
        # Cross-encoder expects (query, document) pairs.
        query = request.query
        pairs = [[query, document] for document in request.documents]
        scores = reranker_model.predict(pairs)
        return scores.tolist()
    except Exception as e:
        # Surface any model failure as a 500 with the underlying message.
        raise HTTPException(status_code=500, detail=str(e))
74
 
75
  @app.post("/insight")
76
  def generate_insight(request: InsightRequest):
77
  try:
78
+ print(f"Generating insight for: {request.query}")
79
  prompt = (
80
  f"<|system|>\n"
81
  f"You are a helpful spiritual assistant for Davidic Generation Church. "
82
+ f"Explain the spiritual context of the videos below based on their transcripts.\n"
83
  f"RULES:\n"
84
+ f"1. Refer to videos like this: 'In [Video 1], Pastor explains...'.\n"
85
+ f"2. Summarize WHY this moment is relevant to the question.\n"
86
+ f"3. Do NOT just repeat the transcript. Explain the meaning.\n"
87
+ f"4. Be thorough and long-form.\n"
88
  f"<|user|>\n"
89
  f"CONTEXT:\n{request.context}\n\n"
90
+ f"QUESTION: {request.query}\n"
91
  f"<|assistant|>\n"
92
  )
93
 
94
+ # Explicitly set ALL parameters here
95
  output = llm_pipeline(
96
  prompt,
97
+ max_new_tokens=512,
98
  temperature=0.7,
99
  do_sample=True,
100
  top_k=50,
 
102
  pad_token_id=tokenizer.eos_token_id,
103
  eos_token_id=tokenizer.eos_token_id
104
  )
 
105
 
106
+ result = output[0]['generated_text']
107
+ if "<|assistant|>" in result:
108
+ insight = result.split("<|assistant|>")[-1].strip()
109
  else:
110
+ insight = result[len(prompt):].strip()
111
 
112
  return {"insight": insight}
113
  except Exception as e:
114
+ print(f"Error: {e}")
115
  raise HTTPException(status_code=500, detail=str(e))
116
 
117
  if __name__ == "__main__":