hello-ram committed on
Commit
34202a9
·
verified ·
1 Parent(s): 98a1339

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -10
app.py CHANGED
@@ -55,22 +55,28 @@ def health():
55
  # Inference endpoint
56
  @app.post("/predict")
57
  def predict(request: QueryRequest):
58
- inputs = tokenizer(request.question, return_tensors="pt")
 
 
 
 
 
 
 
 
 
 
59
  outputs = model.generate(
60
  **inputs,
61
  max_new_tokens=request.max_new_tokens,
62
  do_sample=True,
63
- temperature=0.7,
64
- top_p=0.9,
65
- pad_token_id=tokenizer.eos_token_id,
66
- return_dict_in_generate=True
67
  )
68
 
69
- answer = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
70
- return {
71
- "question": request.question,
72
- "answer": answer
73
- }
74
 
75
 
76
 
 
55
  # Inference endpoint
56
  @app.post("/predict")
57
  def predict(request: QueryRequest):
58
+ messages = [
59
+ {"role": "system", "content": "reasoning language: english\n\nYou are a helpful assistant."},
60
+ {"role": "user", "content": request.question}
61
+ ]
62
+
63
+ inputs = tokenizer.apply_chat_template(
64
+ messages,
65
+ add_generation_prompt=True,
66
+ return_tensors="pt"
67
+ ).to(model.device)
68
+
69
  outputs = model.generate(
70
  **inputs,
71
  max_new_tokens=request.max_new_tokens,
72
  do_sample=True,
73
+ temperature=request.temperature,
74
+ top_p=request.top_p,
75
+ pad_token_id=tokenizer.eos_token_id
 
76
  )
77
 
78
+ answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
79
+ return {"question": request.question, "answer": answer}
 
 
 
80
 
81
 
82