shuarya2011 committed on
Commit
db1070f
·
verified ·
1 Parent(s): 4281982

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -28,7 +28,6 @@ print("Model ready ✓")
28
 
29
  # ── Inference ─────────────────────────────────────────────────────────────────
30
  def respond(message: str, history: list, system_prompt: str, max_new_tokens: int):
31
- """Build the chat prompt and return a response."""
32
 
33
  messages = []
34
 
@@ -37,7 +36,6 @@ def respond(message: str, history: list, system_prompt: str, max_new_tokens: int
37
  if system_prompt.strip() and not history:
38
  effective_message = f"{system_prompt.strip()}\n\n{message}"
39
 
40
- # history is a list of dicts with 'role' and 'content' keys (messages format)
41
  for turn in history:
42
  messages.append({"role": turn["role"], "content": turn["content"]})
43
 
@@ -50,6 +48,10 @@ def respond(message: str, history: list, system_prompt: str, max_new_tokens: int
50
  return_tensors="pt",
51
  )
52
 
 
 
 
 
53
  # Generate
54
  with torch.no_grad():
55
  output_ids = model.generate(
 
28
 
29
  # ── Inference ─────────────────────────────────────────────────────────────────
30
  def respond(message: str, history: list, system_prompt: str, max_new_tokens: int):
 
31
 
32
  messages = []
33
 
 
36
  if system_prompt.strip() and not history:
37
  effective_message = f"{system_prompt.strip()}\n\n{message}"
38
 
 
39
  for turn in history:
40
  messages.append({"role": turn["role"], "content": turn["content"]})
41
 
 
48
  return_tensors="pt",
49
  )
50
 
51
+ # Gemma 4 returns a BatchEncoding dict instead of a raw tensor
52
+ if not isinstance(input_ids, torch.Tensor):
53
+ input_ids = input_ids["input_ids"]
54
+
55
  # Generate
56
  with torch.no_grad():
57
  output_ids = model.generate(