Spaces:

Rady10
/

vision-model-api

Sleeping

App Files Community

Rady10 committed on May 7

Commit

5b9d376

verified ·

1 Parent(s): 56d265c

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -9

app.py CHANGED Viewed

@@ -98,10 +98,7 @@ def chunk_to_text(chunk) -> str:
 def to_content_list(content) -> list:
-    """
-    apply_chat_template requires content to ALWAYS be a list of dicts.
-    Never a plain string — that causes: TypeError: string indices must be integers
-    """
     if isinstance(content, str):
         return [{"type": "text", "text": content}]
     if isinstance(content, list):
@@ -151,19 +148,17 @@ def build_full_messages(messages: list, image: Image.Image, rag_context: str) ->
         )
     system_prompt = "\n\n".join(system_parts)
-    # ⚠️ content MUST be list of dicts — never a plain string
     full_messages = [
         {"role": "user",      "content": [{"type": "text", "text": system_prompt}]},
         {"role": "assistant", "content": [{"type": "text", "text": "Understood. I will use this knowledge to help you."}]},
     ]
-    # normalize every incoming message too
     norm = [
         {"role": m["role"], "content": to_content_list(m.get("content", ""))}
         for m in messages
     ]
-    # inject image into last user turn
     if image is not None:
         for i in range(len(norm) - 1, -1, -1):
             if norm[i]["role"] == "user":
@@ -183,12 +178,16 @@ def chat(req: ChatRequest):
     rag_context = "" if image else retrieve_rag_context(req.messages)
     full_messages = build_full_messages(req.messages, image, rag_context)
     inputs = processor.apply_chat_template(
         full_messages,
         add_generation_prompt=True,
         tokenize=True,
         return_tensors="pt",
-    ).to(model.device)
     with torch.no_grad():
         output_ids = model.generate(
@@ -198,7 +197,10 @@ def chat(req: ChatRequest):
             top_p=0.9,
         )
-    response_text = processor.decode(output_ids[0], skip_special_tokens=True)
     return {
         "response":   response_text,

 def to_content_list(content) -> list:
+    """content must always be a list of dicts for apply_chat_template"""
     if isinstance(content, str):
         return [{"type": "text", "text": content}]
     if isinstance(content, list):
         )
     system_prompt = "\n\n".join(system_parts)
+    # content MUST be list of dicts — never plain string
     full_messages = [
         {"role": "user",      "content": [{"type": "text", "text": system_prompt}]},
         {"role": "assistant", "content": [{"type": "text", "text": "Understood. I will use this knowledge to help you."}]},
     ]
     norm = [
         {"role": m["role"], "content": to_content_list(m.get("content", ""))}
         for m in messages
     ]
     if image is not None:
         for i in range(len(norm) - 1, -1, -1):
             if norm[i]["role"] == "user":
     rag_context = "" if image else retrieve_rag_context(req.messages)
     full_messages = build_full_messages(req.messages, image, rag_context)
+    # apply_chat_template with tokenize=True returns a plain Tensor, not a dict
+    # use return_dict=True to get {"input_ids": ..., "attention_mask": ...}
     inputs = processor.apply_chat_template(
         full_messages,
         add_generation_prompt=True,
         tokenize=True,
         return_tensors="pt",
+        return_dict=True,          # ← fixes: argument after ** must be a mapping, not Tensor
+    )
+    inputs = {k: v.to(model.device) for k, v in inputs.items()}
     with torch.no_grad():
         output_ids = model.generate(
             top_p=0.9,
         )
+    # decode only the newly generated tokens (skip the input prompt)
+    input_len = inputs["input_ids"].shape[1]
+    new_tokens = output_ids[0][input_len:]
+    response_text = processor.decode(new_tokens, skip_special_tokens=True)
     return {
         "response":   response_text,