Spaces:

Rady10
/

vision-model-api

Sleeping

App Files Files Community

Rady10 committed on May 7

Commit

56d265c

verified ·

1 Parent(s): eab4ea1

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -44

app.py CHANGED Viewed

@@ -23,7 +23,6 @@ from transformers import (
 MODEL_REPO = "Rady10/Plant-Disease-Qwen3VL-2B"
 RAG_REPO   = "Rady10/Agriculture-Rag-Data-Index"
-DEVICE = "cpu"
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 # ─────────────────────────────
@@ -43,10 +42,7 @@ async def lifespan(app: FastAPI):
     global model, processor, faiss_index, rag_chunks, embedder
     print("Loading vision model...")
-    processor = AutoProcessor.from_pretrained(
-        MODEL_REPO,
-        trust_remote_code=True,
-    )
     model = Qwen3VLForConditionalGeneration.from_pretrained(
         MODEL_REPO,
         torch_dtype=torch.float32,
@@ -56,11 +52,7 @@ async def lifespan(app: FastAPI):
     model.eval()
     print("Loading RAG index...")
-    rag_dir = snapshot_download(
-        repo_id=RAG_REPO,
-        repo_type="dataset",
-        local_dir="./rag",
-    )
     faiss_index = faiss.read_index(os.path.join(rag_dir, "agro.index"))
     with open(os.path.join(rag_dir, "chunks.json"), "r", encoding="utf-8") as f:
         rag_chunks = json.load(f)
@@ -84,40 +76,49 @@ app = FastAPI(title="🌿 Plant Disease Chat API", lifespan=lifespan)
 # ─────────────────────────────
 class ChatRequest(BaseModel):
     messages: list
-    image: str = None
-    # image present → RAG skipped automatically
 # ─────────────────────────────
 # HELPERS
 # ─────────────────────────────
-def decode_image(base64_str: str) -> Image.Image:
-    img_bytes = base64.b64decode(base64_str)
-    return Image.open(BytesIO(img_bytes)).convert("RGB")
 def chunk_to_text(chunk) -> str:
-    """
-    Safely convert a chunk to plain string regardless of its type.
-    chunks.json may contain strings, dicts, or other structures.
-    """
     if isinstance(chunk, str):
         return chunk
     if isinstance(chunk, dict):
-        # common keys used in RAG datasets — try in order
         for key in ("text", "content", "passage", "chunk", "body"):
             if key in chunk and isinstance(chunk[key], str):
                 return chunk[key]
-        # fallback: join all string values
         return " ".join(str(v) for v in chunk.values())
     return str(chunk)
 def retrieve_rag_context(messages: list, k: int = 3) -> str:
     if not rag_chunks or faiss_index is None:
         return ""
-    # find last user text
     last_user_text = ""
     for m in reversed(messages):
         if m.get("role") != "user":
@@ -138,12 +139,7 @@ def retrieve_rag_context(messages: list, k: int = 3) -> str:
     query_vec = embedder.encode([last_user_text])
     _, indices = faiss_index.search(query_vec, k=k)
-    chunks = [
-        chunk_to_text(rag_chunks[i])
-        for i in indices[0]
-        if i < len(rag_chunks)
-    ]
     return "\n\n".join(chunks)
@@ -151,29 +147,30 @@ def build_full_messages(messages: list, image: Image.Image, rag_context: str) ->
     system_parts = ["You are a plant disease expert assistant."]
     if rag_context:
         system_parts.append(
-            "Use the following retrieved knowledge to inform your answer:\n\n"
-            + rag_context
         )
     system_prompt = "\n\n".join(system_parts)
     full_messages = [
-        {"role": "user",      "content": system_prompt},
-        {"role": "assistant", "content": "Understood. I will use this knowledge to help you."},
     ]
-    messages = [dict(m) for m in messages]
     if image is not None:
-        for i in range(len(messages) - 1, -1, -1):
-            if messages[i].get("role") == "user":
-                content = messages[i].get("content", "")
-                if isinstance(content, str):
-                    content = [{"type": "text", "text": content}]
-                content = [{"type": "image", "image": image}] + content
-                messages[i]["content"] = content
                 break
-    full_messages.extend(messages)
     return full_messages
@@ -183,9 +180,6 @@ def build_full_messages(messages: list, image: Image.Image, rag_context: str) ->
 @app.post("/chat")
 def chat(req: ChatRequest):
     image = decode_image(req.image) if req.image else None
-    # image present → use model's own vision training only (no RAG)
-    # no image      → use RAG to ground the text answer
     rag_context = "" if image else retrieve_rag_context(req.messages)
     full_messages = build_full_messages(req.messages, image, rag_context)

 MODEL_REPO = "Rady10/Plant-Disease-Qwen3VL-2B"
 RAG_REPO   = "Rady10/Agriculture-Rag-Data-Index"
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 # ─────────────────────────────
     global model, processor, faiss_index, rag_chunks, embedder
     print("Loading vision model...")
+    processor = AutoProcessor.from_pretrained(MODEL_REPO, trust_remote_code=True)
     model = Qwen3VLForConditionalGeneration.from_pretrained(
         MODEL_REPO,
         torch_dtype=torch.float32,
     model.eval()
     print("Loading RAG index...")
+    rag_dir = snapshot_download(repo_id=RAG_REPO, repo_type="dataset", local_dir="./rag")
     faiss_index = faiss.read_index(os.path.join(rag_dir, "agro.index"))
     with open(os.path.join(rag_dir, "chunks.json"), "r", encoding="utf-8") as f:
         rag_chunks = json.load(f)
 # ─────────────────────────────
 class ChatRequest(BaseModel):
     messages: list
+    image: str = None     # base64 — if given, RAG is skipped automatically
 # ─────────────────────────────
 # HELPERS
 # ─────────────────────────────
+def decode_image(b64: str) -> Image.Image:
+    return Image.open(BytesIO(base64.b64decode(b64))).convert("RGB")
 def chunk_to_text(chunk) -> str:
     """Convert a single RAG chunk of any type into a plain string.

     Strings are returned unchanged. For dicts, the value of the first key
     among "text", "content", "passage", "chunk", "body" that holds a string
     is returned; if none matches, all values are stringified and joined with
     single spaces. Any other object is passed through ``str()``.
     """
     if isinstance(chunk, str):
         return chunk
     if isinstance(chunk, dict):
         # Probe the candidate text-bearing keys in priority order.
         for key in ("text", "content", "passage", "chunk", "body"):
             if key in chunk and isinstance(chunk[key], str):
                 return chunk[key]
         # No candidate key held a string: fall back to every value, space-joined.
         return " ".join(str(v) for v in chunk.values())
     # Neither str nor dict: use the object's string representation.
     return str(chunk)
+def to_content_list(content) -> list:
+    """
+    apply_chat_template requires content to ALWAYS be a list of dicts.
+    Never a plain string — that causes: TypeError: string indices must be integers
+    """
+    if isinstance(content, str):
+        return [{"type": "text", "text": content}]
+    if isinstance(content, list):
+        result = []
+        for block in content:
+            if isinstance(block, str):
+                result.append({"type": "text", "text": block})
+            else:
+                result.append(block)
+        return result
+    return [{"type": "text", "text": str(content)}]
 def retrieve_rag_context(messages: list, k: int = 3) -> str:
     if not rag_chunks or faiss_index is None:
         return ""
     last_user_text = ""
     for m in reversed(messages):
         if m.get("role") != "user":
     query_vec = embedder.encode([last_user_text])
     _, indices = faiss_index.search(query_vec, k=k)
+    chunks = [chunk_to_text(rag_chunks[i]) for i in indices[0] if i < len(rag_chunks)]
     return "\n\n".join(chunks)
     system_parts = ["You are a plant disease expert assistant."]
     if rag_context:
         system_parts.append(
+            "Use the following retrieved knowledge to inform your answer:\n\n" + rag_context
         )
     system_prompt = "\n\n".join(system_parts)
+    # ⚠️ content MUST be list of dicts — never a plain string
     full_messages = [
+        {"role": "user",      "content": [{"type": "text", "text": system_prompt}]},
+        {"role": "assistant", "content": [{"type": "text", "text": "Understood. I will use this knowledge to help you."}]},
     ]
+    # normalize every incoming message too
+    norm = [
+        {"role": m["role"], "content": to_content_list(m.get("content", ""))}
+        for m in messages
+    ]
+    # inject image into last user turn
     if image is not None:
+        for i in range(len(norm) - 1, -1, -1):
+            if norm[i]["role"] == "user":
+                norm[i]["content"] = [{"type": "image", "image": image}] + norm[i]["content"]
                 break
+    full_messages.extend(norm)
     return full_messages
 @app.post("/chat")
 def chat(req: ChatRequest):
     image = decode_image(req.image) if req.image else None
     rag_context = "" if image else retrieve_rag_context(req.messages)
     full_messages = build_full_messages(req.messages, image, rag_context)