Spaces:

Rady10
/

vision-model-api

Sleeping

App Files Files Community

vision-model-api / app.py

Rady10

Update app.py

5b9d376 verified 27 days ago

raw

history blame contribute delete

7.59 kB

	import base64
	import json
	import os
	from contextlib import asynccontextmanager
	from io import BytesIO
	from typing import Optional

	import faiss
	import torch
	from fastapi import FastAPI, HTTPException
	from huggingface_hub import snapshot_download
	from PIL import Image
	from pydantic import BaseModel
	from sentence_transformers import SentenceTransformer
	from transformers import (
	    AutoProcessor,
	    Qwen3VLForConditionalGeneration,
	)

	# ─────────────────────────────
	# CONFIG
	# ─────────────────────────────
	# Hub repository ids: the first is handed to the processor/model loaders,
	# the second is downloaded as a dataset snapshot holding the RAG files.
	MODEL_REPO = "Rady10/Plant-Disease-Qwen3VL-2B"
	RAG_REPO = "Rady10/Agriculture-Rag-Data-Index"

	# Set before any tokenizer is created so the Hugging Face tokenizers
	# library is asked not to use its internal parallelism.
	os.environ["TOKENIZERS_PARALLELISM"] = "false"

	# ─────────────────────────────
	# GLOBALS
	# ─────────────────────────────
	# All five start out empty and are filled in by `lifespan` at startup:
	#   model / processor -> vision-language model and its input processor
	#   faiss_index       -> vector index read from the RAG snapshot
	#   rag_chunks        -> parsed contents of chunks.json
	#   embedder          -> sentence-embedding model used for queries
	model = processor = faiss_index = rag_chunks = embedder = None

	# ─────────────────────────────
	# LIFESPAN
	# ─────────────────────────────
	@asynccontextmanager
	async def lifespan(app: FastAPI):
	    """Load every heavyweight resource into the module globals, then yield.

	    Everything before the ``yield`` runs as the startup phase: the vision
	    model and its processor, the FAISS index, the chunk list and the
	    sentence embedder are all assigned to their module-level names.
	    Nothing is executed after the ``yield``.
	    """
	    global model, processor, faiss_index, rag_chunks, embedder

	    print("Loading vision model...")
	    processor = AutoProcessor.from_pretrained(MODEL_REPO, trust_remote_code=True)
	    # CPU-only, full-precision load.
	    load_options = {
	        "torch_dtype": torch.float32,
	        "device_map": "cpu",
	        "trust_remote_code": True,
	    }
	    model = Qwen3VLForConditionalGeneration.from_pretrained(MODEL_REPO, **load_options)
	    model.eval()

	    print("Loading RAG index...")
	    rag_dir = snapshot_download(repo_id=RAG_REPO, repo_type="dataset", local_dir="./rag")
	    index_path = os.path.join(rag_dir, "agro.index")
	    chunks_path = os.path.join(rag_dir, "chunks.json")
	    faiss_index = faiss.read_index(index_path)
	    with open(chunks_path, "r", encoding="utf-8") as chunks_file:
	        rag_chunks = json.load(chunks_file)

	    embedder_name = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
	    embedder = SentenceTransformer(embedder_name)

	    print("ALL LOADED ✔")
	    yield


	# ─────────────────────────────
	# APP
	# ─────────────────────────────
	# The FastAPI application object; `lifespan` is registered so the model
	# and RAG globals are populated by it rather than at import time.
	app = FastAPI(title="🌿 Plant Disease Chat API", lifespan=lifespan)


	# ─────────────────────────────
	# REQUEST MODEL
	# ─────────────────────────────
	class ChatRequest(BaseModel):
	    """Request body accepted by ``POST /chat``."""

	    # Conversation turns; each turn is read as a mapping with "role" and
	    # "content" keys by the helpers that consume it.
	    messages: list
	    # Base64-encoded image. Declared Optional so that both an omitted field
	    # and an explicit JSON null validate; when given, RAG is skipped.
	    image: Optional[str] = None


	# ─────────────────────────────
	# HELPERS
	# ─────────────────────────────
	def decode_image(b64: str) -> Image.Image:
	    """Decode a base64-encoded image into an RGB PIL image.

	    Accepts either a bare base64 string or a data URL such as
	    ``data:image/png;base64,<payload>``; in the latter case the header up
	    to the first comma is dropped before decoding.

	    Raises:
	        ValueError: the payload is not valid base64 (``binascii.Error``).
	        OSError: the decoded bytes are not an image PIL can open.
	    """
	    payload = b64.strip()
	    if payload.startswith("data:"):
	        # The base64 alphabet contains no comma, so the first comma can only
	        # be the end of the data-URL header. Without this step the header's
	        # letters would be decoded as if they were image data.
	        payload = payload.partition(",")[2]
	    raw_bytes = base64.b64decode(payload)
	    return Image.open(BytesIO(raw_bytes)).convert("RGB")


	def chunk_to_text(chunk) -> str:
	    """Turn one stored RAG chunk into plain text.

	    Strings pass through untouched. For a dict, the first of the keys
	    text/content/passage/chunk/body that holds a string wins; if none does,
	    all values are stringified and joined with single spaces. Any other
	    object is converted with ``str``.
	    """
	    if isinstance(chunk, str):
	        return chunk
	    if not isinstance(chunk, dict):
	        return str(chunk)

	    preferred_keys = ("text", "content", "passage", "chunk", "body")
	    # `.get` yields None for a missing key, which fails the str check just
	    # like an absent key would.
	    found = next(
	        (chunk[name] for name in preferred_keys if isinstance(chunk.get(name), str)),
	        None,
	    )
	    if found is not None:
	        return found
	    return " ".join(map(str, chunk.values()))


	def to_content_list(content) -> list:
	    """Normalise message content into the list form apply_chat_template needs.

	    A list is copied with every bare string item wrapped as a text block and
	    every other item kept as-is. A string becomes a single text block. Any
	    other value is stringified first and then wrapped.
	    """

	    def text_block(value):
	        return {"type": "text", "text": value}

	    if isinstance(content, list):
	        return [
	            text_block(item) if isinstance(item, str) else item
	            for item in content
	        ]
	    if not isinstance(content, str):
	        content = str(content)
	    return [text_block(content)]


	def retrieve_rag_context(messages: list, k: int = 3) -> str:
	    """Return the top-``k`` knowledge chunks for the latest user text.

	    Walks ``messages`` from newest to oldest, takes the text of the most
	    recent user turn that has any (a plain string, or the first text block
	    of a block list), embeds it and queries the FAISS index.

	    Args:
	        messages: conversation turns, each a mapping with "role"/"content".
	        k: number of neighbours requested from the index.

	    Returns:
	        The matching chunks joined by blank lines, or "" when the RAG
	        resources are not loaded or no non-blank user text exists.
	    """
	    # Nothing to retrieve from until startup has filled in all three globals.
	    if not rag_chunks or faiss_index is None or embedder is None:
	        return ""

	    last_user_text = ""
	    for m in reversed(messages):
	        if m.get("role") != "user":
	            continue
	        content = m.get("content", "")
	        if isinstance(content, str):
	            last_user_text = content
	        elif isinstance(content, list):
	            # First text block that actually carries a string; a block with
	            # a missing "text" key is skipped instead of raising KeyError.
	            last_user_text = next(
	                (
	                    block["text"]
	                    for block in content
	                    if isinstance(block, dict)
	                    and block.get("type") == "text"
	                    and isinstance(block.get("text"), str)
	                ),
	                "",
	            )
	        if last_user_text:
	            break

	    if not last_user_text.strip():
	        return ""

	    query_vec = embedder.encode([last_user_text])
	    _, indices = faiss_index.search(query_vec, k=k)
	    total = len(rag_chunks)
	    # FAISS pads the result with -1 when it finds fewer than k neighbours.
	    # A negative index would otherwise wrap around and return the last
	    # chunk, so require a lower bound as well.
	    chunks = [chunk_to_text(rag_chunks[i]) for i in indices[0] if 0 <= i < total]
	    return "\n\n".join(chunks)


	def build_full_messages(messages: list, image: Image.Image, rag_context: str) -> list:
	    """Assemble the complete chat-template input for one request.

	    The result starts with a fixed two-turn preamble (the instructions sent
	    as a user turn plus a canned assistant acknowledgement), followed by the
	    caller's turns with their content normalised to block lists. When an
	    image is supplied it is placed at the front of the most recent user
	    turn; if there is no user turn the image is not attached.
	    """
	    system_prompt = "You are a plant disease expert assistant."
	    if rag_context:
	        knowledge = "Use the following retrieved knowledge to inform your answer:\n\n" + rag_context
	        system_prompt = system_prompt + "\n\n" + knowledge

	    # content MUST be list of dicts — never plain string
	    preamble_user = {
	        "role": "user",
	        "content": [{"type": "text", "text": system_prompt}],
	    }
	    preamble_assistant = {
	        "role": "assistant",
	        "content": [{"type": "text", "text": "Understood. I will use this knowledge to help you."}],
	    }

	    turns = []
	    for m in messages:
	        turns.append({"role": m["role"], "content": to_content_list(m.get("content", ""))})

	    if image is not None:
	        # Scan newest-first and stop at the first user turn found.
	        for turn in reversed(turns):
	            if turn["role"] == "user":
	                turn["content"] = [{"type": "image", "image": image}] + turn["content"]
	                break

	    return [preamble_user, preamble_assistant, *turns]


	# ─────────────────────────────
	# UNIFIED ENDPOINT
	# ─────────────────────────────
	@app.post("/chat")
	def chat(req: ChatRequest):
	    """Answer one chat request, using either the image or RAG context.

	    If the request carries an image it is decoded and attached to the
	    conversation and retrieval is skipped; otherwise retrieved knowledge is
	    added to the prompt.

	    Returns:
	        A dict with the generated ``response`` text and the booleans
	        ``rag_used`` and ``image_used``.

	    Raises:
	        HTTPException: status 400 when ``req.image`` cannot be decoded.
	    """
	    image = None
	    if req.image:
	        try:
	            image = decode_image(req.image)
	        except (ValueError, OSError) as exc:
	            # Bad base64 raises a ValueError subclass and unreadable image
	            # bytes an OSError subclass; both are client errors, not 500s.
	            raise HTTPException(
	                status_code=400,
	                detail="image is not a valid base64-encoded image",
	            ) from exc

	    rag_context = "" if image is not None else retrieve_rag_context(req.messages)
	    full_messages = build_full_messages(req.messages, image, rag_context)

	    # return_dict=True makes apply_chat_template hand back a mapping
	    # (input_ids, attention_mask, ...) that can be splatted into generate();
	    # without it a plain tensor is returned.
	    inputs = processor.apply_chat_template(
	        full_messages,
	        add_generation_prompt=True,
	        tokenize=True,
	        return_tensors="pt",
	        return_dict=True,
	    )
	    inputs = {k: v.to(model.device) for k, v in inputs.items()}

	    with torch.no_grad():
	        output_ids = model.generate(
	            **inputs,
	            max_new_tokens=512,
	            # temperature/top_p only apply in sampling mode, so request it
	            # explicitly instead of relying on the checkpoint's defaults.
	            do_sample=True,
	            temperature=0.7,
	            top_p=0.9,
	        )

	    # decode only the newly generated tokens (skip the input prompt)
	    input_len = inputs["input_ids"].shape[1]
	    new_tokens = output_ids[0][input_len:]
	    response_text = processor.decode(new_tokens, skip_special_tokens=True)

	    return {
	        "response": response_text,
	        "rag_used": bool(rag_context),
	        "image_used": image is not None,
	    }


	# ─────────────────────────────
	# HEALTH CHECK
	# ─────────────────────────────
	@app.get("/")
	def root():
	    """Health check: report that the API process is up."""
	    status_payload = {"status": "plant disease chat api running"}
	    return status_payload