Update app.py
app.py CHANGED
@@ -1,10 +1,9 @@
 """
-Gradio Space app
-
+Gradio Space app: Preloaded SigLip + Llava pipeline for instant user response.
 Pipeline:
-1.
-2.
-3. User uploads
+1. Startup: load SigLip processor, model, compute all text embeddings.
+2. Startup: load Llava tokenizer & LlavaForCausalLM model.
+3. User uploads image + asks question → instant retrieval + Llava response.
 """
 
 import os
@@ -13,13 +12,17 @@ from typing import List, Tuple
 import gradio as gr
 import torch
 import torch.nn.functional as F
-from datasets import load_dataset
+from datasets import load_dataset
 from PIL import Image
-from transformers import AutoProcessor
-
+from transformers import AutoProcessor
+
+# Install llava repo if not already installed:
+# pip install git+https://github.com/haotian-liu/LLaVA.git
+from llava.model import LlavaForCausalLM
+from transformers import AutoTokenizer
 
 SIGLIP_MODEL_ID = "EYEDOL/siglipFULL-agri-finetuned"
-LLAVA_MODEL_ID = "llava-hf/llava-1.5-7b-hf"  # replace with actual model
+LLAVA_MODEL_ID = "llava-hf/llava-1.5-7b-hf"  # replace with your actual model repo
 DATASET_TEMPLATE = "EYEDOL/AGRILLAVA-image-text{}"
 NUM_DATASETS = 1
 BATCH_SIZE = 16
@@ -28,7 +31,7 @@ TOP_K_DEFAULT = 3
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 # -------------------------
-# Startup: load
+# Startup: load datasets and compute SigLip text embeddings
 # -------------------------
 print("⏳ Loading datasets and computing SigLip text embeddings...")
 texts_all = []
@@ -41,7 +44,7 @@ siglip_model = AutoModel.from_pretrained(SIGLIP_MODEL_ID).to(device)
 siglip_model.eval()
 
 text_embeds_all = []
-for i in
+for i in range(0, len(texts_all), BATCH_SIZE):
     batch_texts = texts_all[i:i+BATCH_SIZE]
     inputs = siglip_processor(text=batch_texts, padding=True, truncation=True, return_tensors="pt").to(device)
     with torch.no_grad():
@@ -57,14 +60,14 @@ print(f"✅ Finished encoding {len(texts_all)} texts. Shape: {text_embeds_all.shape}")
 # -------------------------
 # Startup: load Llava model & tokenizer
 # -------------------------
-print("⏳ Loading Llava
+print("⏳ Loading Llava tokenizer and LlavaForCausalLM model...")
 llava_tokenizer = AutoTokenizer.from_pretrained(LLAVA_MODEL_ID, use_fast=False)
-llava_model =
+llava_model = LlavaForCausalLM.from_pretrained(LLAVA_MODEL_ID).to(device)
 llava_model.eval()
 print("✅ Llava model loaded.")
 
 # -------------------------
-# SigLip retrieval
+# SigLip retrieval function
 # -------------------------
 
 def retrieve_top_k_texts(image: Image.Image, k=TOP_K_DEFAULT):
@@ -79,7 +82,7 @@ def retrieve_top_k_texts(image: Image.Image, k=TOP_K_DEFAULT):
     return results
 
 # -------------------------
-# Llava answer
+# Llava answer function
 # -------------------------
 
 def llava_answer(image: Image.Image, retrieved_texts: List[str], question: str, max_tokens=256):
@@ -93,7 +96,7 @@ def llava_answer(image: Image.Image, retrieved_texts: List[str], question: str,
     return response
 
 # -------------------------
-# Gradio interface
+# Gradio interface pipeline
 # -------------------------
 
 def gradio_pipeline(image: Image.Image, question: str, k: int = TOP_K_DEFAULT):
@@ -104,6 +107,9 @@ def gradio_pipeline(image: Image.Image, question: str, k: int = TOP_K_DEFAULT):
     response = llava_answer(image, retrieved_texts, question)
     return image, response
 
+# -------------------------
+# Gradio Blocks
+# -------------------------
 with gr.Blocks(title="Agri Image + Question → Llava Response") as demo:
     gr.Markdown("# Agri Image Question Answering\nUpload an agriculture image, ask a question, and get context-aware crop suggestions.")
     with gr.Row():
@@ -117,4 +123,4 @@ with gr.Blocks(title="Agri Image + Question → Llava Response") as demo:
     run_btn.click(fn=gradio_pipeline, inputs=[img_in, question_input, k_slider], outputs=[out_img, txt_out])
 
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", share=False)
+    demo.launch(server_name="0.0.0.0", share=False)
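The diff context elides how `texts_all` is filled and how the embedding loop finishes. A minimal sketch of that startup stage, assuming a `train` split, a `text` column, dataset indices starting at 1, and `get_text_features` with L2-normalization for cosine retrieval (none of these details are shown in the diff):

```python
# Sketch only: split name, text column, and normalization scheme are assumptions.
for d in range(1, NUM_DATASETS + 1):
    ds = load_dataset(DATASET_TEMPLATE.format(d), split="train")
    texts_all.extend(ds["text"])

text_embeds_all = []
for i in range(0, len(texts_all), BATCH_SIZE):
    batch_texts = texts_all[i:i + BATCH_SIZE]
    inputs = siglip_processor(text=batch_texts, padding=True,
                              truncation=True, return_tensors="pt").to(device)
    with torch.no_grad():
        embeds = siglip_model.get_text_features(**inputs)
    # Normalize so dot products later behave as cosine similarities.
    text_embeds_all.append(F.normalize(embeds, dim=-1).cpu())

text_embeds_all = torch.cat(text_embeds_all, dim=0)
print(f"✅ Finished encoding {len(texts_all)} texts. Shape: {text_embeds_all.shape}")
```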
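Only the signature and `return results` of `retrieve_top_k_texts` are visible. A sketch of a plausible body that returns the `List[str]` `llava_answer` expects downstream (the `get_image_features` call and scoring details are assumptions):

```python
def retrieve_top_k_texts(image: Image.Image, k=TOP_K_DEFAULT):
    # Embed the query image with the same SigLip model used for the texts.
    inputs = siglip_processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        img_embed = siglip_model.get_image_features(**inputs)
    img_embed = F.normalize(img_embed, dim=-1).cpu()

    # Dot product of normalized vectors equals cosine similarity.
    sims = (img_embed @ text_embeds_all.T).squeeze(0)
    top = torch.topk(sims, k=min(k, sims.numel()))
    results = [texts_all[i] for i in top.indices.tolist()]
    return results
```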
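One caution on the model loading: `llava-hf/llava-1.5-7b-hf` is packaged for the transformers API (`LlavaForConditionalGeneration` plus its processor), while `llava.model.LlavaForCausalLM` comes from the haotian-liu repo and expects the original LLaVA weight layout, so the two may not match. A sketch of `llava_answer` on the transformers route, with an assumed prompt format that folds the retrieved texts in as grounding context:

```python
from transformers import LlavaForConditionalGeneration

llava_processor = AutoProcessor.from_pretrained(LLAVA_MODEL_ID)
llava_model = LlavaForConditionalGeneration.from_pretrained(
    LLAVA_MODEL_ID, torch_dtype=torch.float16
).to(device)
llava_model.eval()

def llava_answer(image: Image.Image, retrieved_texts: List[str], question: str, max_tokens=256):
    # Fold the SigLip-retrieved captions into the prompt as context.
    context = "\n".join(f"- {t}" for t in retrieved_texts)
    prompt = f"USER: <image>\nContext:\n{context}\n\n{question} ASSISTANT:"
    # Cast floating-point inputs to fp16 to match the model weights.
    inputs = llava_processor(text=prompt, images=image,
                             return_tensors="pt").to(device, torch.float16)
    with torch.no_grad():
        out = llava_model.generate(**inputs, max_new_tokens=max_tokens)
    response = llava_processor.decode(out[0], skip_special_tokens=True)
    # Keep only the assistant's turn.
    return response.split("ASSISTANT:")[-1].strip()
```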
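The widget definitions inside `gr.Blocks` also fall outside the visible context, but the names in the `run_btn.click(...)` line pin down what must exist. A sketch consistent with those names (labels and layout are assumptions):

```python
with gr.Blocks(title="Agri Image + Question → Llava Response") as demo:
    gr.Markdown("# Agri Image Question Answering\nUpload an agriculture image, ask a question, and get context-aware crop suggestions.")
    with gr.Row():
        with gr.Column():
            img_in = gr.Image(type="pil", label="Agriculture image")
            question_input = gr.Textbox(label="Question")
            k_slider = gr.Slider(1, 10, value=TOP_K_DEFAULT, step=1,
                                 label="Top-k retrieved texts")
            run_btn = gr.Button("Run")
        with gr.Column():
            out_img = gr.Image(label="Uploaded image")
            txt_out = gr.Textbox(label="Llava response", lines=8)
    run_btn.click(fn=gradio_pipeline, inputs=[img_in, question_input, k_slider],
                  outputs=[out_img, txt_out])
```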
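Finally, since both models share one GPU, enabling Gradio's request queue before launch keeps concurrent users from racing on it; an optional variant of the launch line:

```python
if __name__ == "__main__":
    # queue() serializes inference requests so simultaneous users
    # don't contend for the single loaded model instance.
    demo.queue().launch(server_name="0.0.0.0", share=False)
```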