Refactor climate advisory agent to support video input and improve model loading. Update requirements to use the latest transformers from GitHub and add new utility dependencies. Clean up code by removing unnecessary comments and enhancing descriptions for clarity.
Browse files- app/agents/climate_agent.py +100 -35
- app/agents/crew_pipeline.py +2 -20
- app/main.py +4 -10
- app/utils/config.py +0 -4
- app/utils/model_manager.py +38 -50
- requirements.txt +3 -2
app/agents/climate_agent.py
CHANGED
|
@@ -1,19 +1,23 @@
|
|
| 1 |
"""
|
| 2 |
Farmer-First Climate-Resilient Advisory Agent
|
| 3 |
|
| 4 |
-
Uses a multimodal
|
| 5 |
-
smallholder farmers
|
|
|
|
|
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
import io
|
| 9 |
import logging
|
|
|
|
|
|
|
| 10 |
from typing import Optional, Dict, Any
|
| 11 |
|
| 12 |
-
from PIL import Image
|
| 13 |
import requests
|
|
|
|
| 14 |
|
| 15 |
from app.utils import config
|
| 16 |
-
from app.utils.model_manager import load_multimodal_model
|
| 17 |
from app.utils.memory import memory_store
|
| 18 |
|
| 19 |
logging.basicConfig(
|
|
@@ -59,19 +63,35 @@ def _build_weather_context(latitude: Optional[float], longitude: Optional[float]
|
|
| 59 |
return ""
|
| 60 |
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
def advise_climate_resilient(
|
| 63 |
query: str,
|
| 64 |
session_id: str,
|
| 65 |
latitude: Optional[float] = None,
|
| 66 |
longitude: Optional[float] = None,
|
| 67 |
image_bytes: Optional[bytes] = None,
|
|
|
|
| 68 |
) -> Dict[str, Any]:
|
| 69 |
"""
|
| 70 |
Run the Farmer-First Climate-Resilient advisory pipeline with optional image + GPS.
|
| 71 |
-
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
| 73 |
"""
|
| 74 |
processor, model = load_multimodal_model(config.MULTIMODAL_MODEL_NAME)
|
|
|
|
| 75 |
|
| 76 |
# Conversation history (text-only, 1-hour TTL shared with core pipeline)
|
| 77 |
history = memory_store.get_history(session_id) or []
|
|
@@ -122,6 +142,7 @@ def advise_climate_resilient(
|
|
| 122 |
else "No photo is attached. Use only the text and any weather/location information.\n"
|
| 123 |
)
|
| 124 |
|
|
|
|
| 125 |
prompt_parts = [system_prompt]
|
| 126 |
if location_context:
|
| 127 |
prompt_parts.append("\nLOCATION & WEATHER CONTEXT:\n")
|
|
@@ -141,39 +162,82 @@ def advise_climate_resilient(
|
|
| 141 |
|
| 142 |
full_prompt = "".join(prompt_parts)
|
| 143 |
|
| 144 |
-
#
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
inputs = processor(
|
| 156 |
-
text=
|
| 157 |
-
images=
|
|
|
|
|
|
|
| 158 |
return_tensors="pt",
|
| 159 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
else:
|
| 161 |
-
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
| 163 |
return_tensors="pt",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
)
|
| 165 |
-
|
| 166 |
-
inputs = {k: v.to(model.device) for k, v in inputs.items()}
|
| 167 |
-
|
| 168 |
-
generated_ids = model.generate(
|
| 169 |
-
**inputs,
|
| 170 |
-
max_new_tokens=512,
|
| 171 |
-
temperature=0.4,
|
| 172 |
-
top_p=0.9,
|
| 173 |
-
)
|
| 174 |
-
|
| 175 |
-
outputs = processor.batch_decode(generated_ids, skip_special_tokens=True)
|
| 176 |
-
answer = (outputs[0] if outputs else "").strip()
|
| 177 |
|
| 178 |
# Save to shared memory history
|
| 179 |
history.append({"role": "user", "content": query})
|
|
@@ -185,8 +249,9 @@ def advise_climate_resilient(
|
|
| 185 |
"answer": answer,
|
| 186 |
"latitude": latitude,
|
| 187 |
"longitude": longitude,
|
| 188 |
-
"used_image": bool(
|
| 189 |
-
"
|
|
|
|
| 190 |
}
|
| 191 |
|
| 192 |
|
|
|
|
| 1 |
"""
|
| 2 |
Farmer-First Climate-Resilient Advisory Agent
|
| 3 |
|
| 4 |
+
Uses a multimodal Qwen2-VL model (when available) to provide
|
| 5 |
+
climate-resilient advice to smallholder farmers from text, optional
|
| 6 |
+
photo/video, and GPS location. Falls back to text-only Qwen on
|
| 7 |
+
environments where Qwen2-VL cannot be fully initialized.
|
| 8 |
"""
|
| 9 |
|
| 10 |
import io
|
| 11 |
import logging
|
| 12 |
+
import os
|
| 13 |
+
import tempfile
|
| 14 |
from typing import Optional, Dict, Any
|
| 15 |
|
|
|
|
| 16 |
import requests
|
| 17 |
+
from qwen_vl_utils import process_vision_info
|
| 18 |
|
| 19 |
from app.utils import config
|
| 20 |
+
from app.utils.model_manager import load_multimodal_model, load_expert_model
|
| 21 |
from app.utils.memory import memory_store
|
| 22 |
|
| 23 |
logging.basicConfig(
|
|
|
|
| 63 |
return ""
|
| 64 |
|
| 65 |
|
| 66 |
+
def _save_temp_file(data: bytes, suffix: str) -> str:
    """
    Save bytes to a temporary file and return a file:// URI for Qwen2-VL.

    Args:
        data: Raw file content to persist (e.g. uploaded image or video bytes).
        suffix: Filename suffix including the dot, e.g. ".jpg" or ".mp4".

    Returns:
        The string "file://" followed by the path of the newly created file.

    Raises:
        Any exception raised while writing (e.g. OSError, or TypeError when
        ``data`` is not bytes-like). The partially written file is removed
        before the exception propagates.

    The file is created with ``delete=False`` so it outlives this call.
    Nothing here removes it on success; the caller is responsible for
    deleting it once it is no longer needed.
    """
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    try:
        # The context manager flushes and closes the handle even if write() fails.
        with tmp:
            tmp.write(data)
    except Exception:
        # Best-effort cleanup: do not leave a partial file behind on failure.
        try:
            os.unlink(tmp.name)
        except OSError:
            pass
        raise
    return f"file://{tmp.name}"
|
| 75 |
+
|
| 76 |
+
|
| 77 |
def advise_climate_resilient(
|
| 78 |
query: str,
|
| 79 |
session_id: str,
|
| 80 |
latitude: Optional[float] = None,
|
| 81 |
longitude: Optional[float] = None,
|
| 82 |
image_bytes: Optional[bytes] = None,
|
| 83 |
+
video_bytes: Optional[bytes] = None,
|
| 84 |
) -> Dict[str, Any]:
|
| 85 |
"""
|
| 86 |
Run the Farmer-First Climate-Resilient advisory pipeline with optional image + GPS.
|
| 87 |
+
|
| 88 |
+
Tries to use a multimodal Qwen-VL model when available; if the
|
| 89 |
+
multimodal stack cannot be loaded on this environment, gracefully
|
| 90 |
+
falls back to text-only Qwen while still using location/weather
|
| 91 |
+
context.
|
| 92 |
"""
|
| 93 |
processor, model = load_multimodal_model(config.MULTIMODAL_MODEL_NAME)
|
| 94 |
+
use_multimodal = processor is not None and model is not None
|
| 95 |
|
| 96 |
# Conversation history (text-only, 1-hour TTL shared with core pipeline)
|
| 97 |
history = memory_store.get_history(session_id) or []
|
|
|
|
| 142 |
else "No photo is attached. Use only the text and any weather/location information.\n"
|
| 143 |
)
|
| 144 |
|
| 145 |
+
# Build a single user text block that includes context + question.
|
| 146 |
prompt_parts = [system_prompt]
|
| 147 |
if location_context:
|
| 148 |
prompt_parts.append("\nLOCATION & WEATHER CONTEXT:\n")
|
|
|
|
| 162 |
|
| 163 |
full_prompt = "".join(prompt_parts)
|
| 164 |
|
| 165 |
+
# Multimodal path (if supported)
|
| 166 |
+
answer = ""
|
| 167 |
+
used_image_flag = False
|
| 168 |
+
used_video_flag = False
|
| 169 |
+
|
| 170 |
+
if use_multimodal:
|
| 171 |
+
# Build Qwen2-VL messages following official pattern
|
| 172 |
+
image_uri = _save_temp_file(image_bytes, ".jpg") if image_bytes else None
|
| 173 |
+
video_uri = _save_temp_file(video_bytes, ".mp4") if video_bytes else None
|
| 174 |
+
|
| 175 |
+
user_content = []
|
| 176 |
+
if image_uri:
|
| 177 |
+
user_content.append({"type": "image", "image": image_uri})
|
| 178 |
+
used_image_flag = True
|
| 179 |
+
if video_uri:
|
| 180 |
+
user_content.append(
|
| 181 |
+
{
|
| 182 |
+
"type": "video",
|
| 183 |
+
"video": video_uri,
|
| 184 |
+
"fps": 1.0,
|
| 185 |
+
}
|
| 186 |
+
)
|
| 187 |
+
used_video_flag = True
|
| 188 |
+
|
| 189 |
+
user_content.append({"type": "text", "text": full_prompt})
|
| 190 |
+
|
| 191 |
+
messages = [
|
| 192 |
+
{"role": "system", "content": system_prompt},
|
| 193 |
+
{"role": "user", "content": user_content},
|
| 194 |
+
]
|
| 195 |
+
|
| 196 |
+
text_prompt = processor.apply_chat_template(
|
| 197 |
+
messages, tokenize=False, add_generation_prompt=True
|
| 198 |
+
)
|
| 199 |
+
image_inputs, video_inputs = process_vision_info(messages)
|
| 200 |
+
|
| 201 |
inputs = processor(
|
| 202 |
+
text=[text_prompt],
|
| 203 |
+
images=image_inputs,
|
| 204 |
+
videos=video_inputs,
|
| 205 |
+
padding=True,
|
| 206 |
return_tensors="pt",
|
| 207 |
)
|
| 208 |
+
inputs = {k: v.to(model.device) for k, v in inputs.items()}
|
| 209 |
+
|
| 210 |
+
generated_ids = model.generate(
|
| 211 |
+
**inputs,
|
| 212 |
+
max_new_tokens=512,
|
| 213 |
+
temperature=0.4,
|
| 214 |
+
top_p=0.9,
|
| 215 |
+
)
|
| 216 |
+
generated_ids_trimmed = [
|
| 217 |
+
out_ids[len(in_ids) :]
|
| 218 |
+
for in_ids, out_ids in zip(inputs["input_ids"], generated_ids)
|
| 219 |
+
]
|
| 220 |
+
outputs = processor.batch_decode(
|
| 221 |
+
generated_ids_trimmed,
|
| 222 |
+
skip_special_tokens=True,
|
| 223 |
+
clean_up_tokenization_spaces=False,
|
| 224 |
+
)
|
| 225 |
+
answer = (outputs[0] if outputs else "").strip()
|
| 226 |
else:
|
| 227 |
+
# Fallback: text-only Qwen expert model, still using climate-aware prompt
|
| 228 |
+
logging.info("Multimodal model unavailable; using text-only expert model for /advise.")
|
| 229 |
+
tokenizer, text_model = load_expert_model(config.EXPERT_MODEL_NAME, use_quantization=True)
|
| 230 |
+
inputs = tokenizer(
|
| 231 |
+
full_prompt,
|
| 232 |
return_tensors="pt",
|
| 233 |
+
).to(text_model.device)
|
| 234 |
+
generated_ids = text_model.generate(
|
| 235 |
+
**inputs,
|
| 236 |
+
max_new_tokens=512,
|
| 237 |
+
temperature=0.4,
|
| 238 |
+
top_p=0.9,
|
| 239 |
)
|
| 240 |
+
answer = tokenizer.decode(generated_ids[0], skip_special_tokens=True).strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
|
| 242 |
# Save to shared memory history
|
| 243 |
history.append({"role": "user", "content": query})
|
|
|
|
| 249 |
"answer": answer,
|
| 250 |
"latitude": latitude,
|
| 251 |
"longitude": longitude,
|
| 252 |
+
"used_image": bool(used_image_flag),
|
| 253 |
+
"used_video": bool(used_video_flag),
|
| 254 |
+
"model_used": config.MULTIMODAL_MODEL_NAME if use_multimodal else config.EXPERT_MODEL_NAME,
|
| 255 |
}
|
| 256 |
|
| 257 |
|
app/agents/crew_pipeline.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
# Aglimate/app/agents/crew_pipeline.py
|
| 2 |
import os
|
| 3 |
import sys
|
| 4 |
import re
|
|
@@ -13,21 +12,14 @@ from huggingface_hub import hf_hub_download
|
|
| 13 |
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM, NllbTokenizer
|
| 14 |
from sentence_transformers import SentenceTransformer
|
| 15 |
from app.utils import config
|
| 16 |
-
from app.utils.memory import memory_store
|
| 17 |
from typing import List
|
| 18 |
|
| 19 |
|
| 20 |
-
hf_cache = "/models/huggingface"
|
| 21 |
-
os.environ["HF_HOME"] = hf_cache
|
| 22 |
-
os.environ["TRANSFORMERS_CACHE"] = hf_cache
|
| 23 |
-
os.environ["HUGGINGFACE_HUB_CACHE"] = hf_cache
|
| 24 |
-
os.makedirs(hf_cache, exist_ok=True)
|
| 25 |
-
|
| 26 |
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
| 27 |
if BASE_DIR not in sys.path:
|
| 28 |
sys.path.insert(0, BASE_DIR)
|
| 29 |
|
| 30 |
-
# Lazy loading - models loaded on demand via model_manager
|
| 31 |
from app.utils.model_manager import (
|
| 32 |
load_expert_model,
|
| 33 |
load_translation_model,
|
|
@@ -37,9 +29,7 @@ from app.utils.model_manager import (
|
|
| 37 |
get_device
|
| 38 |
)
|
| 39 |
|
| 40 |
-
DEVICE = get_device()
|
| 41 |
-
|
| 42 |
-
# Models will be loaded lazily when needed
|
| 43 |
_tokenizer = None
|
| 44 |
_model = None
|
| 45 |
_embedder = None
|
|
@@ -50,7 +40,6 @@ _classifier = None
|
|
| 50 |
|
| 51 |
|
| 52 |
def get_expert_model():
|
| 53 |
-
"""Lazy load expert model."""
|
| 54 |
global _tokenizer, _model
|
| 55 |
if _tokenizer is None or _model is None:
|
| 56 |
_tokenizer, _model = load_expert_model(config.EXPERT_MODEL_NAME, use_quantization=True)
|
|
@@ -58,7 +47,6 @@ def get_expert_model():
|
|
| 58 |
|
| 59 |
|
| 60 |
def get_embedder():
|
| 61 |
-
"""Lazy load embedder."""
|
| 62 |
global _embedder
|
| 63 |
if _embedder is None:
|
| 64 |
_embedder = load_embedder(config.EMBEDDING_MODEL)
|
|
@@ -66,7 +54,6 @@ def get_embedder():
|
|
| 66 |
|
| 67 |
|
| 68 |
def get_lang_identifier():
|
| 69 |
-
"""Lazy load language identifier."""
|
| 70 |
global _lang_identifier
|
| 71 |
if _lang_identifier is None:
|
| 72 |
_lang_identifier = load_lang_identifier(
|
|
@@ -77,7 +64,6 @@ def get_lang_identifier():
|
|
| 77 |
|
| 78 |
|
| 79 |
def get_translation_model():
|
| 80 |
-
"""Lazy load translation model."""
|
| 81 |
global _translation_tokenizer, _translation_model
|
| 82 |
if _translation_tokenizer is None or _translation_model is None:
|
| 83 |
_translation_tokenizer, _translation_model = load_translation_model(config.TRANSLATION_MODEL_NAME)
|
|
@@ -85,7 +71,6 @@ def get_translation_model():
|
|
| 85 |
|
| 86 |
|
| 87 |
def get_classifier():
|
| 88 |
-
"""Lazy load classifier."""
|
| 89 |
global _classifier
|
| 90 |
if _classifier is None:
|
| 91 |
_classifier = load_classifier(config.CLASSIFIER_PATH)
|
|
@@ -99,8 +84,6 @@ def detect_language(text: str, top_k: int = 1):
|
|
| 99 |
labels, probs = lang_identifier.predict(clean_text, k=top_k)
|
| 100 |
return [(l.replace("__label__", ""), float(p)) for l, p in zip(labels, probs)]
|
| 101 |
|
| 102 |
-
# Translation model loaded lazily via get_translation_model()
|
| 103 |
-
|
| 104 |
SUPPORTED_LANGS = {
|
| 105 |
"eng_Latn": "English",
|
| 106 |
"ibo_Latn": "Igbo",
|
|
@@ -110,7 +93,6 @@ SUPPORTED_LANGS = {
|
|
| 110 |
"amh_Latn": "Amharic",
|
| 111 |
}
|
| 112 |
|
| 113 |
-
# Text chunking
|
| 114 |
_SENTENCE_SPLIT_RE = re.compile(r'(?<=[.!?])\s+')
|
| 115 |
|
| 116 |
def chunk_text(text: str, max_len: int = 400) -> List[str]:
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import sys
|
| 3 |
import re
|
|
|
|
| 12 |
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM, NllbTokenizer
|
| 13 |
from sentence_transformers import SentenceTransformer
|
| 14 |
from app.utils import config
|
| 15 |
+
from app.utils.memory import memory_store
|
| 16 |
from typing import List
|
| 17 |
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
| 20 |
if BASE_DIR not in sys.path:
|
| 21 |
sys.path.insert(0, BASE_DIR)
|
| 22 |
|
|
|
|
| 23 |
from app.utils.model_manager import (
|
| 24 |
load_expert_model,
|
| 25 |
load_translation_model,
|
|
|
|
| 29 |
get_device
|
| 30 |
)
|
| 31 |
|
| 32 |
+
DEVICE = get_device()
|
|
|
|
|
|
|
| 33 |
_tokenizer = None
|
| 34 |
_model = None
|
| 35 |
_embedder = None
|
|
|
|
| 40 |
|
| 41 |
|
| 42 |
def get_expert_model():
|
|
|
|
| 43 |
global _tokenizer, _model
|
| 44 |
if _tokenizer is None or _model is None:
|
| 45 |
_tokenizer, _model = load_expert_model(config.EXPERT_MODEL_NAME, use_quantization=True)
|
|
|
|
| 47 |
|
| 48 |
|
| 49 |
def get_embedder():
|
|
|
|
| 50 |
global _embedder
|
| 51 |
if _embedder is None:
|
| 52 |
_embedder = load_embedder(config.EMBEDDING_MODEL)
|
|
|
|
| 54 |
|
| 55 |
|
| 56 |
def get_lang_identifier():
|
|
|
|
| 57 |
global _lang_identifier
|
| 58 |
if _lang_identifier is None:
|
| 59 |
_lang_identifier = load_lang_identifier(
|
|
|
|
| 64 |
|
| 65 |
|
| 66 |
def get_translation_model():
|
|
|
|
| 67 |
global _translation_tokenizer, _translation_model
|
| 68 |
if _translation_tokenizer is None or _translation_model is None:
|
| 69 |
_translation_tokenizer, _translation_model = load_translation_model(config.TRANSLATION_MODEL_NAME)
|
|
|
|
| 71 |
|
| 72 |
|
| 73 |
def get_classifier():
|
|
|
|
| 74 |
global _classifier
|
| 75 |
if _classifier is None:
|
| 76 |
_classifier = load_classifier(config.CLASSIFIER_PATH)
|
|
|
|
| 84 |
labels, probs = lang_identifier.predict(clean_text, k=top_k)
|
| 85 |
return [(l.replace("__label__", ""), float(p)) for l, p in zip(labels, probs)]
|
| 86 |
|
|
|
|
|
|
|
| 87 |
SUPPORTED_LANGS = {
|
| 88 |
"eng_Latn": "English",
|
| 89 |
"ibo_Latn": "Igbo",
|
|
|
|
| 93 |
"amh_Latn": "Amharic",
|
| 94 |
}
|
| 95 |
|
|
|
|
| 96 |
_SENTENCE_SPLIT_RE = re.compile(r'(?<=[.!?])\s+')
|
| 97 |
|
| 98 |
def chunk_text(text: str, max_len: int = 400) -> List[str]:
|
app/main.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
# Aglimate_backend/app/main.py
|
| 2 |
import os
|
| 3 |
import sys
|
| 4 |
import logging
|
|
@@ -47,7 +46,6 @@ def startup_event():
|
|
| 47 |
|
| 48 |
@app.get("/")
|
| 49 |
def home():
|
| 50 |
-
"""Health check endpoint."""
|
| 51 |
return {
|
| 52 |
"status": "Aglimate climate-resilient backend running",
|
| 53 |
"version": "2.0.0",
|
|
@@ -94,7 +92,7 @@ async def advise_climate_resilient_endpoint(
|
|
| 94 |
),
|
| 95 |
video: Optional[UploadFile] = File(
|
| 96 |
None,
|
| 97 |
-
description="Optional short field video
|
| 98 |
),
|
| 99 |
):
|
| 100 |
"""
|
|
@@ -110,9 +108,8 @@ async def advise_climate_resilient_endpoint(
|
|
| 110 |
if not session_id:
|
| 111 |
session_id = str(uuid.uuid4())
|
| 112 |
|
| 113 |
-
image_bytes = None
|
| 114 |
-
if
|
| 115 |
-
image_bytes = await photo.read()
|
| 116 |
|
| 117 |
result = advise_climate_resilient(
|
| 118 |
query=query,
|
|
@@ -120,12 +117,9 @@ async def advise_climate_resilient_endpoint(
|
|
| 120 |
latitude=latitude,
|
| 121 |
longitude=longitude,
|
| 122 |
image_bytes=image_bytes,
|
|
|
|
| 123 |
)
|
| 124 |
|
| 125 |
-
# video is currently accepted but ignored; kept for forward-compatibility
|
| 126 |
-
if video is not None:
|
| 127 |
-
result["video_attached"] = True
|
| 128 |
-
|
| 129 |
return result
|
| 130 |
|
| 131 |
if __name__ == "__main__":
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import sys
|
| 3 |
import logging
|
|
|
|
| 46 |
|
| 47 |
@app.get("/")
|
| 48 |
def home():
|
|
|
|
| 49 |
return {
|
| 50 |
"status": "Aglimate climate-resilient backend running",
|
| 51 |
"version": "2.0.0",
|
|
|
|
| 92 |
),
|
| 93 |
video: Optional[UploadFile] = File(
|
| 94 |
None,
|
| 95 |
+
description="Optional short field video of the farm (optional)",
|
| 96 |
),
|
| 97 |
):
|
| 98 |
"""
|
|
|
|
| 108 |
if not session_id:
|
| 109 |
session_id = str(uuid.uuid4())
|
| 110 |
|
| 111 |
+
image_bytes = await photo.read() if photo is not None else None
|
| 112 |
+
video_bytes = await video.read() if video is not None else None
|
|
|
|
| 113 |
|
| 114 |
result = advise_climate_resilient(
|
| 115 |
query=query,
|
|
|
|
| 117 |
latitude=latitude,
|
| 118 |
longitude=longitude,
|
| 119 |
image_bytes=image_bytes,
|
| 120 |
+
video_bytes=video_bytes,
|
| 121 |
)
|
| 122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
return result
|
| 124 |
|
| 125 |
if __name__ == "__main__":
|
app/utils/config.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
| 1 |
-
#
|
| 2 |
-
# TerraSyncra_backend/app/utils/config.py
|
| 3 |
from pathlib import Path
|
| 4 |
import os
|
| 5 |
import sys
|
|
@@ -26,8 +24,6 @@ CLASSIFIER_CONFIDENCE_THRESHOLD = float(os.getenv("CLASSIFIER_CONFIDENCE_THRESHO
|
|
| 26 |
|
| 27 |
|
| 28 |
EXPERT_MODEL_NAME = os.getenv("EXPERT_MODEL_NAME", "Qwen/Qwen1.5-1.8B")
|
| 29 |
-
|
| 30 |
-
# Multimodal expert model (Qwen-VL) for image-aware advisory
|
| 31 |
MULTIMODAL_MODEL_NAME = os.getenv("MULTIMODAL_MODEL_NAME", "Qwen/Qwen2-VL-2B-Instruct")
|
| 32 |
|
| 33 |
LANG_ID_MODEL_REPO = os.getenv("LANG_ID_MODEL_REPO", "facebook/fasttext-language-identification")
|
|
|
|
|
|
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
import os
|
| 3 |
import sys
|
|
|
|
| 24 |
|
| 25 |
|
| 26 |
EXPERT_MODEL_NAME = os.getenv("EXPERT_MODEL_NAME", "Qwen/Qwen1.5-1.8B")
|
|
|
|
|
|
|
| 27 |
MULTIMODAL_MODEL_NAME = os.getenv("MULTIMODAL_MODEL_NAME", "Qwen/Qwen2-VL-2B-Instruct")
|
| 28 |
|
| 29 |
LANG_ID_MODEL_REPO = os.getenv("LANG_ID_MODEL_REPO", "facebook/fasttext-language-identification")
|
app/utils/model_manager.py
CHANGED
|
@@ -1,8 +1,3 @@
|
|
| 1 |
-
# TerraSyncra/app/utils/model_manager.py
|
| 2 |
-
"""
|
| 3 |
-
Lazy Model Manager for CPU Optimization
|
| 4 |
-
Loads models on-demand instead of at import time.
|
| 5 |
-
"""
|
| 6 |
import os
|
| 7 |
import logging
|
| 8 |
import torch
|
|
@@ -11,7 +6,6 @@ from functools import lru_cache
|
|
| 11 |
|
| 12 |
logging.basicConfig(level=logging.INFO)
|
| 13 |
|
| 14 |
-
# Global model cache
|
| 15 |
_models = {
|
| 16 |
"expert_model": None,
|
| 17 |
"expert_tokenizer": None,
|
|
@@ -24,22 +18,14 @@ _models = {
|
|
| 24 |
"classifier": None,
|
| 25 |
}
|
| 26 |
|
| 27 |
-
_device = "cpu"
|
| 28 |
|
| 29 |
|
| 30 |
def get_device():
|
| 31 |
-
"""Always return CPU for HuggingFace Spaces."""
|
| 32 |
return _device
|
| 33 |
|
| 34 |
|
| 35 |
def load_expert_model(model_name: str, use_quantization: bool = True):
|
| 36 |
-
"""
|
| 37 |
-
Lazy load expert model with optional quantization.
|
| 38 |
-
|
| 39 |
-
Args:
|
| 40 |
-
model_name: Model identifier
|
| 41 |
-
use_quantization: Use INT8 quantization for CPU (recommended)
|
| 42 |
-
"""
|
| 43 |
if _models["expert_model"] is not None:
|
| 44 |
return _models["expert_tokenizer"], _models["expert_model"]
|
| 45 |
|
|
@@ -48,25 +34,20 @@ def load_expert_model(model_name: str, use_quantization: bool = True):
|
|
| 48 |
|
| 49 |
logging.info(f"Loading expert model ({model_name})...")
|
| 50 |
|
| 51 |
-
# Get cache directory from config
|
| 52 |
cache_dir = getattr(config, 'hf_cache', '/models/huggingface')
|
| 53 |
|
| 54 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 55 |
model_name,
|
| 56 |
-
use_fast=True,
|
| 57 |
cache_dir=cache_dir
|
| 58 |
)
|
| 59 |
|
| 60 |
-
# Load model with CPU optimizations
|
| 61 |
model_kwargs = {
|
| 62 |
-
"torch_dtype": torch.float32,
|
| 63 |
"device_map": "cpu",
|
| 64 |
"low_cpu_mem_usage": True,
|
| 65 |
}
|
| 66 |
|
| 67 |
-
# Note: For CPU, we use float32 (most compatible)
|
| 68 |
-
# For quantization on CPU, consider using smaller models or ONNX runtime
|
| 69 |
-
# BitsAndBytesConfig is GPU-only, so we skip it for CPU deployment
|
| 70 |
logging.info("Loading model in float32 for CPU compatibility")
|
| 71 |
|
| 72 |
cache_dir = getattr(config, 'hf_cache', '/models/huggingface')
|
|
@@ -77,7 +58,7 @@ def load_expert_model(model_name: str, use_quantization: bool = True):
|
|
| 77 |
**model_kwargs
|
| 78 |
)
|
| 79 |
|
| 80 |
-
model.eval()
|
| 81 |
|
| 82 |
_models["expert_model"] = model
|
| 83 |
_models["expert_tokenizer"] = tokenizer
|
|
@@ -88,43 +69,50 @@ def load_expert_model(model_name: str, use_quantization: bool = True):
|
|
| 88 |
|
| 89 |
def load_multimodal_model(model_name: str):
|
| 90 |
"""
|
| 91 |
-
Lazy load multimodal
|
| 92 |
-
Used for photo-aware advisory.
|
| 93 |
"""
|
| 94 |
if _models["multimodal_model"] is not None:
|
| 95 |
return _models["multimodal_processor"], _models["multimodal_model"]
|
| 96 |
|
| 97 |
-
#
|
| 98 |
-
#
|
| 99 |
-
from transformers import AutoProcessor,
|
| 100 |
from app.utils import config
|
| 101 |
|
| 102 |
logging.info(f"Loading multimodal expert model ({model_name})...")
|
| 103 |
|
| 104 |
cache_dir = getattr(config, "hf_cache", "/models/huggingface")
|
| 105 |
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
|
| 130 |
def load_translation_model(model_name: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import logging
|
| 3 |
import torch
|
|
|
|
| 6 |
|
| 7 |
logging.basicConfig(level=logging.INFO)
|
| 8 |
|
|
|
|
| 9 |
_models = {
|
| 10 |
"expert_model": None,
|
| 11 |
"expert_tokenizer": None,
|
|
|
|
| 18 |
"classifier": None,
|
| 19 |
}
|
| 20 |
|
| 21 |
+
_device = "cpu"
|
| 22 |
|
| 23 |
|
| 24 |
def get_device():
|
|
|
|
| 25 |
return _device
|
| 26 |
|
| 27 |
|
| 28 |
def load_expert_model(model_name: str, use_quantization: bool = True):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
if _models["expert_model"] is not None:
|
| 30 |
return _models["expert_tokenizer"], _models["expert_model"]
|
| 31 |
|
|
|
|
| 34 |
|
| 35 |
logging.info(f"Loading expert model ({model_name})...")
|
| 36 |
|
|
|
|
| 37 |
cache_dir = getattr(config, 'hf_cache', '/models/huggingface')
|
| 38 |
|
| 39 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 40 |
model_name,
|
| 41 |
+
use_fast=True,
|
| 42 |
cache_dir=cache_dir
|
| 43 |
)
|
| 44 |
|
|
|
|
| 45 |
model_kwargs = {
|
| 46 |
+
"torch_dtype": torch.float32,
|
| 47 |
"device_map": "cpu",
|
| 48 |
"low_cpu_mem_usage": True,
|
| 49 |
}
|
| 50 |
|
|
|
|
|
|
|
|
|
|
| 51 |
logging.info("Loading model in float32 for CPU compatibility")
|
| 52 |
|
| 53 |
cache_dir = getattr(config, 'hf_cache', '/models/huggingface')
|
|
|
|
| 58 |
**model_kwargs
|
| 59 |
)
|
| 60 |
|
| 61 |
+
model.eval()
|
| 62 |
|
| 63 |
_models["expert_model"] = model
|
| 64 |
_models["expert_tokenizer"] = tokenizer
|
|
|
|
| 69 |
|
| 70 |
def load_multimodal_model(model_name: str):
    """
    Lazy load multimodal Qwen2-VL model (vision-language).
    Used for photo/video-aware advisory.

    Args:
        model_name: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``(processor, model)`` tuple. On success the pair is stored in the
        module-level ``_models`` cache and reused by later calls. When the
        multimodal stack cannot be imported or loaded, ``(None, None)`` is
        returned so callers can fall back to the text-only expert model.
    """
    if _models["multimodal_model"] is not None:
        return _models["multimodal_processor"], _models["multimodal_model"]

    from app.utils import config

    logging.info(f"Loading multimodal expert model ({model_name})...")

    cache_dir = getattr(config, "hf_cache", "/models/huggingface")

    try:
        # Imported inside the try block so that an installation whose
        # transformers build lacks Qwen2VLForConditionalGeneration takes the
        # same (None, None) fallback as a failed load, instead of raising
        # ImportError to the caller.
        from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

        processor = AutoProcessor.from_pretrained(
            model_name,
            cache_dir=cache_dir,
        )

        model = Qwen2VLForConditionalGeneration.from_pretrained(
            model_name,
            torch_dtype=torch.float32,  # CPU deployment
            cache_dir=cache_dir,
            device_map="cpu",
            low_cpu_mem_usage=True,
        )

        model.eval()
    except Exception as e:
        # Deliberately broad: any failure here means "multimodal unavailable".
        # exc_info keeps the traceback in the log for diagnosis.
        logging.error(
            f"Failed to load multimodal model {model_name}: {e}. "
            "Falling back to text-only expert model.",
            exc_info=True,
        )
        _models["multimodal_model"] = None
        _models["multimodal_processor"] = None
        return None, None

    _models["multimodal_model"] = model
    _models["multimodal_processor"] = processor

    logging.info("Multimodal expert model loaded successfully")
    return processor, model
|
| 116 |
|
| 117 |
|
| 118 |
def load_translation_model(model_name: str):
|
requirements.txt
CHANGED
|
@@ -2,7 +2,7 @@ crewai
|
|
| 2 |
langchain
|
| 3 |
langchain-community
|
| 4 |
faiss-cpu
|
| 5 |
-
transformers
|
| 6 |
sentence-transformers
|
| 7 |
pydantic
|
| 8 |
joblib
|
|
@@ -21,4 +21,5 @@ sentencepiece
|
|
| 21 |
fasttext
|
| 22 |
pillow
|
| 23 |
cachetools
|
| 24 |
-
python-multipart
|
|
|
|
|
|
| 2 |
langchain
|
| 3 |
langchain-community
|
| 4 |
faiss-cpu
|
| 5 |
+
transformers @ git+https://github.com/huggingface/transformers
|
| 6 |
sentence-transformers
|
| 7 |
pydantic
|
| 8 |
joblib
|
|
|
|
| 21 |
fasttext
|
| 22 |
pillow
|
| 23 |
cachetools
|
| 24 |
+
python-multipart
|
| 25 |
+
qwen-vl-utils
|