nexusbert commited on
Commit
c7ece81
·
1 Parent(s): 4695df6

Refactor climate advisory agent to support video input and improve model loading. Update requirements to use the latest transformers from GitHub and add new utility dependencies. Clean up code by removing unnecessary comments and enhancing descriptions for clarity.

Browse files
app/agents/climate_agent.py CHANGED
@@ -1,19 +1,23 @@
1
  """
2
  Farmer-First Climate-Resilient Advisory Agent
3
 
4
- Uses a multimodal Qwen-VL model to provide climate-resilient advice to
5
- smallholder farmers based on text, optional photo, and GPS location.
 
 
6
  """
7
 
8
  import io
9
  import logging
 
 
10
  from typing import Optional, Dict, Any
11
 
12
- from PIL import Image
13
  import requests
 
14
 
15
  from app.utils import config
16
- from app.utils.model_manager import load_multimodal_model
17
  from app.utils.memory import memory_store
18
 
19
  logging.basicConfig(
@@ -59,19 +63,35 @@ def _build_weather_context(latitude: Optional[float], longitude: Optional[float]
59
  return ""
60
 
61
 
 
 
 
 
 
 
 
 
 
 
 
62
  def advise_climate_resilient(
63
  query: str,
64
  session_id: str,
65
  latitude: Optional[float] = None,
66
  longitude: Optional[float] = None,
67
  image_bytes: Optional[bytes] = None,
 
68
  ) -> Dict[str, Any]:
69
  """
70
  Run the Farmer-First Climate-Resilient advisory pipeline with optional image + GPS.
71
-
72
- All reasoning is handled by a multimodal Qwen-VL model.
 
 
 
73
  """
74
  processor, model = load_multimodal_model(config.MULTIMODAL_MODEL_NAME)
 
75
 
76
  # Conversation history (text-only, 1-hour TTL shared with core pipeline)
77
  history = memory_store.get_history(session_id) or []
@@ -122,6 +142,7 @@ def advise_climate_resilient(
122
  else "No photo is attached. Use only the text and any weather/location information.\n"
123
  )
124
 
 
125
  prompt_parts = [system_prompt]
126
  if location_context:
127
  prompt_parts.append("\nLOCATION & WEATHER CONTEXT:\n")
@@ -141,39 +162,82 @@ def advise_climate_resilient(
141
 
142
  full_prompt = "".join(prompt_parts)
143
 
144
- # Prepare multimodal inputs
145
- inputs = None
146
- image = None
147
- if image_bytes:
148
- try:
149
- image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
150
- except Exception as e:
151
- logging.warning(f"Failed to decode image bytes, falling back to text-only: {e}")
152
- image = None
153
-
154
- if image is not None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  inputs = processor(
156
- text=full_prompt,
157
- images=image,
 
 
158
  return_tensors="pt",
159
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  else:
161
- inputs = processor(
162
- text=full_prompt,
 
 
 
163
  return_tensors="pt",
 
 
 
 
 
 
164
  )
165
-
166
- inputs = {k: v.to(model.device) for k, v in inputs.items()}
167
-
168
- generated_ids = model.generate(
169
- **inputs,
170
- max_new_tokens=512,
171
- temperature=0.4,
172
- top_p=0.9,
173
- )
174
-
175
- outputs = processor.batch_decode(generated_ids, skip_special_tokens=True)
176
- answer = (outputs[0] if outputs else "").strip()
177
 
178
  # Save to shared memory history
179
  history.append({"role": "user", "content": query})
@@ -185,8 +249,9 @@ def advise_climate_resilient(
185
  "answer": answer,
186
  "latitude": latitude,
187
  "longitude": longitude,
188
- "used_image": bool(image is not None),
189
- "model_used": config.MULTIMODAL_MODEL_NAME,
 
190
  }
191
 
192
 
 
1
  """
2
  Farmer-First Climate-Resilient Advisory Agent
3
 
4
+ Uses a multimodal Qwen2-VL model (when available) to provide
5
+ climate-resilient advice to smallholder farmers from text, optional
6
+ photo/video, and GPS location. Falls back to text-only Qwen on
7
+ environments where Qwen2-VL cannot be fully initialized.
8
  """
9
 
10
  import io
11
  import logging
12
+ import os
13
+ import tempfile
14
  from typing import Optional, Dict, Any
15
 
 
16
  import requests
17
+ from qwen_vl_utils import process_vision_info
18
 
19
  from app.utils import config
20
+ from app.utils.model_manager import load_multimodal_model, load_expert_model
21
  from app.utils.memory import memory_store
22
 
23
  logging.basicConfig(
 
63
  return ""
64
 
65
 
66
+ def _save_temp_file(data: bytes, suffix: str) -> str:
67
+ """
68
+ Save bytes to a temporary file and return a file:// URI for Qwen2-VL.
69
+ """
70
+ tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
71
+ tmp.write(data)
72
+ tmp.flush()
73
+ tmp.close()
74
+ return f"file://{tmp.name}"
75
+
76
+
77
  def advise_climate_resilient(
78
  query: str,
79
  session_id: str,
80
  latitude: Optional[float] = None,
81
  longitude: Optional[float] = None,
82
  image_bytes: Optional[bytes] = None,
83
+ video_bytes: Optional[bytes] = None,
84
  ) -> Dict[str, Any]:
85
  """
86
  Run the Farmer-First Climate-Resilient advisory pipeline with optional image + GPS.
87
+
88
+ Tries to use a multimodal Qwen-VL model when available; if the
89
+ multimodal stack cannot be loaded on this environment, gracefully
90
+ falls back to text-only Qwen while still using location/weather
91
+ context.
92
  """
93
  processor, model = load_multimodal_model(config.MULTIMODAL_MODEL_NAME)
94
+ use_multimodal = processor is not None and model is not None
95
 
96
  # Conversation history (text-only, 1-hour TTL shared with core pipeline)
97
  history = memory_store.get_history(session_id) or []
 
142
  else "No photo is attached. Use only the text and any weather/location information.\n"
143
  )
144
 
145
+ # Build a single user text block that includes context + question.
146
  prompt_parts = [system_prompt]
147
  if location_context:
148
  prompt_parts.append("\nLOCATION & WEATHER CONTEXT:\n")
 
162
 
163
  full_prompt = "".join(prompt_parts)
164
 
165
+ # Multimodal path (if supported)
166
+ answer = ""
167
+ used_image_flag = False
168
+ used_video_flag = False
169
+
170
+ if use_multimodal:
171
+ # Build Qwen2-VL messages following official pattern
172
+ image_uri = _save_temp_file(image_bytes, ".jpg") if image_bytes else None
173
+ video_uri = _save_temp_file(video_bytes, ".mp4") if video_bytes else None
174
+
175
+ user_content = []
176
+ if image_uri:
177
+ user_content.append({"type": "image", "image": image_uri})
178
+ used_image_flag = True
179
+ if video_uri:
180
+ user_content.append(
181
+ {
182
+ "type": "video",
183
+ "video": video_uri,
184
+ "fps": 1.0,
185
+ }
186
+ )
187
+ used_video_flag = True
188
+
189
+ user_content.append({"type": "text", "text": full_prompt})
190
+
191
+ messages = [
192
+ {"role": "system", "content": system_prompt},
193
+ {"role": "user", "content": user_content},
194
+ ]
195
+
196
+ text_prompt = processor.apply_chat_template(
197
+ messages, tokenize=False, add_generation_prompt=True
198
+ )
199
+ image_inputs, video_inputs = process_vision_info(messages)
200
+
201
  inputs = processor(
202
+ text=[text_prompt],
203
+ images=image_inputs,
204
+ videos=video_inputs,
205
+ padding=True,
206
  return_tensors="pt",
207
  )
208
+ inputs = {k: v.to(model.device) for k, v in inputs.items()}
209
+
210
+ generated_ids = model.generate(
211
+ **inputs,
212
+ max_new_tokens=512,
213
+ temperature=0.4,
214
+ top_p=0.9,
215
+ )
216
+ generated_ids_trimmed = [
217
+ out_ids[len(in_ids) :]
218
+ for in_ids, out_ids in zip(inputs["input_ids"], generated_ids)
219
+ ]
220
+ outputs = processor.batch_decode(
221
+ generated_ids_trimmed,
222
+ skip_special_tokens=True,
223
+ clean_up_tokenization_spaces=False,
224
+ )
225
+ answer = (outputs[0] if outputs else "").strip()
226
  else:
227
+ # Fallback: text-only Qwen expert model, still using climate-aware prompt
228
+ logging.info("Multimodal model unavailable; using text-only expert model for /advise.")
229
+ tokenizer, text_model = load_expert_model(config.EXPERT_MODEL_NAME, use_quantization=True)
230
+ inputs = tokenizer(
231
+ full_prompt,
232
  return_tensors="pt",
233
+ ).to(text_model.device)
234
+ generated_ids = text_model.generate(
235
+ **inputs,
236
+ max_new_tokens=512,
237
+ temperature=0.4,
238
+ top_p=0.9,
239
  )
240
+ answer = tokenizer.decode(generated_ids[0], skip_special_tokens=True).strip()
 
 
 
 
 
 
 
 
 
 
 
241
 
242
  # Save to shared memory history
243
  history.append({"role": "user", "content": query})
 
249
  "answer": answer,
250
  "latitude": latitude,
251
  "longitude": longitude,
252
+ "used_image": bool(used_image_flag),
253
+ "used_video": bool(used_video_flag),
254
+ "model_used": config.MULTIMODAL_MODEL_NAME if use_multimodal else config.EXPERT_MODEL_NAME,
255
  }
256
 
257
 
app/agents/crew_pipeline.py CHANGED
@@ -1,4 +1,3 @@
1
- # Aglimate/app/agents/crew_pipeline.py
2
  import os
3
  import sys
4
  import re
@@ -13,21 +12,14 @@ from huggingface_hub import hf_hub_download
13
  from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM, NllbTokenizer
14
  from sentence_transformers import SentenceTransformer
15
  from app.utils import config
16
- from app.utils.memory import memory_store # memory module
17
  from typing import List
18
 
19
 
20
- hf_cache = "/models/huggingface"
21
- os.environ["HF_HOME"] = hf_cache
22
- os.environ["TRANSFORMERS_CACHE"] = hf_cache
23
- os.environ["HUGGINGFACE_HUB_CACHE"] = hf_cache
24
- os.makedirs(hf_cache, exist_ok=True)
25
-
26
  BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
27
  if BASE_DIR not in sys.path:
28
  sys.path.insert(0, BASE_DIR)
29
 
30
- # Lazy loading - models loaded on demand via model_manager
31
  from app.utils.model_manager import (
32
  load_expert_model,
33
  load_translation_model,
@@ -37,9 +29,7 @@ from app.utils.model_manager import (
37
  get_device
38
  )
39
 
40
- DEVICE = get_device() # Always CPU for HuggingFace Spaces
41
-
42
- # Models will be loaded lazily when needed
43
  _tokenizer = None
44
  _model = None
45
  _embedder = None
@@ -50,7 +40,6 @@ _classifier = None
50
 
51
 
52
  def get_expert_model():
53
- """Lazy load expert model."""
54
  global _tokenizer, _model
55
  if _tokenizer is None or _model is None:
56
  _tokenizer, _model = load_expert_model(config.EXPERT_MODEL_NAME, use_quantization=True)
@@ -58,7 +47,6 @@ def get_expert_model():
58
 
59
 
60
  def get_embedder():
61
- """Lazy load embedder."""
62
  global _embedder
63
  if _embedder is None:
64
  _embedder = load_embedder(config.EMBEDDING_MODEL)
@@ -66,7 +54,6 @@ def get_embedder():
66
 
67
 
68
  def get_lang_identifier():
69
- """Lazy load language identifier."""
70
  global _lang_identifier
71
  if _lang_identifier is None:
72
  _lang_identifier = load_lang_identifier(
@@ -77,7 +64,6 @@ def get_lang_identifier():
77
 
78
 
79
  def get_translation_model():
80
- """Lazy load translation model."""
81
  global _translation_tokenizer, _translation_model
82
  if _translation_tokenizer is None or _translation_model is None:
83
  _translation_tokenizer, _translation_model = load_translation_model(config.TRANSLATION_MODEL_NAME)
@@ -85,7 +71,6 @@ def get_translation_model():
85
 
86
 
87
  def get_classifier():
88
- """Lazy load classifier."""
89
  global _classifier
90
  if _classifier is None:
91
  _classifier = load_classifier(config.CLASSIFIER_PATH)
@@ -99,8 +84,6 @@ def detect_language(text: str, top_k: int = 1):
99
  labels, probs = lang_identifier.predict(clean_text, k=top_k)
100
  return [(l.replace("__label__", ""), float(p)) for l, p in zip(labels, probs)]
101
 
102
- # Translation model loaded lazily via get_translation_model()
103
-
104
  SUPPORTED_LANGS = {
105
  "eng_Latn": "English",
106
  "ibo_Latn": "Igbo",
@@ -110,7 +93,6 @@ SUPPORTED_LANGS = {
110
  "amh_Latn": "Amharic",
111
  }
112
 
113
- # Text chunking
114
  _SENTENCE_SPLIT_RE = re.compile(r'(?<=[.!?])\s+')
115
 
116
  def chunk_text(text: str, max_len: int = 400) -> List[str]:
 
 
1
  import os
2
  import sys
3
  import re
 
12
  from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM, NllbTokenizer
13
  from sentence_transformers import SentenceTransformer
14
  from app.utils import config
15
+ from app.utils.memory import memory_store
16
  from typing import List
17
 
18
 
 
 
 
 
 
 
19
  BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
20
  if BASE_DIR not in sys.path:
21
  sys.path.insert(0, BASE_DIR)
22
 
 
23
  from app.utils.model_manager import (
24
  load_expert_model,
25
  load_translation_model,
 
29
  get_device
30
  )
31
 
32
+ DEVICE = get_device()
 
 
33
  _tokenizer = None
34
  _model = None
35
  _embedder = None
 
40
 
41
 
42
  def get_expert_model():
 
43
  global _tokenizer, _model
44
  if _tokenizer is None or _model is None:
45
  _tokenizer, _model = load_expert_model(config.EXPERT_MODEL_NAME, use_quantization=True)
 
47
 
48
 
49
  def get_embedder():
 
50
  global _embedder
51
  if _embedder is None:
52
  _embedder = load_embedder(config.EMBEDDING_MODEL)
 
54
 
55
 
56
  def get_lang_identifier():
 
57
  global _lang_identifier
58
  if _lang_identifier is None:
59
  _lang_identifier = load_lang_identifier(
 
64
 
65
 
66
  def get_translation_model():
 
67
  global _translation_tokenizer, _translation_model
68
  if _translation_tokenizer is None or _translation_model is None:
69
  _translation_tokenizer, _translation_model = load_translation_model(config.TRANSLATION_MODEL_NAME)
 
71
 
72
 
73
  def get_classifier():
 
74
  global _classifier
75
  if _classifier is None:
76
  _classifier = load_classifier(config.CLASSIFIER_PATH)
 
84
  labels, probs = lang_identifier.predict(clean_text, k=top_k)
85
  return [(l.replace("__label__", ""), float(p)) for l, p in zip(labels, probs)]
86
 
 
 
87
  SUPPORTED_LANGS = {
88
  "eng_Latn": "English",
89
  "ibo_Latn": "Igbo",
 
93
  "amh_Latn": "Amharic",
94
  }
95
 
 
96
  _SENTENCE_SPLIT_RE = re.compile(r'(?<=[.!?])\s+')
97
 
98
  def chunk_text(text: str, max_len: int = 400) -> List[str]:
app/main.py CHANGED
@@ -1,4 +1,3 @@
1
- # Aglimate_backend/app/main.py
2
  import os
3
  import sys
4
  import logging
@@ -47,7 +46,6 @@ def startup_event():
47
 
48
  @app.get("/")
49
  def home():
50
- """Health check endpoint."""
51
  return {
52
  "status": "Aglimate climate-resilient backend running",
53
  "version": "2.0.0",
@@ -94,7 +92,7 @@ async def advise_climate_resilient_endpoint(
94
  ),
95
  video: Optional[UploadFile] = File(
96
  None,
97
- description="Optional short field video (currently accepted but not yet analyzed; reserved for future use)",
98
  ),
99
  ):
100
  """
@@ -110,9 +108,8 @@ async def advise_climate_resilient_endpoint(
110
  if not session_id:
111
  session_id = str(uuid.uuid4())
112
 
113
- image_bytes = None
114
- if photo is not None:
115
- image_bytes = await photo.read()
116
 
117
  result = advise_climate_resilient(
118
  query=query,
@@ -120,12 +117,9 @@ async def advise_climate_resilient_endpoint(
120
  latitude=latitude,
121
  longitude=longitude,
122
  image_bytes=image_bytes,
 
123
  )
124
 
125
- # video is currently accepted but ignored; kept for forward-compatibility
126
- if video is not None:
127
- result["video_attached"] = True
128
-
129
  return result
130
 
131
  if __name__ == "__main__":
 
 
1
  import os
2
  import sys
3
  import logging
 
46
 
47
  @app.get("/")
48
  def home():
 
49
  return {
50
  "status": "Aglimate climate-resilient backend running",
51
  "version": "2.0.0",
 
92
  ),
93
  video: Optional[UploadFile] = File(
94
  None,
95
+ description="Optional short field video of the farm (optional)",
96
  ),
97
  ):
98
  """
 
108
  if not session_id:
109
  session_id = str(uuid.uuid4())
110
 
111
+ image_bytes = await photo.read() if photo is not None else None
112
+ video_bytes = await video.read() if video is not None else None
 
113
 
114
  result = advise_climate_resilient(
115
  query=query,
 
117
  latitude=latitude,
118
  longitude=longitude,
119
  image_bytes=image_bytes,
120
+ video_bytes=video_bytes,
121
  )
122
 
 
 
 
 
123
  return result
124
 
125
  if __name__ == "__main__":
app/utils/config.py CHANGED
@@ -1,5 +1,3 @@
1
- #
2
- # TerraSyncra_backend/app/utils/config.py
3
  from pathlib import Path
4
  import os
5
  import sys
@@ -26,8 +24,6 @@ CLASSIFIER_CONFIDENCE_THRESHOLD = float(os.getenv("CLASSIFIER_CONFIDENCE_THRESHO
26
 
27
 
28
  EXPERT_MODEL_NAME = os.getenv("EXPERT_MODEL_NAME", "Qwen/Qwen1.5-1.8B")
29
-
30
- # Multimodal expert model (Qwen-VL) for image-aware advisory
31
  MULTIMODAL_MODEL_NAME = os.getenv("MULTIMODAL_MODEL_NAME", "Qwen/Qwen2-VL-2B-Instruct")
32
 
33
  LANG_ID_MODEL_REPO = os.getenv("LANG_ID_MODEL_REPO", "facebook/fasttext-language-identification")
 
 
 
1
  from pathlib import Path
2
  import os
3
  import sys
 
24
 
25
 
26
  EXPERT_MODEL_NAME = os.getenv("EXPERT_MODEL_NAME", "Qwen/Qwen1.5-1.8B")
 
 
27
  MULTIMODAL_MODEL_NAME = os.getenv("MULTIMODAL_MODEL_NAME", "Qwen/Qwen2-VL-2B-Instruct")
28
 
29
  LANG_ID_MODEL_REPO = os.getenv("LANG_ID_MODEL_REPO", "facebook/fasttext-language-identification")
app/utils/model_manager.py CHANGED
@@ -1,8 +1,3 @@
1
- # TerraSyncra/app/utils/model_manager.py
2
- """
3
- Lazy Model Manager for CPU Optimization
4
- Loads models on-demand instead of at import time.
5
- """
6
  import os
7
  import logging
8
  import torch
@@ -11,7 +6,6 @@ from functools import lru_cache
11
 
12
  logging.basicConfig(level=logging.INFO)
13
 
14
- # Global model cache
15
  _models = {
16
  "expert_model": None,
17
  "expert_tokenizer": None,
@@ -24,22 +18,14 @@ _models = {
24
  "classifier": None,
25
  }
26
 
27
- _device = "cpu" # Force CPU for HuggingFace Spaces
28
 
29
 
30
  def get_device():
31
- """Always return CPU for HuggingFace Spaces."""
32
  return _device
33
 
34
 
35
  def load_expert_model(model_name: str, use_quantization: bool = True):
36
- """
37
- Lazy load expert model with optional quantization.
38
-
39
- Args:
40
- model_name: Model identifier
41
- use_quantization: Use INT8 quantization for CPU (recommended)
42
- """
43
  if _models["expert_model"] is not None:
44
  return _models["expert_tokenizer"], _models["expert_model"]
45
 
@@ -48,25 +34,20 @@ def load_expert_model(model_name: str, use_quantization: bool = True):
48
 
49
  logging.info(f"Loading expert model ({model_name})...")
50
 
51
- # Get cache directory from config
52
  cache_dir = getattr(config, 'hf_cache', '/models/huggingface')
53
 
54
  tokenizer = AutoTokenizer.from_pretrained(
55
  model_name,
56
- use_fast=True, # Use fast tokenizer
57
  cache_dir=cache_dir
58
  )
59
 
60
- # Load model with CPU optimizations
61
  model_kwargs = {
62
- "torch_dtype": torch.float32, # Use float32 for CPU
63
  "device_map": "cpu",
64
  "low_cpu_mem_usage": True,
65
  }
66
 
67
- # Note: For CPU, we use float32 (most compatible)
68
- # For quantization on CPU, consider using smaller models or ONNX runtime
69
- # BitsAndBytesConfig is GPU-only, so we skip it for CPU deployment
70
  logging.info("Loading model in float32 for CPU compatibility")
71
 
72
  cache_dir = getattr(config, 'hf_cache', '/models/huggingface')
@@ -77,7 +58,7 @@ def load_expert_model(model_name: str, use_quantization: bool = True):
77
  **model_kwargs
78
  )
79
 
80
- model.eval() # Set to evaluation mode
81
 
82
  _models["expert_model"] = model
83
  _models["expert_tokenizer"] = tokenizer
@@ -88,43 +69,50 @@ def load_expert_model(model_name: str, use_quantization: bool = True):
88
 
89
  def load_multimodal_model(model_name: str):
90
  """
91
- Lazy load multimodal Qwen-VL model (vision-language).
92
- Used for photo-aware advisory.
93
  """
94
  if _models["multimodal_model"] is not None:
95
  return _models["multimodal_processor"], _models["multimodal_model"]
96
 
97
- # Note: current transformers build on HF Spaces may not expose AutoModelForVision2Seq.
98
- # We rely on Qwen's remote code with AutoModelForCausalLM instead.
99
- from transformers import AutoProcessor, AutoModelForCausalLM
100
  from app.utils import config
101
 
102
  logging.info(f"Loading multimodal expert model ({model_name})...")
103
 
104
  cache_dir = getattr(config, "hf_cache", "/models/huggingface")
105
 
106
- processor = AutoProcessor.from_pretrained(
107
- model_name,
108
- cache_dir=cache_dir,
109
- trust_remote_code=True,
110
- )
111
-
112
- model = AutoModelForCausalLM.from_pretrained(
113
- model_name,
114
- torch_dtype=torch.float32,
115
- cache_dir=cache_dir,
116
- device_map="cpu",
117
- low_cpu_mem_usage=True,
118
- trust_remote_code=True,
119
- )
120
-
121
- model.eval()
122
-
123
- _models["multimodal_model"] = model
124
- _models["multimodal_processor"] = processor
125
-
126
- logging.info("Multimodal expert model loaded successfully")
127
- return processor, model
 
 
 
 
 
 
 
128
 
129
 
130
  def load_translation_model(model_name: str):
 
 
 
 
 
 
1
  import os
2
  import logging
3
  import torch
 
6
 
7
  logging.basicConfig(level=logging.INFO)
8
 
 
9
  _models = {
10
  "expert_model": None,
11
  "expert_tokenizer": None,
 
18
  "classifier": None,
19
  }
20
 
21
+ _device = "cpu"
22
 
23
 
24
  def get_device():
 
25
  return _device
26
 
27
 
28
  def load_expert_model(model_name: str, use_quantization: bool = True):
 
 
 
 
 
 
 
29
  if _models["expert_model"] is not None:
30
  return _models["expert_tokenizer"], _models["expert_model"]
31
 
 
34
 
35
  logging.info(f"Loading expert model ({model_name})...")
36
 
 
37
  cache_dir = getattr(config, 'hf_cache', '/models/huggingface')
38
 
39
  tokenizer = AutoTokenizer.from_pretrained(
40
  model_name,
41
+ use_fast=True,
42
  cache_dir=cache_dir
43
  )
44
 
 
45
  model_kwargs = {
46
+ "torch_dtype": torch.float32,
47
  "device_map": "cpu",
48
  "low_cpu_mem_usage": True,
49
  }
50
 
 
 
 
51
  logging.info("Loading model in float32 for CPU compatibility")
52
 
53
  cache_dir = getattr(config, 'hf_cache', '/models/huggingface')
 
58
  **model_kwargs
59
  )
60
 
61
+ model.eval()
62
 
63
  _models["expert_model"] = model
64
  _models["expert_tokenizer"] = tokenizer
 
69
 
70
def load_multimodal_model(model_name: str):
    """
    Lazily load and cache the Qwen2-VL vision-language stack.

    Returns ``(processor, model)`` on success, or ``(None, None)`` when the
    multimodal stack cannot be initialized in this environment — callers are
    expected to fall back to the text-only expert model in that case.
    """
    # Serve from the process-wide cache when a previous call succeeded.
    if _models["multimodal_model"] is not None:
        return _models["multimodal_processor"], _models["multimodal_model"]

    # Imported lazily so environments lacking Qwen2-VL support only pay the
    # cost (and can only fail) when the multimodal path is actually used.
    # With latest transformers + qwen-vl-utils, Qwen2VLForConditionalGeneration
    # and AutoProcessor support full image/video chat as in the official docs.
    from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
    from app.utils import config

    logging.info(f"Loading multimodal expert model ({model_name})...")

    hf_cache = getattr(config, "hf_cache", "/models/huggingface")

    try:
        vl_processor = AutoProcessor.from_pretrained(
            model_name,
            cache_dir=hf_cache,
        )
        vl_model = Qwen2VLForConditionalGeneration.from_pretrained(
            model_name,
            torch_dtype=torch.float32,  # CPU deployment
            cache_dir=hf_cache,
            device_map="cpu",
            low_cpu_mem_usage=True,
        ).eval()

        _models["multimodal_model"] = vl_model
        _models["multimodal_processor"] = vl_processor

        logging.info("Multimodal expert model loaded successfully")
        return vl_processor, vl_model
    except Exception as e:
        # Deliberate broad catch: any failure (missing class, OOM, download
        # error) downgrades the service to text-only rather than crashing.
        logging.error(
            f"Failed to load multimodal model {model_name}: {e}. "
            "Falling back to text-only expert model."
        )
        _models["multimodal_model"] = None
        _models["multimodal_processor"] = None
        return None, None
116
 
117
 
118
  def load_translation_model(model_name: str):
requirements.txt CHANGED
@@ -2,7 +2,7 @@ crewai
2
  langchain
3
  langchain-community
4
  faiss-cpu
5
- transformers>=4.51.0
6
  sentence-transformers
7
  pydantic
8
  joblib
@@ -21,4 +21,5 @@ sentencepiece
21
  fasttext
22
  pillow
23
  cachetools
24
- python-multipart
 
 
2
  langchain
3
  langchain-community
4
  faiss-cpu
5
+ # NOTE(review): unpinned VCS dependency — builds are not reproducible; pin a
+ # commit, e.g. transformers @ git+https://github.com/huggingface/transformers@<sha>
+ transformers @ git+https://github.com/huggingface/transformers
6
  sentence-transformers
7
  pydantic
8
  joblib
 
21
  fasttext
22
  pillow
23
  cachetools
24
+ python-multipart
25
+ qwen-vl-utils