Fola-AI committed
Commit 068e72c · 1 Parent(s): d9a0eb4

Use official N-ATLaS via transformers - no llama-cpp-python

Files changed (4):
  1. .DS_Store +0 -0
  2. Dockerfile +6 -7
  3. models/natlas_model.py +298 -467
  4. requirements.txt +12 -6
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
Dockerfile CHANGED
@@ -1,8 +1,8 @@
 # =============================================================================
-# FarmEyes - HuggingFace Spaces Dockerfile (GPU - Simplified)
+# FarmEyes - HuggingFace Spaces Dockerfile (Transformers Version)
 # =============================================================================
-# Uses PyTorch base image with CUDA pre-installed
-# llama-cpp-python CPU version works fine - still faster than free tier
+# Uses official N-ATLaS model via transformers - NO llama-cpp-python needed!
+# Fast build, official model support, GPU accelerated.
 # =============================================================================

 FROM pytorch/pytorch:2.1.0-cuda11.8-cudnn8-runtime
@@ -21,17 +21,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     libsm6 \
     libxext6 \
     libgl1 \
+    git \
     && rm -rf /var/lib/apt/lists/*

 # Copy requirements
 COPY requirements.txt .

-# Install Python dependencies (torch already in base image)
+# Install Python dependencies
+# Note: torch is already in base image
 RUN pip install --no-cache-dir -r requirements.txt

-# Install llama-cpp-python (CPU version - avoids long compile)
-RUN pip install --no-cache-dir llama-cpp-python
-
 # Copy application code
 COPY . .
models/natlas_model.py CHANGED
@@ -1,19 +1,16 @@
 """
-FarmEyes N-ATLaS Model Integration (Hybrid)
-============================================
-HYBRID APPROACH:
-1. PRIMARY: HuggingFace Inference API (fast, cloud-based)
-2. FALLBACK: Local GGUF model (optional - requires llama-cpp-python)
+FarmEyes N-ATLaS Model Integration (Transformers Version)
+==========================================================
+Uses the official N-ATLaS model via HuggingFace Transformers library.
+NO llama-cpp-python required - faster builds, official model support.

-API Model: NCAIR1/N-ATLaS
-GGUF Model: tosinamuda/N-ATLaS-GGUF (N-ATLaS-GGUF-Q4_K_M.gguf)
-
-HUGGINGFACE SPACES OPTIMIZED:
-- llama-cpp-python is OPTIONAL (avoids build timeout)
-- Works with HuggingFace API only if GGUF not available
-- GPU support when available
+Model: NCAIR1/N-ATLaS (8B parameters, Llama-3 based)
+Size: ~16GB (downloaded at runtime)

 Languages: English, Hausa, Yoruba, Igbo
+
+Powered by Awarri Technologies and the Federal Ministry of
+Communications, Innovation and Digital Economy.
 """

 import os
@@ -22,55 +19,37 @@ from pathlib import Path
 from typing import Optional, Dict, List
 import logging
 import time
+from datetime import datetime

 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)


-# =============================================================================
-# CHECK IF LLAMA-CPP-PYTHON IS AVAILABLE
-# =============================================================================
-
-LLAMA_CPP_AVAILABLE = False
-try:
-    from llama_cpp import Llama
-    LLAMA_CPP_AVAILABLE = True
-    logger.info("✅ llama-cpp-python is available - GGUF fallback enabled")
-except ImportError:
-    logger.warning("⚠️ llama-cpp-python not installed - GGUF fallback disabled")
-    logger.warning("   App will use HuggingFace API only for translations")
-
-
 # =============================================================================
 # ENVIRONMENT DETECTION
 # =============================================================================

-# Check if running on HuggingFace Spaces
 IS_HF_SPACES = os.environ.get("SPACE_ID") is not None

 # Check for GPU
 HAS_GPU = False
+GPU_NAME = "None"
 try:
     import torch
     HAS_GPU = torch.cuda.is_available()
     if HAS_GPU:
-        logger.info("🎮 GPU detected - GPU acceleration enabled")
+        GPU_NAME = torch.cuda.get_device_name(0)
+        logger.info(f"🎮 GPU detected: {GPU_NAME}")
+    else:
+        logger.info("🖥️ No GPU detected - using CPU")
 except ImportError:
-    pass
-
-# Set GPU layers based on environment
-if HAS_GPU:
-    DEFAULT_GPU_LAYERS = -1  # Use all GPU layers
-    logger.info("🎮 Using GPU acceleration")
-elif IS_HF_SPACES:
-    DEFAULT_GPU_LAYERS = 0  # CPU only on Spaces free tier
-    logger.info("🤗 Running on HuggingFace Spaces - CPU mode")
+    logger.warning("PyTorch not installed")
+
+if IS_HF_SPACES:
+    logger.info("🤗 Running on HuggingFace Spaces")
 else:
-    DEFAULT_GPU_LAYERS = -1  # Try GPU locally (Apple Silicon MPS)
     logger.info("🖥️ Running locally")

-DEFAULT_THREADS = 4
-

 # =============================================================================
 # LANGUAGE MAPPINGS
@@ -92,291 +71,100 @@ NATIVE_LANGUAGE_NAMES = {


 # =============================================================================
-# HUGGINGFACE INFERENCE API CLIENT (PRIMARY)
+# N-ATLAS MODEL (TRANSFORMERS VERSION)
 # =============================================================================

-class HuggingFaceAPIClient:
+class NATLaSTransformersModel:
     """
-    Client for HuggingFace Serverless Inference API.
-    Primary method - fast cloud-based inference.
-
-    NOTE: This is the MAIN method on HuggingFace Spaces when
-    llama-cpp-python is not installed.
-    """
-
-    MODEL_ID = "NCAIR1/N-ATLaS"
-    API_URL = "https://api-inference.huggingface.co/models/NCAIR1/N-ATLaS"
-
-    def __init__(self, api_token: Optional[str] = None):
-        self.api_token = api_token or os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
-        self._is_available = None
-        self._last_check = 0
-        self._check_interval = 300  # 5 minutes
-
-        if self.api_token:
-            logger.info("✅ HuggingFace API token found")
-        else:
-            logger.warning("⚠️ No HF_TOKEN set - translations may not work")
-
-    def is_available(self) -> bool:
-        """Check if API is available."""
-        if not self.api_token:
-            return False
-
-        current_time = time.time()
-        if self._is_available is not None and current_time - self._last_check < self._check_interval:
-            return self._is_available
-
-        try:
-            import requests
-
-            headers = {"Authorization": "Bearer " + self.api_token}
-            response = requests.get(
-                "https://huggingface.co/api/models/" + self.MODEL_ID,
-                headers=headers,
-                timeout=10
-            )
-
-            self._is_available = response.status_code == 200
-            self._last_check = current_time
-
-            if self._is_available:
-                logger.info("✅ HuggingFace API is available")
-            else:
-                logger.warning("⚠️ HuggingFace API unavailable: " + str(response.status_code))
-
-            return self._is_available
-
-        except Exception as e:
-            logger.warning("⚠️ API check failed: " + str(e))
-            self._is_available = False
-            self._last_check = current_time
-            return False
+    N-ATLaS model using HuggingFace Transformers.

-    def generate(
-        self,
-        prompt: str,
-        max_new_tokens: int = 512,
-        temperature: float = 0.7,
-        top_p: float = 0.9
-    ) -> Optional[str]:
-        """Generate text using HuggingFace Inference API."""
-        if not self.api_token:
-            return None
-
-        try:
-            import requests
-
-            headers = {
-                "Authorization": "Bearer " + self.api_token,
-                "Content-Type": "application/json"
-            }
-
-            payload = {
-                "inputs": prompt,
-                "parameters": {
-                    "max_new_tokens": max_new_tokens,
-                    "temperature": temperature,
-                    "top_p": top_p,
-                    "do_sample": True,
-                    "return_full_text": False
-                },
-                "options": {
-                    "wait_for_model": True
-                }
-            }
-
-            logger.info("📡 Calling HuggingFace Inference API...")
-
-            response = requests.post(
-                self.API_URL,
-                headers=headers,
-                json=payload,
-                timeout=120
-            )
-
-            if response.status_code == 200:
-                result = response.json()
-                if isinstance(result, list) and len(result) > 0:
-                    text = result[0].get("generated_text", "")
-                    if text:
-                        logger.info("✅ API generation successful: " + str(len(text)) + " chars")
-                        return text
-                return None
-            else:
-                logger.warning("⚠️ API request failed: " + str(response.status_code))
-                return None
-
-        except Exception as e:
-            logger.error("❌ API call failed: " + str(e))
-            return None
-
-    def translate(self, text: str, target_language: str) -> Optional[str]:
-        """Translate text using the API."""
-        if target_language == "en" or not text:
-            return text
-
-        lang_name = LANGUAGE_NAMES.get(target_language, target_language)
-        prompt = "Translate to " + lang_name + ": " + text
-
-        result = self.generate(prompt, max_new_tokens=len(text) * 3, temperature=0.3)
-
-        if result:
-            result = result.strip()
-            # Clean up prefixes
-            for prefix in [lang_name + ":", "Translation:"]:
-                if result.lower().startswith(prefix.lower()):
-                    result = result[len(prefix):].strip()
-            return result
-
-        return None
-
-    def chat_response(self, message: str, context: Dict, language: str = "en") -> Optional[str]:
-        """Generate chat response using API."""
-        crop = context.get("crop_type", "crop").capitalize()
-        disease = context.get("disease_name", "unknown disease")
-        severity = context.get("severity_level", "unknown")
-        confidence = context.get("confidence", 0)
-        if confidence <= 1:
-            confidence = int(confidence * 100)
-
-        lang_instructions = {
-            "ha": "Respond in Hausa language.",
-            "yo": "Respond in Yoruba language.",
-            "ig": "Respond in Igbo language."
-        }
-        lang_instruction = lang_instructions.get(language, "Respond in English.")
-
-        prompt = (
-            "You are FarmEyes, an AI assistant helping African farmers with crop diseases.\n\n"
-            "Current diagnosis:\n"
-            "- Crop: " + crop + "\n"
-            "- Disease: " + disease + "\n"
-            "- Severity: " + severity + "\n"
-            "- Confidence: " + str(confidence) + "%\n\n"
-            + lang_instruction + "\n\n"
-            "Farmer's question: " + message + "\n\n"
-            "Provide a helpful, practical response about this disease or related farming advice. "
-            "Keep it concise (2-3 paragraphs max)."
-        )
-
-        return self.generate(prompt, max_new_tokens=400, temperature=0.7)
-
-
-# =============================================================================
-# LOCAL GGUF MODEL (FALLBACK - OPTIONAL)
-# =============================================================================
-
-class LocalGGUFModel:
-    """
-    Local GGUF model using llama-cpp-python.
-    FALLBACK: Only works if llama-cpp-python is installed.
+    This is the OFFICIAL way to use N-ATLaS as shown in the model documentation.
+    No llama-cpp-python compilation required!

-    Model: tosinamuda/N-ATLaS-GGUF
-    File: N-ATLaS-GGUF-Q4_K_M.gguf (4.92GB)
+    Model: NCAIR1/N-ATLaS
+    Base: Llama-3 8B
+    Size: ~16GB
     """

-    HF_REPO = "tosinamuda/N-ATLaS-GGUF"
-    MODEL_FILENAME = "N-ATLaS-GGUF-Q4_K_M.gguf"
+    MODEL_ID = "NCAIR1/N-ATLaS"

     def __init__(
         self,
-        model_path: Optional[str] = None,
-        n_ctx: int = 2048,
-        n_gpu_layers: int = DEFAULT_GPU_LAYERS,
-        n_threads: int = DEFAULT_THREADS,
-        n_batch: int = 256,
-        verbose: bool = False
+        model_id: str = MODEL_ID,
+        torch_dtype: str = "float16",
+        device_map: str = "auto",
+        load_on_init: bool = True
     ):
-        self.model_path = model_path
-        self.n_ctx = n_ctx
-        self.n_gpu_layers = n_gpu_layers
-        self.n_threads = n_threads
-        self.n_batch = n_batch
-        self.verbose = verbose
+        self.model_id = model_id
+        self.torch_dtype = torch_dtype
+        self.device_map = device_map

         self._model = None
+        self._tokenizer = None
         self._is_loaded = False

-        logger.info(f"GGUF Config: ctx={n_ctx}, gpu_layers={n_gpu_layers}, threads={n_threads}, batch={n_batch}")
-
-    def download_model(self) -> str:
-        """Download GGUF model from HuggingFace Hub."""
-        try:
-            from huggingface_hub import hf_hub_download
-
-            logger.info("=" * 60)
-            logger.info("📥 DOWNLOADING N-ATLaS GGUF MODEL")
-            logger.info("=" * 60)
-            logger.info(f"   Repository: {self.HF_REPO}")
-            logger.info(f"   File: {self.MODEL_FILENAME}")
-            logger.info(f"   Size: ~4.92 GB")
-            logger.info("   This may take 5-15 minutes on first startup...")
-            logger.info("=" * 60)
-
-            model_path = hf_hub_download(
-                repo_id=self.HF_REPO,
-                filename=self.MODEL_FILENAME,
-                cache_dir=None,
-                resume_download=True
-            )
-
-            logger.info("=" * 60)
-            logger.info("✅ MODEL DOWNLOAD COMPLETE!")
-            logger.info(f"   Path: {model_path}")
-            logger.info("=" * 60)
-
-            return model_path
-
-        except Exception as e:
-            logger.error("=" * 60)
-            logger.error("❌ MODEL DOWNLOAD FAILED!")
-            logger.error(f"   Error: {str(e)}")
-            logger.error("=" * 60)
-            raise
+        logger.info(f"NATLaS Config: model={model_id}, dtype={torch_dtype}, device_map={device_map}")
+
+        if load_on_init:
+            self.load_model()

     def load_model(self) -> bool:
-        """Load GGUF model."""
+        """Load N-ATLaS model using transformers."""
         if self._is_loaded:
             return True

-        # Check if llama-cpp-python is available
-        if not LLAMA_CPP_AVAILABLE:
-            logger.warning("❌ Cannot load GGUF - llama-cpp-python not installed")
-            logger.warning("   App will use HuggingFace API only")
-            return False
-
         try:
-            from llama_cpp import Llama
+            import torch
+            from transformers import AutoTokenizer, AutoModelForCausalLM
+
+            logger.info("=" * 60)
+            logger.info("📥 LOADING N-ATLaS MODEL")
+            logger.info("=" * 60)
+            logger.info(f"   Model: {self.model_id}")
+            logger.info(f"   Size: ~16GB")
+            logger.info("   This may take 5-15 minutes on first load...")
+            logger.info("=" * 60)

-            # Download if not present
-            if self.model_path is None or not Path(self.model_path).exists():
-                logger.info("Model not found locally, downloading...")
-                self.model_path = self.download_model()
+            # Determine torch dtype
+            if self.torch_dtype == "float16":
+                dtype = torch.float16
+            elif self.torch_dtype == "bfloat16":
+                dtype = torch.bfloat16
+            else:
+                dtype = torch.float32

-            logger.info("🔄 Loading GGUF model into memory...")
-            logger.info(f"   Path: {self.model_path}")
-            logger.info(f"   GPU Layers: {self.n_gpu_layers}")
-            logger.info(f"   Context: {self.n_ctx}")
+            # Load tokenizer
+            logger.info("Loading tokenizer...")
+            self._tokenizer = AutoTokenizer.from_pretrained(
+                self.model_id,
+                trust_remote_code=True
+            )

-            self._model = Llama(
-                model_path=self.model_path,
-                n_ctx=self.n_ctx,
-                n_gpu_layers=self.n_gpu_layers,
-                n_threads=self.n_threads,
-                n_batch=self.n_batch,
-                verbose=self.verbose
+            # Load model
+            logger.info("Loading model weights...")
+            self._model = AutoModelForCausalLM.from_pretrained(
+                self.model_id,
+                torch_dtype=dtype,
+                device_map=self.device_map,
+                trust_remote_code=True
             )

             self._is_loaded = True
-            logger.info("✅ GGUF model loaded successfully!")
+
+            logger.info("=" * 60)
+            logger.info("✅ N-ATLaS MODEL LOADED SUCCESSFULLY!")
+            if HAS_GPU:
+                logger.info(f"   Running on GPU: {GPU_NAME}")
+            else:
+                logger.info("   Running on CPU")
+            logger.info("=" * 60)
+
             return True

-        except ImportError:
-            logger.error("❌ llama-cpp-python not installed!")
-            return False
         except Exception as e:
-            logger.error(f"❌ Model load failed: {str(e)}")
+            logger.error(f"❌ Failed to load N-ATLaS model: {e}")
+            logger.error("   Make sure you have accepted the model license at:")
+            logger.error("   https://huggingface.co/NCAIR1/N-ATLaS")
             return False

     def unload_model(self):
@@ -384,87 +172,168 @@ class LocalGGUFModel:
         if self._model is not None:
             del self._model
             self._model = None
-        self._is_loaded = False
-        logger.info("Model unloaded")
+        if self._tokenizer is not None:
+            del self._tokenizer
+            self._tokenizer = None
+        self._is_loaded = False
+
+        # Clear CUDA cache
+        try:
+            import torch
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+        except:
+            pass
+
+        logger.info("Model unloaded")

     @property
     def is_loaded(self) -> bool:
         return self._is_loaded

+    def _format_messages(self, messages: List[Dict]) -> str:
+        """Format messages using the tokenizer's chat template."""
+        try:
+            current_date = datetime.now().strftime('%d %b %Y')
+            text = self._tokenizer.apply_chat_template(
+                messages,
+                add_generation_prompt=True,
+                tokenize=False,
+                date_string=current_date
+            )
+            return text
+        except Exception as e:
+            # Fallback formatting if chat template fails
+            logger.warning(f"Chat template failed, using fallback: {e}")
+            text = ""
+            for msg in messages:
+                role = msg.get("role", "user")
+                content = msg.get("content", "")
+                if role == "system":
+                    text += f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{content}<|eot_id|>"
+                elif role == "user":
+                    text += f"<|start_header_id|>user<|end_header_id|>\n\n{content}<|eot_id|>"
+                elif role == "assistant":
+                    text += f"<|start_header_id|>assistant<|end_header_id|>\n\n{content}<|eot_id|>"
+            text += "<|start_header_id|>assistant<|end_header_id|>\n\n"
+            return text
+
     def generate(
         self,
         prompt: str,
-        max_tokens: int = 512,
+        system_prompt: str = None,
+        max_new_tokens: int = 512,
         temperature: float = 0.7,
         top_p: float = 0.9,
-        stop: Optional[List[str]] = None
+        repetition_penalty: float = 1.12
     ) -> Optional[str]:
-        """Generate text using GGUF model with Llama-3 format."""
-        if not LLAMA_CPP_AVAILABLE:
-            logger.warning("GGUF not available - llama-cpp-python not installed")
-            return None
-
+        """Generate text using N-ATLaS model."""
         if not self._is_loaded:
             if not self.load_model():
                 return None

         try:
-            formatted_prompt = (
-                "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
-                "You are a helpful AI assistant for African farmers. You help with crop disease diagnosis, "
-                "treatment advice, and agricultural questions. Respond in the same language the user writes in."
-                "<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
-                + prompt +
-                "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
-            )
+            import torch
+
+            # Default system prompt
+            if system_prompt is None:
+                system_prompt = (
+                    "You are a helpful AI assistant for African farmers. "
+                    "You help with crop disease diagnosis, treatment advice, and agricultural questions. "
+                    "Respond in the same language the user writes in."
+                )
+
+            # Format messages
+            messages = [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": prompt}
+            ]
+
+            text = self._format_messages(messages)

-            response = self._model(
-                formatted_prompt,
-                max_tokens=max_tokens,
-                temperature=temperature,
-                top_p=top_p,
-                stop=stop or ["<|eot_id|>", "<|end_of_text|>"],
-                echo=False
+            # Tokenize
+            input_tokens = self._tokenizer(
+                text,
+                return_tensors='pt',
+                add_special_tokens=False
             )

-            text = response["choices"][0]["text"].strip()
+            # Move to device
+            if HAS_GPU:
+                input_tokens = input_tokens.to('cuda')

-            for token in ["<|eot_id|>", "<|end_of_text|>", "<|start_header_id|>", "<|end_header_id|>"]:
-                text = text.replace(token, "")
+            # Generate
+            with torch.no_grad():
+                outputs = self._model.generate(
+                    **input_tokens,
+                    max_new_tokens=max_new_tokens,
+                    temperature=temperature,
+                    top_p=top_p,
+                    repetition_penalty=repetition_penalty,
+                    do_sample=True,
+                    use_cache=True,
+                    pad_token_id=self._tokenizer.eos_token_id
+                )

-            text = text.strip()
+            # Decode
+            full_response = self._tokenizer.decode(outputs[0], skip_special_tokens=False)

-            if text:
-                logger.info(f"✅ GGUF generation: {len(text)} chars")
-                return text
+            # Extract assistant response
+            # Look for the last assistant header and get text after it
+            assistant_marker = "<|start_header_id|>assistant<|end_header_id|>"
+            if assistant_marker in full_response:
+                response = full_response.split(assistant_marker)[-1]
             else:
-                logger.warning("⚠️ GGUF returned empty response")
-                return None
+                response = full_response
+
+            # Clean up special tokens
+            for token in ["<|eot_id|>", "<|end_of_text|>", "<|begin_of_text|>",
+                          "<|start_header_id|>", "<|end_header_id|>"]:
+                response = response.replace(token, "")
+
+            response = response.strip()

+            if response:
+                logger.info(f"✅ Generation successful: {len(response)} chars")
+                return response
+            else:
+                logger.warning("⚠️ Empty response generated")
+                return None
+
         except Exception as e:
-            logger.error(f"❌ GGUF generation error: {str(e)}")
+            logger.error(f"❌ Generation error: {e}")
             return None

     def translate(self, text: str, target_language: str) -> Optional[str]:
-        """Translate text using GGUF model."""
-        if not LLAMA_CPP_AVAILABLE:
-            return None
-
+        """Translate text to target language."""
         if target_language == "en" or not text:
             return text

         lang_name = LANGUAGE_NAMES.get(target_language, target_language)
-        prompt = "Translate to " + lang_name + ": " + text
+
+        prompt = f"Translate the following text to {lang_name}. Only provide the translation, nothing else.\n\nText: {text}"
+
+        system_prompt = f"You are a professional translator. Translate text accurately to {lang_name}. Only output the translation."

         result = self.generate(
-            prompt,
-            max_tokens=len(text) * 4,
-            temperature=0.3
+            prompt=prompt,
+            system_prompt=system_prompt,
+            max_new_tokens=len(text) * 4,
+            temperature=0.3,
+            repetition_penalty=1.1
         )

         if result:
             result = result.strip()
-            for prefix in [lang_name + ":", "Translation:", "In " + lang_name + ":"]:
+            # Clean up common prefixes
+            prefixes_to_remove = [
+                f"{lang_name}:",
+                f"{lang_name} translation:",
+                "Translation:",
+                "Here is the translation:",
+                "The translation is:",
+            ]
+            for prefix in prefixes_to_remove:
                 if result.lower().startswith(prefix.lower()):
                     result = result[len(prefix):].strip()
             return result
@@ -473,9 +342,6 @@ class LocalGGUFModel:

     def chat_response(self, message: str, context: Dict, language: str = "en") -> Optional[str]:
         """Generate chat response with diagnosis context."""
-        if not LLAMA_CPP_AVAILABLE:
-            return None
-
         crop = context.get("crop_type", "crop").capitalize()
         disease = context.get("disease_name", "unknown disease")
         severity = context.get("severity_level", "unknown")
@@ -483,27 +349,38 @@ class LocalGGUFModel:
         if confidence <= 1:
             confidence = int(confidence * 100)

+        # Language instruction
         lang_instructions = {
-            "ha": "Respond in Hausa language.",
-            "yo": "Respond in Yoruba language.",
-            "ig": "Respond in Igbo language."
+            "en": "Respond in English.",
+            "ha": "Respond in Hausa language (Yaren Hausa).",
+            "yo": "Respond in Yoruba language (Èdè Yorùbá).",
+            "ig": "Respond in Igbo language (Asụsụ Igbo)."
         }
         lang_instruction = lang_instructions.get(language, "Respond in English.")

+        system_prompt = (
+            "You are FarmEyes, an AI assistant helping African farmers with crop diseases. "
+            "You provide practical, helpful advice about crop diseases and farming. "
+            f"{lang_instruction}"
+        )
+
         prompt = (
-            "You are FarmEyes, an AI assistant helping African farmers with crop diseases.\n\n"
-            "Current diagnosis:\n"
-            "- Crop: " + crop + "\n"
-            "- Disease: " + disease + "\n"
-            "- Severity: " + severity + "\n"
-            "- Confidence: " + str(confidence) + "%\n\n"
-            + lang_instruction + "\n\n"
-            "Farmer's question: " + message + "\n\n"
-            "Provide a helpful, practical response about this disease or related farming advice. "
-            "Keep it concise (2-3 paragraphs max)."
+            f"Current diagnosis information:\n"
+            f"- Crop: {crop}\n"
+            f"- Disease: {disease}\n"
+            f"- Severity: {severity}\n"
+            f"- Confidence: {confidence}%\n\n"
+            f"Farmer's question: {message}\n\n"
+            f"Provide a helpful, practical response about this disease or related farming advice. "
+            f"Keep your response concise (2-3 paragraphs maximum)."
+        )
+
+        return self.generate(
+            prompt=prompt,
+            system_prompt=system_prompt,
+            max_new_tokens=500,
+            temperature=0.7
         )
-
-        return self.generate(prompt, max_tokens=400, temperature=0.7)


 # =============================================================================
@@ -512,162 +389,117 @@ class LocalGGUFModel:

 class NATLaSModel:
     """
-    HYBRID N-ATLaS model.
-
-    Strategy:
-    1. Try HuggingFace Inference API first (if token available)
-    2. Fall back to local GGUF model (if llama-cpp-python installed)
+    N-ATLaS model wrapper.

-    On HuggingFace Spaces (without llama-cpp-python):
-    - Only HuggingFace API is used
-    - Make sure HF_TOKEN secret is set!
+    Uses the official NCAIR1/N-ATLaS model via HuggingFace Transformers.
+    This is the recommended way to use N-ATLaS.
     """

     def __init__(
         self,
-        api_token: Optional[str] = None,
-        prefer_api: bool = True,
-        auto_load_local: bool = True,
-        **local_kwargs
+        api_token: Optional[str] = None,  # Kept for compatibility
+        auto_load: bool = True,
+        **kwargs
     ):
-        self.prefer_api = prefer_api
-
-        # Initialize API client (PRIMARY)
-        self.api_client = HuggingFaceAPIClient(api_token)
+        # Get HF token from environment
+        self.hf_token = api_token or os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
+
+        if self.hf_token:
+            logger.info("✅ HuggingFace token found")
+            # Set token for huggingface_hub
+            try:
+                from huggingface_hub import login
+                login(token=self.hf_token, add_to_git_credential=False)
+            except Exception as e:
+                logger.warning(f"Could not set HF token: {e}")
+        else:
+            logger.warning("⚠️ No HF_TOKEN found - model access may fail")

-        # Initialize GGUF model (FALLBACK - optional)
-        self.local_model = LocalGGUFModel(**local_kwargs)
+        # Initialize the transformers model
+        self.model = NATLaSTransformersModel(load_on_init=auto_load)

         # Translation cache
         self._cache: Dict[str, str] = {}

-        # Only try to load GGUF if llama-cpp-python is available
-        if auto_load_local and LLAMA_CPP_AVAILABLE:
-            logger.info("🔄 Pre-loading GGUF model for fallback...")
-            self.local_model.load_model()
-        elif not LLAMA_CPP_AVAILABLE:
-            logger.info("ℹ️ GGUF fallback disabled - using API only")
-
         logger.info("=" * 60)
-        logger.info("✅ NATLaSModel (Hybrid) initialized")
-        logger.info(f"   API token: {'Yes' if self.api_client.api_token else 'No'}")
-        logger.info(f"   GGUF available: {'Yes' if LLAMA_CPP_AVAILABLE else 'No'}")
-        logger.info(f"   GGUF loaded: {'Yes' if self.local_model.is_loaded else 'No'}")
-        logger.info(f"   GPU available: {'Yes' if HAS_GPU else 'No'}")
+        logger.info("✅ NATLaSModel initialized")
+        logger.info(f"   Model loaded: {'Yes' if self.model.is_loaded else 'No'}")
+        logger.info(f"   GPU available: {'Yes - ' + GPU_NAME if HAS_GPU else 'No'}")
+        logger.info(f"   HF Token: {'Yes' if self.hf_token else 'No'}")
         logger.info(f"   Running on: {'HuggingFace Spaces' if IS_HF_SPACES else 'Local'}")
         logger.info("=" * 60)

     @property
     def is_loaded(self) -> bool:
-        return self.api_client.api_token is not None or self.local_model.is_loaded
+        return self.model.is_loaded

     def load_model(self) -> bool:
-        if self.api_client.api_token:
-            return True
-        if LLAMA_CPP_AVAILABLE:
-            return self.local_model.load_model()
-        return False
+        return self.model.load_model()

     def translate(self, text: str, target_language: str, use_cache: bool = True) -> str:
-        """
-        Translate text using hybrid approach.
-        1. Try API first
-        2. Fall back to GGUF (if available)
-        """
+        """Translate text to target language."""
         if target_language == "en" or not text or not text.strip():
             return text

         # Check cache
-        cache_key = target_language + ":" + str(hash(text))
+        cache_key = f"{target_language}:{hash(text)}"
         if use_cache and cache_key in self._cache:
+            logger.info("📦 Using cached translation")
             return self._cache[cache_key]

-        result = None
-
-        # Try API first if preferred and available
-        if self.prefer_api and self.api_client.api_token:
-            logger.info("📡 Trying API translation...")
-            result = self.api_client.translate(text, target_language)
-            if result:
-                logger.info("✅ API translation successful")
-
-        # Fall back to GGUF (only if available)
-        if result is None and LLAMA_CPP_AVAILABLE:
-            logger.info("🔄 Using GGUF for translation (fallback)...")
-            result = self.local_model.translate(text, target_language)
-
-        # If still no result, return original text
-        if result is None:
-            logger.warning("⚠️ Translation failed - returning original text")
-            return text
-
-        # Cache and return
-        if result and result != text and use_cache:
-            self._cache[cache_key] = result
-            if len(self._cache) > 500:
-                keys = list(self._cache.keys())[:100]
-                for k in keys:
-                    del self._cache[k]
+        logger.info(f"🌍 Translating to {LANGUAGE_NAMES.get(target_language, target_language)}...")
+        result = self.model.translate(text, target_language)
+
+        if result and result != text:
+            # Cache the result
+            if use_cache:
+                self._cache[cache_key] = result
+                # Limit cache size
+                if len(self._cache) > 500:
+                    keys = list(self._cache.keys())[:100]
+                    for k in keys:
+                        del self._cache[k]
+            logger.info("✅ Translation successful")
+            return result

-        return result if result else text
+        logger.warning("⚠️ Translation failed - returning original")
+        return text

     def translate_batch(self, texts: List[str], target_language: str) -> List[str]:
         """Translate multiple texts."""
         return [self.translate(text, target_language) for text in texts]

     def generate(self, prompt: str, max_tokens: int = 512, temperature: float = 0.7, **kwargs) -> str:
-        """
-        Generate text using hybrid approach.
-        1. Try API first
-        2. Fall back to GGUF (if available)
-        """
-        result = None
-
-        # Try API first if preferred and available
-        if self.prefer_api and self.api_client.api_token:
-            logger.info("📡 Trying API generation...")
-            result = self.api_client.generate(prompt, max_tokens, temperature)
-            if result:
-                logger.info("✅ API generation successful")
-
-        # Fall back to GGUF (only if available)
-        if result is None and LLAMA_CPP_AVAILABLE:
-            logger.info("🔄 Using GGUF for generation (fallback)...")
-            result = self.local_model.generate(prompt, max_tokens, temperature)
-
+        """Generate text."""
+        result = self.model.generate(
+            prompt=prompt,
+            max_new_tokens=max_tokens,
+            temperature=temperature
+        )
         return result if result else ""

     def chat_response(self, message: str, context: Dict, language: str = "en") -> str:
         """Generate chat response with context."""
-        result = None
-
-        # Try GGUF first for better context handling (if available)
-        if LLAMA_CPP_AVAILABLE and self.local_model.is_loaded:
-            result = self.local_model.chat_response(message, context, language)
-
-        # Fall back to API
-        if result is None and self.api_client.api_token:
-            result = self.api_client.chat_response(message, context, language)
-
-        return result if result else "I'm sorry, I couldn't generate a response. Please try again."
+        result = self.model.chat_response(message, context, language)
+        if result:
+            return result
+        return "I'm sorry, I couldn't generate a response. Please try again."

     def load_local_model(self) -> bool:
-        if LLAMA_CPP_AVAILABLE:
-            return self.local_model.load_model()
-        return False
+        """Compatibility method."""
+        return self.load_model()

     def unload_local_model(self):
-        if LLAMA_CPP_AVAILABLE:
-            self.local_model.unload_model()
+        """Unload model."""
+        self.model.unload_model()

     def get_status(self) -> Dict:
         return {
-            "api_available": self.api_client.is_available() if self.api_client.api_token else False,
-            "api_token_set": bool(self.api_client.api_token),
-            "llama_cpp_available": LLAMA_CPP_AVAILABLE,
-            "local_model_loaded": self.local_model.is_loaded,
+            "model_loaded": self.model.is_loaded,
+            "model_id": self.model.model_id,
             "gpu_available": HAS_GPU,
-            "prefer_api": self.prefer_api,
+            "gpu_name": GPU_NAME if HAS_GPU else None,
+            "hf_token_set": bool(self.hf_token),
             "cache_size": len(self._cache),
             "running_on": "HuggingFace Spaces" if IS_HF_SPACES else "Local"
         }
@@ -694,8 +526,7 @@ def get_natlas_model(
     if _model_instance is None:
         _model_instance = NATLaSModel(
             api_token=api_token,
-            prefer_api=True,
-            auto_load_local=auto_load_local,
+            auto_load=auto_load_local,
             **kwargs
         )
requirements.txt CHANGED
@@ -1,6 +1,7 @@
 # =============================================================================
-# FarmEyes - Requirements (GPU Version)
+# FarmEyes - Requirements (Transformers Version)
 # =============================================================================
+# NO llama-cpp-python needed! Uses official N-ATLaS via transformers.
 # Note: torch/torchvision already in base Docker image
 # =============================================================================

@@ -9,24 +10,29 @@ fastapi>=0.104.0
 uvicorn[standard]>=0.24.0
 python-multipart>=0.0.6

-# AI/ML (torch already in base image)
-ultralytics>=8.0.0
+# AI/ML - Transformers (for N-ATLaS)
 transformers>=4.35.0
+accelerate>=0.25.0
 huggingface-hub>=0.19.0

-# Audio
+# AI/ML - Vision (for YOLOv11)
+ultralytics>=8.0.0
+
+# Audio Processing (for Whisper)
 openai-whisper>=20231117
 soundfile>=0.12.0

-# Image
+# Image Processing
 Pillow>=10.0.0
 opencv-python-headless>=4.8.0

 # HTTP
 requests>=2.31.0

-# Utils
+# Utilities
 numpy>=1.24.0
 scipy>=1.11.0
 pydantic>=2.0.0
 python-dotenv>=1.0.0
+sentencepiece>=0.1.99
+protobuf>=3.20.0
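The new pins matter for the transformers path: accelerate backs the device_map="auto" placement used in models/natlas_model.py, while sentencepiece and protobuf cover tokenizer dependencies that Llama-style checkpoints commonly need. A minimal load sketch these requirements support (mirroring the module's load_model, shortened):

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = "NCAIR1/N-ATLaS"  # gated model - accept the license on the Hub first

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,  # matches the fp16 default in natlas_model.py
    device_map="auto",          # needs accelerate>=0.25.0
    trust_remote_code=True,
)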