Fola-AI committed on
Commit
136baa0
·
1 Parent(s): 9024447

Simplify translations - GGUF model with GPU is fast enough

Browse files
Files changed (2) hide show
  1. models/natlas_model.py +15 -125
  2. services/diagnosis_generator.py +152 -433
models/natlas_model.py CHANGED
@@ -395,89 +395,22 @@ class NATLaSTransformersModel:
395
 
396
  def translate_batch(self, texts: List[str], target_language: str) -> List[str]:
397
  """
398
- Translate multiple texts in a SINGLE model call.
399
 
400
- This is much faster than translating each text separately because:
401
- - Only ONE model inference instead of N inferences
402
- - Reduces total time from O(N * inference_time) to O(inference_time)
403
-
404
- Example: 10 translations in ~30-60 seconds instead of ~5+ minutes
405
  """
406
  if target_language == "en" or not texts:
407
  return texts
408
 
409
- # Filter out empty texts and track their positions
410
- non_empty_texts = []
411
- positions = []
412
- for i, text in enumerate(texts):
413
  if text and text.strip():
414
- non_empty_texts.append(text.strip())
415
- positions.append(i)
416
-
417
- if not non_empty_texts:
418
- return texts
419
-
420
- lang_name = LANGUAGE_NAMES.get(target_language, target_language)
421
-
422
- # Build numbered list for batch translation
423
- numbered_texts = "\n".join([f"{i+1}. {text}" for i, text in enumerate(non_empty_texts)])
424
-
425
- prompt = f"""Translate each numbered item below to {lang_name}.
426
- Return ONLY the translations in the same numbered format. Do not add explanations.
427
-
428
- {numbered_texts}"""
429
-
430
- system_prompt = f"You are a professional translator. Translate each numbered item to {lang_name}. Return only the numbered translations, nothing else."
431
-
432
- # Calculate max tokens based on total input length
433
- total_chars = sum(len(t) for t in non_empty_texts)
434
- max_tokens = min(total_chars * 4, 2000) # Cap at 2000 tokens
435
-
436
- logger.info(f"🚀 Batch translating {len(non_empty_texts)} texts to {lang_name}...")
437
-
438
- result = self.generate(
439
- prompt=prompt,
440
- system_prompt=system_prompt,
441
- max_new_tokens=max_tokens,
442
- temperature=0.3,
443
- repetition_penalty=1.1
444
- )
445
-
446
- if not result:
447
- logger.warning("⚠️ Batch translation failed - returning originals")
448
- return texts
449
-
450
- # Parse the numbered results
451
- translations = self._parse_numbered_translations(result, len(non_empty_texts))
452
-
453
- # Build final result list
454
- final_results = list(texts) # Copy original
455
- for idx, pos in enumerate(positions):
456
- if idx < len(translations) and translations[idx]:
457
- final_results[pos] = translations[idx]
458
-
459
- logger.info(f"✅ Batch translation complete: {len(translations)} items")
460
- return final_results
461
-
462
- def _parse_numbered_translations(self, result: str, expected_count: int) -> List[str]:
463
- """Parse numbered translation results."""
464
- translations = []
465
- lines = result.strip().split('\n')
466
 
467
- for line in lines:
468
- line = line.strip()
469
- if not line:
470
- continue
471
-
472
- # Try to extract numbered items (e.g., "1. Translation" or "1) Translation")
473
- match = re.match(r'^\d+[\.\)]\s*(.+)$', line)
474
- if match:
475
- translations.append(match.group(1).strip())
476
- elif line and not line[0].isdigit():
477
- # Line without number - might be continuation or standalone
478
- if translations:
479
- # Append to last translation if it seems like continuation
480
- pass
481
  else:
482
  translations.append(line)
483
 
@@ -611,63 +544,20 @@ class NATLaSModel:
611
 
612
  def translate_batch(self, texts: List[str], target_language: str, use_cache: bool = True) -> List[str]:
613
  """
614
- Translate multiple texts efficiently using batch translation.
615
 
616
- This method:
617
- 1. Checks cache for already-translated texts
618
- 2. Sends remaining texts in ONE batch call to the model
619
- 3. Caches new translations
620
-
621
- Result: ~5x faster than individual translations!
622
  """
623
  if target_language == "en" or not texts:
624
  return texts
625
 
626
- # Separate cached and uncached texts
627
- results = [None] * len(texts)
628
- uncached_texts = []
629
- uncached_indices = []
630
-
631
  for i, text in enumerate(texts):
632
  if not text or not text.strip():
633
- results[i] = text
634
- continue
635
-
636
- cache_key = f"{target_language}:{hash(text)}"
637
- if use_cache and cache_key in self._cache:
638
- results[i] = self._cache[cache_key]
639
- logger.info(f"📦 Cache hit for text {i+1}")
640
- else:
641
- uncached_texts.append(text)
642
- uncached_indices.append(i)
643
-
644
- # If all texts were cached, return immediately
645
- if not uncached_texts:
646
- logger.info("📦 All translations from cache!")
647
- return results
648
-
649
- # Batch translate uncached texts
650
- logger.info(f"🚀 Batch translating {len(uncached_texts)} texts ({len(texts) - len(uncached_texts)} cached)...")
651
-
652
- translated = self.model.translate_batch(uncached_texts, target_language)
653
-
654
- # Store results and cache them
655
- for idx, original_idx in enumerate(uncached_indices):
656
- if idx < len(translated) and translated[idx]:
657
- results[original_idx] = translated[idx]
658
- # Cache the translation
659
- if use_cache:
660
- cache_key = f"{target_language}:{hash(uncached_texts[idx])}"
661
- self._cache[cache_key] = translated[idx]
662
  else:
663
- # Fallback to original text
664
- results[original_idx] = texts[original_idx]
665
-
666
- # Clean cache if too large
667
- if len(self._cache) > 500:
668
- keys = list(self._cache.keys())[:100]
669
- for k in keys:
670
- del self._cache[k]
671
 
672
  return results
673
 
 
395
 
396
  def translate_batch(self, texts: List[str], target_language: str) -> List[str]:
397
  """
398
+ Translate multiple texts using individual translations.
399
 
400
+ With GGUF model + GPU, individual translations are fast enough!
 
 
 
 
401
  """
402
  if target_language == "en" or not texts:
403
  return texts
404
 
405
+ results = []
406
+ for text in texts:
 
 
407
  if text and text.strip():
408
+ translated = self.translate(text, target_language)
409
+ results.append(translated if translated else text)
410
+ else:
411
+ results.append(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
 
413
+ return results
 
 
 
 
 
 
 
 
 
 
 
 
 
414
  else:
415
  translations.append(line)
416
 
 
544
 
545
  def translate_batch(self, texts: List[str], target_language: str, use_cache: bool = True) -> List[str]:
546
  """
547
+ Translate multiple texts using individual translations.
548
 
549
+ With the smaller GGUF model + GPU, individual translations are fast enough!
 
 
 
 
 
550
  """
551
  if target_language == "en" or not texts:
552
  return texts
553
 
554
+ results = []
 
 
 
 
555
  for i, text in enumerate(texts):
556
  if not text or not text.strip():
557
+ results.append(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
558
  else:
559
+ translated = self.translate(text, target_language, use_cache)
560
+ results.append(translated)
 
 
 
 
 
 
561
 
562
  return results
563
 
services/diagnosis_generator.py CHANGED
@@ -1,14 +1,11 @@
1
  """
2
- FarmEyes Diagnosis Generator Service (OPTIMIZED)
3
- =================================================
4
  Generates complete multilingual diagnosis reports by combining:
5
  - Disease detection results (from YOLO model)
6
  - Knowledge base information (symptoms, treatments, costs)
7
  - N-ATLaS translations (Hausa, Yoruba, Igbo)
8
 
9
- OPTIMIZATION: Uses batch translation to reduce N-ATLaS calls from 20+ to just 1-2.
10
- This improves translation speed from ~5 minutes to ~30-60 seconds.
11
-
12
  Produces farmer-friendly reports with actionable treatment recommendations.
13
  """
14
 
@@ -187,8 +184,6 @@ class DiagnosisGenerator:
187
  """
188
  Generates complete multilingual diagnosis reports.
189
  Combines disease detection, knowledge base, and translation services.
190
-
191
- OPTIMIZED: Uses batch translation for 5x faster performance.
192
  """
193
 
194
  def __init__(self, auto_load_models: bool = False):
@@ -205,7 +200,7 @@ class DiagnosisGenerator:
205
  # Load services
206
  self._init_services(auto_load_models)
207
 
208
- logger.info("DiagnosisGenerator initialized (OPTIMIZED with batch translation)")
209
 
210
  def _init_services(self, auto_load_models: bool) -> None:
211
  """Initialize required services."""
@@ -301,7 +296,7 @@ class DiagnosisGenerator:
301
  return self._build_report(detection_result, language)
302
 
303
  # =========================================================================
304
- # REPORT BUILDING (OPTIMIZED WITH BATCH TRANSLATION)
305
  # =========================================================================
306
 
307
  def _build_report(
@@ -332,435 +327,168 @@ class DiagnosisGenerator:
332
  report.confidence_level = detection_result.get_confidence_level()
333
 
334
  if detection_result.is_healthy:
335
- self._build_healthy_report_optimized(report, detection_result, language)
336
  else:
337
- self._build_disease_report_optimized(report, detection_result, language)
338
 
339
  return report
340
 
341
- def _build_healthy_report_optimized(
342
  self,
343
  report: DiagnosisReport,
344
  detection_result,
345
  language: str
346
  ) -> None:
347
- """Build report for healthy plant detection using batch translation."""
348
-
349
- # Skip translation for English
350
- if language == "en":
351
- report.disease_name = detection_result.display_name or "Healthy Plant"
352
- report.summary_message = f"Great news! Your {detection_result.crop_type} plant appears to be healthy. Continue with good farming practices to maintain plant health."
353
- report.healthy_message = detection_result.healthy_message or report.summary_message
354
-
355
- if detection_result.maintenance_tips:
356
- report.maintenance_tips = detection_result.maintenance_tips[:6]
357
- else:
358
- report.maintenance_tips = [
359
- "Continue regular monitoring for early disease detection",
360
- "Maintain proper watering and fertilization",
361
- "Keep the field free of weeds",
362
- "Practice crop rotation",
363
- "Use disease-free planting materials"
364
- ]
365
- report.expected_yield = detection_result.expected_yield
366
- return
367
-
368
- # Collect all texts to translate in one batch
369
- texts_to_translate = []
370
- text_keys = [] # Track what each text is for
371
-
372
- # 1. Disease name
373
- texts_to_translate.append(detection_result.display_name or "Healthy Plant")
374
- text_keys.append("disease_name")
375
 
376
- # 2. Summary message
377
  summary_en = f"Great news! Your {detection_result.crop_type} plant appears to be healthy. Continue with good farming practices to maintain plant health."
378
- texts_to_translate.append(summary_en)
379
- text_keys.append("summary_message")
380
 
381
- # 3. Healthy message
382
  if detection_result.healthy_message:
383
- texts_to_translate.append(detection_result.healthy_message)
384
- text_keys.append("healthy_message")
 
 
 
 
385
 
386
- # 4. Maintenance tips
387
  if detection_result.maintenance_tips:
388
- tips = detection_result.maintenance_tips[:6]
 
 
 
389
  else:
390
- tips = [
 
391
  "Continue regular monitoring for early disease detection",
392
  "Maintain proper watering and fertilization",
393
  "Keep the field free of weeds",
394
  "Practice crop rotation",
395
  "Use disease-free planting materials"
396
  ]
 
397
 
398
- tip_start_idx = len(texts_to_translate)
399
- for tip in tips:
400
- texts_to_translate.append(tip)
401
- text_keys.append("maintenance_tip")
402
-
403
- # BATCH TRANSLATE ALL AT ONCE
404
- logger.info(f"🚀 Batch translating {len(texts_to_translate)} texts for healthy report...")
405
- translated = self._translate_batch(texts_to_translate, language)
406
-
407
- # Apply translations
408
- idx = 0
409
- report.disease_name = translated[idx]; idx += 1
410
- report.summary_message = translated[idx]; idx += 1
411
-
412
- if "healthy_message" in text_keys:
413
- report.healthy_message = translated[idx]; idx += 1
414
- else:
415
- report.healthy_message = report.summary_message
416
-
417
- # Maintenance tips
418
- report.maintenance_tips = translated[tip_start_idx:]
419
  report.expected_yield = detection_result.expected_yield
420
 
421
- def _build_disease_report_optimized(
422
  self,
423
  report: DiagnosisReport,
424
  detection_result,
425
  language: str
426
  ) -> None:
427
- """
428
- Build report for disease detection using BATCH translation.
429
-
430
- OPTIMIZATION: Instead of 20+ individual translate() calls,
431
- we collect ALL texts and translate them in 1-2 batch calls.
432
- This reduces time from ~5 minutes to ~30-60 seconds.
433
- """
434
-
435
- # Set non-translated fields first
436
  report.disease_name_scientific = detection_result.scientific_name
437
  report.disease_category = detection_result.category
 
 
 
 
 
 
438
  report.severity_scale = detection_result.severity_scale
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
439
  report.yield_loss_min = detection_result.yield_loss_min
440
  report.yield_loss_max = detection_result.yield_loss_max
 
 
 
 
 
 
 
441
  report.treatment_cost_min = detection_result.treatment_cost_min
442
  report.treatment_cost_max = detection_result.treatment_cost_max
 
 
 
 
 
 
 
 
 
443
 
444
- # Expert contact (no translation needed)
 
 
 
445
  expert = detection_result.expert_contact
446
  if expert:
447
  report.expert_institution = expert.get("institution", "")
448
  report.expert_location = expert.get("location", "")
449
  report.expert_services = expert.get("services", "")
450
-
451
- # Skip translation for English
452
- if language == "en":
453
- self._build_disease_report_english(report, detection_result)
454
- return
455
-
456
- # =====================================================================
457
- # COLLECT ALL TEXTS FOR BATCH TRANSLATION
458
- # =====================================================================
459
-
460
- texts_to_translate = []
461
- text_mapping = [] # Track (field_name, index_in_list_or_none)
462
-
463
- # 1. Disease name
464
- texts_to_translate.append(detection_result.display_name)
465
- text_mapping.append(("disease_name", None))
466
-
467
- # 2. Severity level
468
- texts_to_translate.append(detection_result.severity_level.replace("_", " ").title())
469
- text_mapping.append(("severity_level", None))
470
-
471
- # 3. Severity description
472
- texts_to_translate.append(detection_result.severity_description)
473
- text_mapping.append(("severity_description", None))
474
-
475
- # 4. Summary message
476
- summary_en = self._create_summary_message(detection_result)
477
- texts_to_translate.append(summary_en)
478
- text_mapping.append(("summary_message", None))
479
-
480
- # 5. Yield loss message
481
- yield_msg_en = f"This disease can cause {detection_result.yield_loss_min}% to {detection_result.yield_loss_max}% yield loss if not treated."
482
- texts_to_translate.append(yield_msg_en)
483
- text_mapping.append(("yield_loss_message", None))
484
-
485
- # 6. Cost message
486
- cost_msg_en = f"Estimated treatment cost: ₦{detection_result.treatment_cost_min:,} to ₦{detection_result.treatment_cost_max:,} per hectare."
487
- texts_to_translate.append(cost_msg_en)
488
- text_mapping.append(("cost_message", None))
489
-
490
- # 7. Symptoms (list)
491
- symptoms = detection_result.symptoms[:6] if detection_result.symptoms else []
492
- for i, symptom in enumerate(symptoms):
493
- texts_to_translate.append(symptom)
494
- text_mapping.append(("symptom", i))
495
-
496
- # 8. Transmission (list)
497
- transmission = detection_result.transmission[:5] if detection_result.transmission else []
498
- for i, trans in enumerate(transmission):
499
- texts_to_translate.append(trans)
500
- text_mapping.append(("transmission", i))
501
-
502
- # 9. Prevention tips (list)
503
- prevention = detection_result.prevention[:6] if detection_result.prevention else []
504
- for i, tip in enumerate(prevention):
505
- texts_to_translate.append(tip)
506
- text_mapping.append(("prevention", i))
507
-
508
- # 10. Treatment texts
509
- treatments = detection_result.treatments
510
- treatment_texts = self._collect_treatment_texts(treatments)
511
- treatment_start_idx = len(texts_to_translate)
512
- for item in treatment_texts:
513
- text = item[0]
514
- mapping_info = item[1] # This is a tuple that can have 2 or 3 elements
515
- texts_to_translate.append(text)
516
- text_mapping.append(mapping_info)
517
-
518
- # 11. Health projection messages
519
- projection = detection_result.health_projection
520
- projection_texts = []
521
- if projection:
522
- for stage, info in projection.items():
523
- if isinstance(info, dict) and info.get("message"):
524
- texts_to_translate.append(info.get("message", ""))
525
- text_mapping.append(("projection", stage))
526
- projection_texts.append((stage, info.get("recovery_chance_percent", 0)))
527
-
528
- # =====================================================================
529
- # BATCH TRANSLATE ALL TEXTS IN ONE CALL
530
- # =====================================================================
531
-
532
- logger.info(f"🚀 Batch translating {len(texts_to_translate)} texts for disease report...")
533
- translated = self._translate_batch(texts_to_translate, language)
534
-
535
- # =====================================================================
536
- # APPLY TRANSLATIONS TO REPORT
537
- # =====================================================================
538
-
539
- # Initialize lists
540
- report.symptoms = [""] * len(symptoms)
541
- report.transmission = [""] * len(transmission)
542
- report.prevention_tips = [""] * len(prevention)
543
-
544
- # Process each translated text
545
- for idx, (field, sub_idx) in enumerate(text_mapping):
546
- value = translated[idx] if idx < len(translated) else ""
547
-
548
- if field == "disease_name":
549
- report.disease_name = value
550
- elif field == "severity_level":
551
- report.severity_level = value
552
- elif field == "severity_description":
553
- report.severity_description = value
554
- elif field == "summary_message":
555
- report.summary_message = value
556
- elif field == "yield_loss_message":
557
- report.yield_loss_message = value
558
- elif field == "cost_message":
559
- report.cost_message = value
560
- elif field == "symptom":
561
- report.symptoms[sub_idx] = value
562
- elif field == "transmission":
563
- report.transmission[sub_idx] = value
564
- elif field == "prevention":
565
- report.prevention_tips[sub_idx] = value
566
- elif field == "projection":
567
- # sub_idx is the stage name
568
- if sub_idx not in report.health_projection:
569
- report.health_projection[sub_idx] = {}
570
- report.health_projection[sub_idx]["message"] = value
571
- # Find recovery chance from projection_texts
572
- for stage, chance in projection_texts:
573
- if stage == sub_idx:
574
- report.health_projection[sub_idx]["recovery_chance_percent"] = chance
575
-
576
- # Apply treatment translations
577
- self._apply_treatment_translations(
578
- report,
579
- treatments,
580
- translated[treatment_start_idx:],
581
- text_mapping[treatment_start_idx:]
582
- )
583
-
584
- # Set current projection
585
- self._set_current_projection(report, detection_result)
586
-
587
- logger.info("✅ Disease report built with batch translation")
588
 
589
- def _build_disease_report_english(
590
- self,
591
- report: DiagnosisReport,
592
- detection_result
593
- ) -> None:
594
- """Build English report without translation."""
595
- report.disease_name = detection_result.display_name
596
- report.severity_level = detection_result.severity_level.replace("_", " ").title()
597
- report.severity_description = detection_result.severity_description
598
- report.summary_message = self._create_summary_message(detection_result)
599
- report.yield_loss_message = f"This disease can cause {detection_result.yield_loss_min}% to {detection_result.yield_loss_max}% yield loss if not treated."
600
- report.cost_message = f"Estimated treatment cost: ₦{detection_result.treatment_cost_min:,} to ₦{detection_result.treatment_cost_max:,} per hectare."
601
-
602
- report.symptoms = detection_result.symptoms[:6] if detection_result.symptoms else []
603
- report.transmission = detection_result.transmission[:5] if detection_result.transmission else []
604
- report.prevention_tips = detection_result.prevention[:6] if detection_result.prevention else []
605
-
606
- # Build treatments without translation
607
- self._build_treatment_sections_english(report, detection_result)
608
-
609
- # Health projection
610
- projection = detection_result.health_projection
611
- if projection:
612
- for stage, info in projection.items():
613
- if isinstance(info, dict):
614
- report.health_projection[stage] = {
615
- "recovery_chance_percent": info.get("recovery_chance_percent", 0),
616
- "message": info.get("message", "")
617
- }
618
-
619
- self._set_current_projection(report, detection_result)
620
-
621
- def _collect_treatment_texts(self, treatments: Dict) -> List[Tuple[str, Tuple]]:
622
- """Collect all treatment texts that need translation."""
623
- texts = []
624
-
625
- # Cultural/immediate actions
626
- cultural = treatments.get("cultural", [])
627
- for i, t in enumerate(cultural[:4]):
628
- if t.get("method"):
629
- texts.append((t.get("method", ""), ("cultural_method", i)))
630
- if t.get("description"):
631
- texts.append((t.get("description", ""), ("cultural_desc", i)))
632
-
633
- # Chemical treatments
634
- chemical = treatments.get("chemical", [])
635
- for i, t in enumerate(chemical[:3]):
636
- if t.get("application_method"):
637
- texts.append((t.get("application_method", ""), ("chemical_app", i)))
638
- # Safety precautions
639
- for j, precaution in enumerate(t.get("safety_precautions", [])[:3]):
640
- texts.append((precaution, ("chemical_safety", i, j)))
641
-
642
- # Biological treatments
643
- biological = treatments.get("biological", [])
644
- for i, t in enumerate(biological[:2]):
645
- if t.get("method"):
646
- texts.append((t.get("method", ""), ("bio_method", i)))
647
- if t.get("description"):
648
- texts.append((t.get("description", ""), ("bio_desc", i)))
649
-
650
- # Traditional treatments
651
- traditional = treatments.get("traditional", [])
652
- for i, t in enumerate(traditional[:3]):
653
- if t.get("method"):
654
- texts.append((t.get("method", ""), ("trad_method", i)))
655
- if t.get("description"):
656
- texts.append((t.get("description", ""), ("trad_desc", i)))
657
-
658
- # Resistant varieties notes
659
- varieties = treatments.get("resistant_varieties", [])
660
- for i, v in enumerate(varieties[:3]):
661
- if v.get("notes"):
662
- texts.append((v.get("notes", ""), ("variety_notes", i)))
663
-
664
- return texts
665
-
666
- def _apply_treatment_translations(
667
- self,
668
- report: DiagnosisReport,
669
- treatments: Dict,
670
- translated: List[str],
671
- mapping: List[Tuple]
672
- ) -> None:
673
- """Apply translated treatment texts to report."""
674
-
675
- # Build translation lookup
676
- trans_lookup = {}
677
- for idx, map_info in enumerate(mapping):
678
- if idx < len(translated):
679
- trans_lookup[map_info] = translated[idx]
680
-
681
- # Cultural/immediate actions
682
- cultural = treatments.get("cultural", [])
683
- for i, t in enumerate(cultural[:4]):
684
- action = {
685
- "action": trans_lookup.get(("cultural_method", i), t.get("method", "")),
686
- "description": trans_lookup.get(("cultural_desc", i), t.get("description", "")),
687
- "effectiveness": t.get("effectiveness", ""),
688
- "timing": t.get("timing", "")
689
- }
690
- report.immediate_actions.append(action)
691
-
692
- # Chemical treatments
693
- chemical = treatments.get("chemical", [])
694
- for i, t in enumerate(chemical[:3]):
695
- safety = []
696
- for j in range(3):
697
- key = ("chemical_safety", i, j)
698
- if key in trans_lookup:
699
- safety.append(trans_lookup[key])
700
- elif j < len(t.get("safety_precautions", [])):
701
- safety.append(t.get("safety_precautions", [])[j])
702
-
703
- treatment = {
704
- "product_name": t.get("product_name", ""),
705
- "local_brands": t.get("local_brands", []),
706
- "dosage": t.get("dosage", ""),
707
- "frequency": t.get("frequency", ""),
708
- "application_method": trans_lookup.get(("chemical_app", i), t.get("application_method", "")),
709
- "cost_min": t.get("cost_ngn_min", 0),
710
- "cost_max": t.get("cost_ngn_max", 0),
711
- "effectiveness": t.get("effectiveness", ""),
712
- "safety_precautions": safety
713
- }
714
- report.chemical_treatments.append(treatment)
715
-
716
- # Biological treatments
717
- biological = treatments.get("biological", [])
718
- for i, t in enumerate(biological[:2]):
719
- treatment = {
720
- "method": trans_lookup.get(("bio_method", i), t.get("method", "")),
721
- "description": trans_lookup.get(("bio_desc", i), t.get("description", "")),
722
- "effectiveness": t.get("effectiveness", ""),
723
- "source": t.get("source", "")
724
- }
725
- report.organic_treatments.append(treatment)
726
 
727
- # Traditional treatments
728
- traditional = treatments.get("traditional", [])
729
- for i, t in enumerate(traditional[:3]):
730
- treatment = {
731
- "method": trans_lookup.get(("trad_method", i), t.get("method", "")),
732
- "description": trans_lookup.get(("trad_desc", i), t.get("description", "")),
733
- "cost": t.get("cost_ngn", 0),
734
- "effectiveness": t.get("effectiveness", "")
735
- }
736
- report.traditional_treatments.append(treatment)
737
 
738
- # Resistant varieties
739
- varieties = treatments.get("resistant_varieties", [])
740
- for i, v in enumerate(varieties[:3]):
741
- variety = {
742
- "name": v.get("variety_name", ""),
743
- "resistance_level": v.get("resistance_level", ""),
744
- "source": v.get("source", ""),
745
- "cost": v.get("cost_ngn_per_bundle", 0),
746
- "notes": trans_lookup.get(("variety_notes", i), v.get("notes", ""))
747
- }
748
- report.resistant_varieties.append(variety)
749
 
750
- def _build_treatment_sections_english(
751
  self,
752
  report: DiagnosisReport,
753
- detection_result
 
754
  ) -> None:
755
- """Build treatment sections without translation (English)."""
756
  treatments = detection_result.treatments
757
 
758
- # Immediate actions
759
  cultural = treatments.get("cultural", [])
760
  for t in cultural[:4]:
761
  action = {
762
- "action": t.get("method", ""),
763
- "description": t.get("description", ""),
764
  "effectiveness": t.get("effectiveness", ""),
765
  "timing": t.get("timing", "")
766
  }
@@ -774,20 +502,26 @@ class DiagnosisGenerator:
774
  "local_brands": t.get("local_brands", []),
775
  "dosage": t.get("dosage", ""),
776
  "frequency": t.get("frequency", ""),
777
- "application_method": t.get("application_method", ""),
 
 
 
778
  "cost_min": t.get("cost_ngn_min", 0),
779
  "cost_max": t.get("cost_ngn_max", 0),
780
  "effectiveness": t.get("effectiveness", ""),
781
- "safety_precautions": t.get("safety_precautions", [])[:3]
 
 
 
782
  }
783
  report.chemical_treatments.append(treatment)
784
 
785
- # Biological treatments
786
  biological = treatments.get("biological", [])
787
  for t in biological[:2]:
788
  treatment = {
789
- "method": t.get("method", ""),
790
- "description": t.get("description", ""),
791
  "effectiveness": t.get("effectiveness", ""),
792
  "source": t.get("source", "")
793
  }
@@ -797,8 +531,8 @@ class DiagnosisGenerator:
797
  traditional = treatments.get("traditional", [])
798
  for t in traditional[:3]:
799
  treatment = {
800
- "method": t.get("method", ""),
801
- "description": t.get("description", ""),
802
  "cost": t.get("cost_ngn", 0),
803
  "effectiveness": t.get("effectiveness", "")
804
  }
@@ -812,51 +546,51 @@ class DiagnosisGenerator:
812
  "resistance_level": v.get("resistance_level", ""),
813
  "source": v.get("source", ""),
814
  "cost": v.get("cost_ngn_per_bundle", 0),
815
- "notes": v.get("notes", "")
816
  }
817
  report.resistant_varieties.append(variety)
818
 
819
- def _set_current_projection(
820
  self,
821
  report: DiagnosisReport,
822
- detection_result
 
823
  ) -> None:
824
- """Set current projection based on confidence."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
825
  if detection_result.confidence >= 0.85:
 
826
  stage = "moderate_infection"
827
  elif detection_result.confidence >= 0.60:
 
828
  stage = "early_detection"
829
  else:
 
830
  stage = "early_detection"
831
 
832
  if stage in report.health_projection:
833
  report.current_projection = report.health_projection[stage]
834
 
835
- def _create_summary_message(self, detection_result) -> str:
836
- """Create English summary message for disease detection."""
837
- severity = detection_result.severity_level.replace("_", " ")
838
- confidence_pct = int(detection_result.confidence * 100)
839
-
840
- if detection_result.confidence >= 0.85:
841
- confidence_text = "high confidence"
842
- elif detection_result.confidence >= 0.60:
843
- confidence_text = "moderate confidence"
844
- else:
845
- confidence_text = "low confidence"
846
-
847
- return (
848
- f"We detected {detection_result.display_name} in your {detection_result.crop_type} "
849
- f"with {confidence_text} ({confidence_pct}%). "
850
- f"This is a {severity} severity disease. "
851
- f"Please follow the treatment recommendations below to protect your crop."
852
- )
853
-
854
  # =========================================================================
855
  # TRANSLATION HELPERS
856
  # =========================================================================
857
 
858
  def _translate(self, text: str, language: str) -> str:
859
- """Translate text to target language (single text fallback)."""
860
  if not text or language == "en":
861
  return text
862
 
@@ -866,13 +600,8 @@ class DiagnosisGenerator:
866
  logger.warning(f"Translation failed: {e}")
867
  return text
868
 
869
- def _translate_batch(self, texts: List[str], language: str) -> List[str]:
870
- """
871
- Translate multiple texts in a single batch call.
872
-
873
- This is the OPTIMIZED method - uses one model inference
874
- instead of N separate calls.
875
- """
876
  if not texts or language == "en":
877
  return texts
878
 
@@ -882,10 +611,6 @@ class DiagnosisGenerator:
882
  logger.warning(f"Batch translation failed: {e}")
883
  return texts
884
 
885
- def _translate_list(self, texts: List[str], language: str) -> List[str]:
886
- """Translate a list of texts (alias for _translate_batch)."""
887
- return self._translate_batch(texts, language)
888
-
889
  # =========================================================================
890
  # UTILITY METHODS
891
  # =========================================================================
@@ -996,7 +721,7 @@ def generate_diagnosis_with_image(
996
 
997
  if __name__ == "__main__":
998
  print("=" * 60)
999
- print("Diagnosis Generator Service Test (OPTIMIZED)")
1000
  print("=" * 60)
1001
 
1002
  # Initialize generator
@@ -1036,13 +761,7 @@ if __name__ == "__main__":
1036
  print(f" Report ID: {test_report.report_id}")
1037
  print(f" Short Summary: {test_report.get_short_summary()}")
1038
 
1039
- print("\n6. OPTIMIZATION INFO:")
1040
- print(" - Uses batch translation (1 call instead of 20+)")
1041
- print(" - Expected speedup: ~5x faster")
1042
- print(" - Old time: ~5 minutes")
1043
- print(" - New time: ~30-60 seconds")
1044
-
1045
- print("\n7. To generate actual diagnosis (requires models):")
1046
  print(" >>> report = generator.generate('/path/to/image.jpg', 'ha')")
1047
  print(" >>> print(report.summary_message)")
1048
  print(" >>> print(report.to_json())")
 
1
  """
2
+ FarmEyes Diagnosis Generator Service
3
+ ====================================
4
  Generates complete multilingual diagnosis reports by combining:
5
  - Disease detection results (from YOLO model)
6
  - Knowledge base information (symptoms, treatments, costs)
7
  - N-ATLaS translations (Hausa, Yoruba, Igbo)
8
 
 
 
 
9
  Produces farmer-friendly reports with actionable treatment recommendations.
10
  """
11
 
 
184
  """
185
  Generates complete multilingual diagnosis reports.
186
  Combines disease detection, knowledge base, and translation services.
 
 
187
  """
188
 
189
  def __init__(self, auto_load_models: bool = False):
 
200
  # Load services
201
  self._init_services(auto_load_models)
202
 
203
+ logger.info("DiagnosisGenerator initialized")
204
 
205
  def _init_services(self, auto_load_models: bool) -> None:
206
  """Initialize required services."""
 
296
  return self._build_report(detection_result, language)
297
 
298
  # =========================================================================
299
+ # REPORT BUILDING
300
  # =========================================================================
301
 
302
  def _build_report(
 
327
  report.confidence_level = detection_result.get_confidence_level()
328
 
329
  if detection_result.is_healthy:
330
+ self._build_healthy_report(report, detection_result, language)
331
  else:
332
+ self._build_disease_report(report, detection_result, language)
333
 
334
  return report
335
 
336
+ def _build_healthy_report(
337
  self,
338
  report: DiagnosisReport,
339
  detection_result,
340
  language: str
341
  ) -> None:
342
+ """Build report for healthy plant detection."""
343
+ # Disease name (translated)
344
+ report.disease_name = self._translate(
345
+ detection_result.display_name or "Healthy Plant",
346
+ language
347
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
 
349
+ # Summary message
350
  summary_en = f"Great news! Your {detection_result.crop_type} plant appears to be healthy. Continue with good farming practices to maintain plant health."
351
+ report.summary_message = self._translate(summary_en, language)
 
352
 
353
+ # Healthy message
354
  if detection_result.healthy_message:
355
+ report.healthy_message = self._translate(
356
+ detection_result.healthy_message,
357
+ language
358
+ )
359
+ else:
360
+ report.healthy_message = report.summary_message
361
 
362
+ # Maintenance tips
363
  if detection_result.maintenance_tips:
364
+ report.maintenance_tips = self._translate_list(
365
+ detection_result.maintenance_tips[:6],
366
+ language
367
+ )
368
  else:
369
+ # Default tips
370
+ default_tips = [
371
  "Continue regular monitoring for early disease detection",
372
  "Maintain proper watering and fertilization",
373
  "Keep the field free of weeds",
374
  "Practice crop rotation",
375
  "Use disease-free planting materials"
376
  ]
377
+ report.maintenance_tips = self._translate_list(default_tips, language)
378
 
379
+ # Expected yield
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
  report.expected_yield = detection_result.expected_yield
381
 
382
+ def _build_disease_report(
383
  self,
384
  report: DiagnosisReport,
385
  detection_result,
386
  language: str
387
  ) -> None:
388
+ """Build report for disease detection."""
389
+ # Disease information
390
+ report.disease_name = self._translate(
391
+ detection_result.display_name,
392
+ language
393
+ )
 
 
 
394
  report.disease_name_scientific = detection_result.scientific_name
395
  report.disease_category = detection_result.category
396
+
397
+ # Severity
398
+ report.severity_level = self._translate(
399
+ detection_result.severity_level.replace("_", " ").title(),
400
+ language
401
+ )
402
  report.severity_scale = detection_result.severity_scale
403
+ report.severity_description = self._translate(
404
+ detection_result.severity_description,
405
+ language
406
+ )
407
+
408
+ # Summary message
409
+ summary_en = self._create_summary_message(detection_result)
410
+ report.summary_message = self._translate(summary_en, language)
411
+
412
+ # Symptoms
413
+ if detection_result.symptoms:
414
+ report.symptoms = self._translate_list(
415
+ detection_result.symptoms[:6],
416
+ language
417
+ )
418
+
419
+ # Transmission
420
+ if detection_result.transmission:
421
+ report.transmission = self._translate_list(
422
+ detection_result.transmission[:5],
423
+ language
424
+ )
425
+
426
+ # Yield impact
427
  report.yield_loss_min = detection_result.yield_loss_min
428
  report.yield_loss_max = detection_result.yield_loss_max
429
+ yield_msg_en = f"This disease can cause {detection_result.yield_loss_min}% to {detection_result.yield_loss_max}% yield loss if not treated."
430
+ report.yield_loss_message = self._translate(yield_msg_en, language)
431
+
432
+ # Treatments
433
+ self._build_treatment_sections(report, detection_result, language)
434
+
435
+ # Costs
436
  report.treatment_cost_min = detection_result.treatment_cost_min
437
  report.treatment_cost_max = detection_result.treatment_cost_max
438
+ cost_msg_en = f"Estimated treatment cost: ₦{detection_result.treatment_cost_min:,} to ₦{detection_result.treatment_cost_max:,} per hectare."
439
+ report.cost_message = self._translate(cost_msg_en, language)
440
+
441
+ # Prevention
442
+ if detection_result.prevention:
443
+ report.prevention_tips = self._translate_list(
444
+ detection_result.prevention[:6],
445
+ language
446
+ )
447
 
448
+ # Health projection
449
+ self._build_health_projection(report, detection_result, language)
450
+
451
+ # Expert contact
452
  expert = detection_result.expert_contact
453
  if expert:
454
  report.expert_institution = expert.get("institution", "")
455
  report.expert_location = expert.get("location", "")
456
  report.expert_services = expert.get("services", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
 
458
+ def _create_summary_message(self, detection_result) -> str:
459
+ """Create English summary message for disease detection."""
460
+ severity = detection_result.severity_level.replace("_", " ")
461
+ confidence_pct = int(detection_result.confidence * 100)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
 
463
+ if detection_result.confidence >= 0.85:
464
+ confidence_text = "high confidence"
465
+ elif detection_result.confidence >= 0.60:
466
+ confidence_text = "moderate confidence"
467
+ else:
468
+ confidence_text = "low confidence"
 
 
 
 
469
 
470
+ return (
471
+ f"We detected {detection_result.display_name} in your {detection_result.crop_type} "
472
+ f"with {confidence_text} ({confidence_pct}%). "
473
+ f"This is a {severity} severity disease. "
474
+ f"Please follow the treatment recommendations below to protect your crop."
475
+ )
 
 
 
 
 
476
 
477
+ def _build_treatment_sections(
478
  self,
479
  report: DiagnosisReport,
480
+ detection_result,
481
+ language: str
482
  ) -> None:
483
+ """Build all treatment sections of the report."""
484
  treatments = detection_result.treatments
485
 
486
+ # Immediate actions (cultural practices)
487
  cultural = treatments.get("cultural", [])
488
  for t in cultural[:4]:
489
  action = {
490
+ "action": self._translate(t.get("method", ""), language),
491
+ "description": self._translate(t.get("description", ""), language),
492
  "effectiveness": t.get("effectiveness", ""),
493
  "timing": t.get("timing", "")
494
  }
 
502
  "local_brands": t.get("local_brands", []),
503
  "dosage": t.get("dosage", ""),
504
  "frequency": t.get("frequency", ""),
505
+ "application_method": self._translate(
506
+ t.get("application_method", ""),
507
+ language
508
+ ),
509
  "cost_min": t.get("cost_ngn_min", 0),
510
  "cost_max": t.get("cost_ngn_max", 0),
511
  "effectiveness": t.get("effectiveness", ""),
512
+ "safety_precautions": self._translate_list(
513
+ t.get("safety_precautions", [])[:3],
514
+ language
515
+ )
516
  }
517
  report.chemical_treatments.append(treatment)
518
 
519
+ # Biological/organic treatments
520
  biological = treatments.get("biological", [])
521
  for t in biological[:2]:
522
  treatment = {
523
+ "method": self._translate(t.get("method", ""), language),
524
+ "description": self._translate(t.get("description", ""), language),
525
  "effectiveness": t.get("effectiveness", ""),
526
  "source": t.get("source", "")
527
  }
 
531
  traditional = treatments.get("traditional", [])
532
  for t in traditional[:3]:
533
  treatment = {
534
+ "method": self._translate(t.get("method", ""), language),
535
+ "description": self._translate(t.get("description", ""), language),
536
  "cost": t.get("cost_ngn", 0),
537
  "effectiveness": t.get("effectiveness", "")
538
  }
 
546
  "resistance_level": v.get("resistance_level", ""),
547
  "source": v.get("source", ""),
548
  "cost": v.get("cost_ngn_per_bundle", 0),
549
+ "notes": self._translate(v.get("notes", ""), language)
550
  }
551
  report.resistant_varieties.append(variety)
552
 
553
+ def _build_health_projection(
554
  self,
555
  report: DiagnosisReport,
556
+ detection_result,
557
+ language: str
558
  ) -> None:
559
+ """Build health projection section."""
560
+ projection = detection_result.health_projection
561
+
562
+ if not projection:
563
+ return
564
+
565
+ # Translate all projection stages
566
+ for stage, info in projection.items():
567
+ if isinstance(info, dict):
568
+ report.health_projection[stage] = {
569
+ "recovery_chance_percent": info.get("recovery_chance_percent", 0),
570
+ "message": self._translate(info.get("message", ""), language)
571
+ }
572
+
573
+ # Set current projection based on confidence
574
+ # Higher confidence often correlates with more visible/advanced symptoms
575
  if detection_result.confidence >= 0.85:
576
+ # Clear symptoms suggest moderate to severe infection
577
  stage = "moderate_infection"
578
  elif detection_result.confidence >= 0.60:
579
+ # Some symptoms visible - likely early detection
580
  stage = "early_detection"
581
  else:
582
+ # Low confidence - could be very early
583
  stage = "early_detection"
584
 
585
  if stage in report.health_projection:
586
  report.current_projection = report.health_projection[stage]
587
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
588
  # =========================================================================
589
  # TRANSLATION HELPERS
590
  # =========================================================================
591
 
592
  def _translate(self, text: str, language: str) -> str:
593
+ """Translate text to target language."""
594
  if not text or language == "en":
595
  return text
596
 
 
600
  logger.warning(f"Translation failed: {e}")
601
  return text
602
 
603
+ def _translate_list(self, texts: List[str], language: str) -> List[str]:
604
+ """Translate a list of texts."""
 
 
 
 
 
605
  if not texts or language == "en":
606
  return texts
607
 
 
611
  logger.warning(f"Batch translation failed: {e}")
612
  return texts
613
 
 
 
 
 
614
  # =========================================================================
615
  # UTILITY METHODS
616
  # =========================================================================
 
721
 
722
  if __name__ == "__main__":
723
  print("=" * 60)
724
+ print("Diagnosis Generator Service Test")
725
  print("=" * 60)
726
 
727
  # Initialize generator
 
761
  print(f" Report ID: {test_report.report_id}")
762
  print(f" Short Summary: {test_report.get_short_summary()}")
763
 
764
+ print("\n6. To generate actual diagnosis (requires models):")
 
 
 
 
 
 
765
  print(" >>> report = generator.generate('/path/to/image.jpg', 'ha')")
766
  print(" >>> print(report.summary_message)")
767
  print(" >>> print(report.to_json())")