JustNikunj committed on
Commit
5e32e8d
·
verified ·
1 Parent(s): 6b8f285

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -39
app.py CHANGED
@@ -49,18 +49,20 @@ def load_models():
49
  # Load IndicWhisper for Hindi ASR
50
  print("🎀 Loading IndicWhisper Hindi ASR model...")
51
  try:
52
- ASR_PROCESSOR = AutoProcessor.from_pretrained("vasista22/whisper-hindi-medium")
53
- ASR_MODEL = AutoModelForSpeechSeq2Seq.from_pretrained("vasista22/whisper-hindi-medium")
54
-
55
- # Create pipeline with the loaded model
56
  ASR_PIPELINE = pipeline(
57
  "automatic-speech-recognition",
58
- model=ASR_MODEL,
59
- tokenizer=ASR_PROCESSOR.tokenizer,
60
- feature_extractor=ASR_PROCESSOR.feature_extractor,
61
- device="cpu",
62
- chunk_length_s=30
 
 
 
 
63
  )
 
64
  print("βœ… IndicWhisper Hindi ASR model loaded successfully")
65
  except Exception as e:
66
  print(f"❌ Error loading IndicWhisper, trying fallback: {e}")
@@ -360,8 +362,7 @@ def predict(audio_filepath):
360
  # Validation
361
  if audio_filepath is None:
362
  return {
363
- "⚠️ Error": 1.0,
364
- "Message": "No audio file uploaded"
365
  }
366
 
367
  # ============================================
@@ -384,13 +385,8 @@ def predict(audio_filepath):
384
  # ============================================
385
  print("πŸ”„ Transcribing with cached IndicWhisper model...")
386
  try:
387
- result = ASR_PIPELINE(
388
- audio_filepath,
389
- generate_kwargs={
390
- "language": "hindi",
391
- "task": "transcribe"
392
- }
393
- )
394
 
395
  transcription = result["text"].strip()
396
  print(f"πŸ“ Transcription: '{transcription}'")
@@ -398,8 +394,7 @@ def predict(audio_filepath):
398
  except Exception as asr_error:
399
  print(f"❌ ASR Error: {asr_error}")
400
  return {
401
- "⚠️ ASR Error": 1.0,
402
- "Message": str(asr_error)
403
  }
404
 
405
  # ============================================
@@ -407,8 +402,7 @@ def predict(audio_filepath):
407
  # ============================================
408
  if not transcription or len(transcription) < 2:
409
  return {
410
- "⚠️ No Speech Detected": 1.0,
411
- "Transcription": transcription or "Empty"
412
  }
413
 
414
  is_valid, validation_msg, hindi_ratio = validate_hindi_text(transcription)
@@ -416,9 +410,8 @@ def predict(audio_filepath):
416
 
417
  if not is_valid:
418
  return {
419
- "⚠️ Language Error": 1.0,
420
- "Message": validation_msg,
421
- "Transcription": transcription
422
  }
423
 
424
  # ============================================
@@ -435,19 +428,25 @@ def predict(audio_filepath):
435
  )
436
 
437
  # ============================================
438
- # STEP 5: Format Results
439
  # ============================================
440
  result_dict = {}
441
 
 
442
  for sentiment, score in sorted(sentiment_scores.items(), key=lambda x: x[1], reverse=True):
443
  result_dict[f"{sentiment}"] = float(score)
444
 
445
- result_dict["πŸ“ Transcription"] = transcription
446
- result_dict["🎯 Confidence"] = float(confidence)
447
- result_dict["πŸ”€ Mixed Emotions"] = "Yes" if is_mixed else "No"
448
- result_dict["🌐 Hindi Content"] = f"{hindi_ratio*100:.0f}%"
 
449
 
 
 
450
  print(f"βœ… Complete! Confidence: {confidence:.3f}")
 
 
451
  print(f"{'='*60}\n")
452
 
453
  return result_dict
@@ -455,9 +454,7 @@ def predict(audio_filepath):
455
  except Exception as sentiment_error:
456
  print(f"❌ Sentiment Error: {sentiment_error}")
457
  return {
458
- "⚠️ Sentiment Error": 1.0,
459
- "Message": str(sentiment_error),
460
- "Transcription": transcription
461
  }
462
 
463
  except Exception as e:
@@ -465,8 +462,7 @@ def predict(audio_filepath):
465
  import traceback
466
  traceback.print_exc()
467
  return {
468
- "⚠️ System Error": 1.0,
469
- "Message": str(e)
470
  }
471
 
472
  # ============================================
@@ -508,10 +504,10 @@ demo = gr.Interface(
508
 
509
  ### πŸ“Š Output Includes:
510
  - Sentiment probabilities (Positive/Negative/Neutral)
511
- - Exact transcription in Hindi/Devanagari
512
- - Confidence score (how sure the model is)
513
- - Mixed emotion indicator
514
- - Language composition (% Hindi content)
515
 
516
  ### πŸ’‘ Best Practices:
517
  1. Speak clearly for 3-10 seconds
 
49
  # Load IndicWhisper for Hindi ASR
50
  print("🎀 Loading IndicWhisper Hindi ASR model...")
51
  try:
52
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
 
 
53
  ASR_PIPELINE = pipeline(
54
  "automatic-speech-recognition",
55
+ model="vasista22/whisper-hindi-medium",
56
+ chunk_length_s=30,
57
+ device=device
58
+ )
59
+
60
+ # FIX: Set forced_decoder_ids properly for the model config
61
+ ASR_PIPELINE.model.config.forced_decoder_ids = ASR_PIPELINE.tokenizer.get_decoder_prompt_ids(
62
+ language="hi",
63
+ task="transcribe"
64
  )
65
+
66
  print("βœ… IndicWhisper Hindi ASR model loaded successfully")
67
  except Exception as e:
68
  print(f"❌ Error loading IndicWhisper, trying fallback: {e}")
 
362
  # Validation
363
  if audio_filepath is None:
364
  return {
365
+ "⚠️ Error": "No audio file uploaded"
 
366
  }
367
 
368
  # ============================================
 
385
  # ============================================
386
  print("πŸ”„ Transcribing with cached IndicWhisper model...")
387
  try:
388
+ # FIX: Don't pass language in generate_kwargs, it's already set in model config
389
+ result = ASR_PIPELINE(audio_filepath)
 
 
 
 
 
390
 
391
  transcription = result["text"].strip()
392
  print(f"πŸ“ Transcription: '{transcription}'")
 
394
  except Exception as asr_error:
395
  print(f"❌ ASR Error: {asr_error}")
396
  return {
397
+ "⚠️ ASR Error": str(asr_error)
 
398
  }
399
 
400
  # ============================================
 
402
  # ============================================
403
  if not transcription or len(transcription) < 2:
404
  return {
405
+ "⚠️ No Speech Detected": f"Transcription: {transcription or 'Empty'}"
 
406
  }
407
 
408
  is_valid, validation_msg, hindi_ratio = validate_hindi_text(transcription)
 
410
 
411
  if not is_valid:
412
  return {
413
+ "⚠️ Language Error": validation_msg,
414
+ "πŸ“ Transcription": transcription
 
415
  }
416
 
417
  # ============================================
 
428
  )
429
 
430
  # ============================================
431
+ # STEP 5: Format Results (FIX: All values must be float)
432
  # ============================================
433
  result_dict = {}
434
 
435
+ # Add sentiment scores (all floats)
436
  for sentiment, score in sorted(sentiment_scores.items(), key=lambda x: x[1], reverse=True):
437
  result_dict[f"{sentiment}"] = float(score)
438
 
439
+ # FIX: Convert all metadata to float values for compatibility
440
+ # Use very small values to put them at the bottom of the sorted list
441
+ result_dict["_Confidence"] = float(confidence)
442
+ result_dict["_Mixed_Emotions"] = 1.0 if is_mixed else 0.0
443
+ result_dict["_Hindi_Content_Pct"] = float(hindi_ratio * 100)
444
 
445
+ # Store transcription separately for display
446
+ print(f"πŸ“ Full Transcription: {transcription}")
447
  print(f"βœ… Complete! Confidence: {confidence:.3f}")
448
+ print(f"πŸ”€ Mixed Emotions: {'Yes' if is_mixed else 'No'}")
449
+ print(f"🌐 Hindi Content: {hindi_ratio*100:.0f}%")
450
  print(f"{'='*60}\n")
451
 
452
  return result_dict
 
454
  except Exception as sentiment_error:
455
  print(f"❌ Sentiment Error: {sentiment_error}")
456
  return {
457
+ "⚠️ Sentiment Error": str(sentiment_error)
 
 
458
  }
459
 
460
  except Exception as e:
 
462
  import traceback
463
  traceback.print_exc()
464
  return {
465
+ "⚠️ System Error": str(e)
 
466
  }
467
 
468
  # ============================================
 
504
 
505
  ### πŸ“Š Output Includes:
506
  - Sentiment probabilities (Positive/Negative/Neutral)
507
+ - _Confidence: Prediction confidence score
508
+ - _Mixed_Emotions: 1.0 if mixed, 0.0 if not
509
+ - _Hindi_Content_Pct: Percentage of Hindi characters
510
+ - Check console logs for full transcription
511
 
512
  ### πŸ’‘ Best Practices:
513
  1. Speak clearly for 3-10 seconds