Spaces:

kasimali
/

xlit-testing

Runtime error

App Files Files Community

kasimali committed on Oct 8, 2025

Commit

571d55d

verified ·

1 Parent(s): 496a913

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

README.md +3 -8
app.py +262 -0
requirements.txt +2 -0

README.md CHANGED Viewed

@@ -1,12 +1,7 @@
 ---
-title: Xlit Testing
-emoji: 👁
-colorFrom: green
-colorTo: purple
 sdk: gradio
-sdk_version: 5.49.0
-app_file: app.py
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: XLIT-TESTING
+emoji: 🚀
 sdk: gradio
 ---
+# XLIT-TESTING

app.py ADDED Viewed

	@@ -0,0 +1,262 @@

+# XLIT-TESTING
+import gradio as gr
+import pandas as pd
+import requests
+from typing import List, Dict, Union, Optional
+import io
+# IndicXlit transliteration API client
+class IndicXlitClient:
+    """Simple client for IndicXlit Transliteration API"""
+    def __init__(self, api_url: str = "https://awake-blowfish-liberal.ngrok-free.app"):
+        self.api_url = api_url.rstrip('/')
+        self.session = requests.Session()
+        self.session.headers.update({
+            'Content-Type': 'application/json',
+            'Accept': 'application/json'
+        })
+    def health_check(self) -> dict:
+        try:
+            response = self.session.get(f"{self.api_url}/health")
+            response.raise_for_status()
+            return response.json()
+        except Exception as e:
+            return {"error": str(e), "status": "unhealthy"}
+    def get_supported_languages(self) -> List[str]:
+        try:
+            response = self.session.get(f"{self.api_url}/languages")
+            response.raise_for_status()
+            data = response.json()
+            return data.get("supported_languages", [])
+        except Exception as e:
+            print(f"Error getting languages: {e}")
+            return []
+    def english_to_indic(self, text: str, target_languages: Union[str, List[str]], beam_width: int = 4) -> Dict[str, str]:
+        try:
+            payload = {
+                "text": text,
+                "target_languages": target_languages,
+                "beam_width": beam_width
+            }
+            response = self.session.post(
+                f"{self.api_url}/transliterate/en-to-indic",
+                json=payload
+            )
+            response.raise_for_status()
+            result = response.json()
+            if result.get("success"):
+                return result.get("results", {})
+            else:
+                print(f"API Error: {result}")
+                return {}
+        except Exception as e:
+            print(f"Error transliterating: {e}")
+            return {}
+# Shared module-level client (default API URL) used by the convenience functions below
+client = IndicXlitClient()
+# Convenience functions
+def transliterate_from_en(text: str, target_languages: Union[str, List[str]]) -> Dict[str, str]:
+    return client.english_to_indic(text, target_languages)
+def get_supported_languages() -> List[str]:
+    return client.get_supported_languages()
+def check_api_health() -> bool:
+    health = client.health_check()
+    return health.get("status") == "healthy"
+# Test API connectivity
+print("🔄 Testing IndicXlit API connectivity...")
+if check_api_health():
+    print("✅ IndicXlit API is healthy and ready!")
+    supported_langs = get_supported_languages()
+    print(f"📋 Supported languages: {supported_langs}")
+    print(f"📊 Total supported languages: {len(supported_langs)}")
+else:
+    print("⚠️ IndicXlit API is not available")
+    print("❌ Please check your API URL or connection")
+print("✅ IndicXlit API setup completed!")
+# Master language mapping for IndicXlit model testing
+INDICXLIT_LANGUAGE_MAPPING = {
+    # Language name to IndicXlit API code mapping
+    'assamese': 'as',
+    'bengali': 'bn',
+    'bodo': 'brx',
+    'gujarati': 'gu',
+    'hindi': 'hi',
+    'kannada': 'kn',
+    'kashmiri': 'ks',
+    'konkani': 'gom',  # IndicXlit uses 'gom' for Konkani
+    'maithili': 'mai',
+    'malayalam': 'ml',
+    'marathi': 'mr',
+    'manipuri': 'mni',
+    'nepali': 'ne',
+    'odia': 'or',
+    'punjabi': 'pa',
+    'sanskrit': 'sa',
+    'sindhi': 'sd',
+    'tamil': 'ta',
+    'telugu': 'te',
+    'urdu': 'ur'
+}
+# Languages NOT supported by IndicXlit (based on your previous testing)
+UNSUPPORTED_LANGUAGES = ['dogri', 'santali']
+print("📋 IndicXlit Language Mapping:")
+for lang_name, code in INDICXLIT_LANGUAGE_MAPPING.items():
+    print(f"  {lang_name.capitalize()}: {code}")
+print(f"\n⚠️ Unsupported languages: {', '.join(UNSUPPORTED_LANGUAGES)}")
+print(f"✅ Total mappings loaded: {len(INDICXLIT_LANGUAGE_MAPPING)}")
+from google.colab import files
+import pandas as pd
+def process_excel_dataset_with_indicxlit():
+    """
+    Process Excel dataset using ONLY IndicXlit model
+    Input: Excel file with columns - Language, Roman Script, Native Script, English Translation
+    Output: Excel with all ground truth columns + IndicXlit Native Output
+    """
+    print("📁 Please upload your Excel file containing the dataset...")
+    uploaded = files.upload()
+    for filename in uploaded.keys():
+        print(f"📄 Processing file: {filename}")
+        # Read the Excel file
+        try:
+            df_input = pd.read_excel(filename)
+            print(f"✅ Successfully loaded Excel with {len(df_input)} rows")
+            # Display column names to verify structure
+            print(f"📋 Columns found: {list(df_input.columns)}")
+            # Identify columns (case-insensitive matching)
+            column_mapping = {}
+            for col in df_input.columns:
+                col_lower = col.lower().strip()
+                if 'language' in col_lower:
+                    column_mapping['language'] = col
+                elif 'roman' in col_lower:
+                    column_mapping['roman'] = col
+                elif 'native' in col_lower:
+                    column_mapping['native'] = col
+                elif 'english' in col_lower:
+                    column_mapping['english'] = col
+            print(f"🔍 Column mapping: {column_mapping}")
+            # Check if all required columns are found
+            if len(column_mapping) < 4:
+                print("❌ Could not identify all required columns (Language, Roman, Native, English)")
+                return None
+            results = []
+            print(f"🔄 Processing {len(df_input)} samples with IndicXlit model...")
+            for i, row in df_input.iterrows():
+                language = str(row[column_mapping['language']]).lower().strip()
+                roman_text = str(row[column_mapping['roman']]).strip()
+                native_ground_truth = str(row[column_mapping['native']]).strip()
+                english_text = str(row[column_mapping['english']]).strip()
+                # Skip if language not supported
+                if language in UNSUPPORTED_LANGUAGES:
+                    indicxlit_native_output = "NOT_SUPPORTED"
+                    status = "UNSUPPORTED_LANGUAGE"
+                    target_code = "N/A"
+                elif language in INDICXLIT_LANGUAGE_MAPPING:
+                    target_code = INDICXLIT_LANGUAGE_MAPPING[language]
+                    try:
+                        # Use IndicXlit API for transliteration
+                        api_results = transliterate_from_en(roman_text, target_code)
+                        if api_results and target_code in api_results:
+                            indicxlit_native_output = api_results[target_code]
+                            status = "SUCCESS"
+                        else:
+                            indicxlit_native_output = roman_text  # Fallback to original
+                            status = "API_FAILED"
+                    except Exception as e:
+                        indicxlit_native_output = roman_text  # Fallback to original
+                        status = f"ERROR: {str(e)}"
+                else:
+                    indicxlit_native_output = "LANGUAGE_NOT_MAPPED"
+                    status = "UNKNOWN_LANGUAGE"
+                    target_code = "N/A"
+                # Create result row with all ground truth + IndicXlit output
+                results.append({
+                    'Language': language.capitalize(),
+                    'Roman_Script_Input': roman_text,
+                    'Native_Script_Ground_Truth': native_ground_truth,
+                    'English_Translation_Ground_Truth': english_text,
+                    'IndicXlit_Native_Output': indicxlit_native_output,
+                    'Processing_Status': status,
+                    'IndicXlit_Code': target_code
+                })
+                if (i + 1) % 50 == 0:
+                    print(f"✅ Processed {i + 1}/{len(df_input)} samples...")
+            # Create results DataFrame
+            df_results = pd.DataFrame(results)
+            # Display summary
+            print("\n📊 Processing Summary:")
+            print(f"Total samples processed: {len(df_results)}")
+            print(f"Successful translations: {len(df_results[df_results['Processing_Status'] == 'SUCCESS'])}")
+            print(f"Failed translations: {len(df_results[df_results['Processing_Status'] != 'SUCCESS'])}")
+            # Language-wise breakdown
+            print(f"\n📈 Language-wise breakdown:")
+            lang_summary = df_results['Language'].value_counts()
+            for lang, count in lang_summary.items():
+                success_count = len(df_results[(df_results['Language'] == lang) & (df_results['Processing_Status'] == 'SUCCESS')])
+                print(f"  {lang}: {count} total, {success_count} successful")
+            # Save to Excel
+            output_filename = "indicxlit_excel_results_with_ground_truth.xlsx"
+            df_results.to_excel(output_filename, index=False, engine='openpyxl')
+            print(f"\n💾 Results saved to: {output_filename}")
+            # Download the file
+            # Display first few rows
+            print("\n📋 Sample Results:")
+            print(df_results.head())
+            return df_results
+        except Exception as e:
+            print(f"❌ Error processing Excel file: {str(e)}")
+            return None
+# Run the processing function
+print("🚀 Ready to process Excel dataset with IndicXlit model")
+print("📊 Expected Excel columns: Language, Roman Script, Native Script, English Translation")
+print("👆 Execute the function below to start:")
+print("df_results = process_excel_dataset_with_indicxlit()")
+df_results = process_excel_dataset_with_indicxlit()

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ gradio
2	+ pandas
3	+ requests
4	+ openpyxl