"""Gradio app that detects the language of input text with several
third-party detection libraries and shows the results side by side.

Also exposes the detector over the Gradio API / MCP server.
"""

import gradio as gr
import languagecodes
import inspect
import httpx, os
import polars as pl

# ISO language table: skip the first row and keep only rows that actually
# carry an ISO 639-1 code.
df = pl.read_parquet("isolanguages.parquet")
non_empty_isos = df.slice(1).filter(pl.col("ISO639-1") != "").rows()

# all_langs = languagecodes.iso_languages_byname
# Language name -> codes, e.g. {'Romanian': ('ro', 'rum', 'ron')}
all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos}
# ISO 639-1 code -> name + other codes, e.g. {'ro': ('Romanian', 'rum', 'ron')}
iso1toall = {iso[1]: (iso[0], iso[2], iso[3]) for iso in non_empty_isos}

DEFAULTS = None
libraries = ["langdetect", "py3langid", "langid", "lingua-py", "pycld2",
             "fastlangdetect", "fasttext", "openlid", "glotlid"]


class Detect():
    """Wraps one input text and exposes one detection method per library.

    Each method returns ``[language_code, confidence]``; the confidence is
    a percentage rounded to two decimals (``lingua`` formats it as a string,
    ``py3langid`` reports the library's raw score unscaled — both preserved
    as-is for the results table). Third-party imports are done lazily inside
    each method so unused libraries are never loaded.
    """

    def __init__(self, text: str) -> None:
        self.text: str = text  # the text whose language is detected

    def langdetect(self) -> list:
        """Detect with `langdetect` (port of Google's language-detection)."""
        from langdetect import detect, detect_langs
        from langdetect import DetectorFactory
        DetectorFactory.seed = 0  # make langdetect deterministic across calls
        langcode = detect(self.text)
        probabilities = detect_langs(self.text)
        return [langcode, round(probabilities[0].prob * 100, ndigits=2)]

    def langid(self) -> list:
        """Detect with `langid`, with normalized probabilities."""
        from langid.langid import LanguageIdentifier, model
        identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)
        lang, prob = identifier.classify(self.text)
        return [lang, abs(round(prob * 100, ndigits=2))]

    def py3langid(self) -> list:
        """Detect with `py3langid` (modernized fork of langid)."""
        import py3langid
        lang, prob = py3langid.classify(self.text)
        # NOTE(review): unlike the other detectors this score is not scaled
        # to a percentage — preserved from the original implementation.
        return [lang, abs(round(prob, ndigits=2))]

    def lingua(self) -> list:
        """Detect with `lingua`; returns the top candidate's confidence."""
        from lingua import Language, LanguageDetectorBuilder
        detector = LanguageDetectorBuilder.from_all_languages().with_preloaded_language_models().build()
        confidence_values = detector.compute_language_confidence_values(self.text)
        top = confidence_values[0]
        return [top.language.iso_code_639_1.name.lower(),
                "{0:.2f}".format(top.value * 100)]

    def fasttextlangdetect(self) -> list:
        """Detect with `ftlangdetect` (fastText wrapper)."""
        # requires numpy < 2.0
        from ftlangdetect import detect
        result = detect(text=self.text, low_memory=False)
        return [result.get('lang'), abs(round(result.get('score') * 100, ndigits=2))]

    def fastlangdetect(self) -> list:
        """Detect with `fast_langdetect`; keeps only the best (k=1) guess."""
        from fast_langdetect import detect
        result = detect(text=self.text, model="auto", k=1)[0]
        return [result.get('lang'), abs(round(result.get('score') * 100, ndigits=2))]

    def pycld2(self) -> list:
        """Detect with `pycld2` (Compact Language Detect 2)."""
        import pycld2 as cld2
        # available_languages = cld2.LANGUAGES
        isReliable, textBytesFound, details = cld2.detect(
            self.text, returnVectors=False, bestEffort=True)
        # details[0] is the best guess: (name, code, percent, score)
        return [details[0][1], round(details[0][2], 2)]

    def parse_fastext(self, repo_id: str, k: int = 3) -> list:
        """Download a fastText LID model from the Hugging Face Hub and
        classify the text.

        Parameters:
            repo_id: Hub repository holding a fastText ``model.bin``.
            k: number of label candidates to request (top one is used).

        Returns:
            ``[iso639_1_code, confidence_percent]`` for the best label.
        """
        import fasttext
        from huggingface_hub import hf_hub_download
        model_path = hf_hub_download(repo_id=repo_id, filename="model.bin")
        model = fasttext.load_model(model_path)
        labels, probabilities = model.predict(self.text, k=k)
        # Labels look like '__label__ron_Latn' (NLLB-style); map the NLLB
        # code back to a language name, then to its ISO 639-1 code.
        reversed_nllb_langs = {v: k for k, v in languagecodes.nllb_language_codes.items()}
        long_langname = reversed_nllb_langs[labels[0].replace('__label__', '')]
        lang_code = all_langs[long_langname][0]
        return [lang_code, round(probabilities[0] * 100, ndigits=2)]

    # The three fastText-based detectors differ only in the Hub repo, so
    # they all delegate to parse_fastext (the original duplicated its body
    # verbatim in fasttext() and openlid()).

    def fasttext(self) -> list:
        """Detect with Meta's fastText language-identification model."""
        return self.parse_fastext("facebook/fasttext-language-identification")

    def openlid(self) -> list:
        """Detect with the OpenLID v2 fastText model."""
        return self.parse_fastext("laurievb/OpenLID-v2")

    def glotlid(self) -> list:
        """Detect with the GlotLID fastText model."""
        repo_id = "cis-lmu/glotlid"
        return self.parse_fastext(repo_id)


def detect_language(input_text: str, used_libraries: list[str]) -> list[list]:
    """Detects the language of the input text.

    Parameters:
        input_text (str): The source text whose language is detected
        used_libraries (list[str]): The libraries to be used for detection

    Returns:
        A list of ``[library, language_code, confidence]`` rows — one per
        selected library — plus a final summary row with the unique
        languages/codes found. If the input text or library selection is
        empty, a single explanatory row is returned instead.

    Example:
        >>> detect_language("Hello world", ["langdetect"])  # doctest: +SKIP
        [['langdetect', 'en', 99.99], ['Unique languages: ...', ...]]
    """
    detections = []
    if not input_text or not used_libraries:
        return [['No input text or library selected', 'Please provide input text and/or select a detection library']]

    detectinstance = Detect(input_text)
    # Dispatch table; insertion order matches the original if-chain order.
    dispatch = {
        'langdetect': detectinstance.langdetect,
        'langid': detectinstance.langid,
        'py3langid': detectinstance.py3langid,
        'lingua-py': detectinstance.lingua,
        'pycld2': detectinstance.pycld2,
        'fastlangdetect': detectinstance.fastlangdetect,
        'fasttext': detectinstance.fasttext,
        'openlid': detectinstance.openlid,
        'glotlid': detectinstance.glotlid,
    }
    for library, method in dispatch.items():
        if library in used_libraries:
            detections.append([library] + method())

    unique_codes = list(set(x[1] for x in detections))
    # .get avoids a KeyError for codes missing from the ISO 639-1 table
    # (e.g. langdetect may report 'zh-cn'); fall back to the raw code.
    unique_languages = [iso1toall.get(x, (x,))[0] for x in unique_codes]
    detections.append([f'Unique languages: {unique_languages}',
                       f'Unique codes: {unique_codes}',
                       f'Languages detected: {len(unique_codes)}'])
    print(unique_codes, unique_languages, detections)
    return detections


with gr.Blocks() as interface:
    gr.Markdown("### Language Detection with Gradio API and MCP Server")
    input_text = gr.Textbox(label="Enter text to detect:",
                            placeholder="Type/copy text here, maximum 512 characters",
                            autofocus=True, submit_btn='Detect Language', max_length=512)
    with gr.Row(variant="compact"):
        used_libraries = gr.CheckboxGroup(choices=libraries, value=libraries,
                                          label="Detection libraries",
                                          show_select_all=True)
    dataframe = gr.Dataframe(
        headers=["Library", "Language code", "Score"],
        datatype=["str", "str", "number"],
        type='array',
        row_count=len(libraries),
        column_count=3,
        column_limits=(2, 4),
        label='Language detection dataframe'
    )
    input_text.submit(
        fn=detect_language,
        inputs=[input_text, used_libraries],
        outputs=[dataframe]
    )

if __name__ == "__main__":
    interface.launch(mcp_server=True, footer_links=["api", "settings"])
    # interface.queue().launch(server_name="0.0.0.0", show_error=True, server_port=7860, mcp_server=True)