# Hugging Face Space: SinaLab/alma-api (status: Running; file size: 1,905 bytes)

from fastapi import FastAPI
from huggingface_hub import hf_hub_download
import os
from pydantic import BaseModel
from fastapi.responses import JSONResponse

# Startup marker written to stdout each time this module is imported.
print("Version ---- 2")
# Application object; the route decorator further down registers its handler on it.
app = FastAPI()

def download_file_from_hf(repo_id: str, filename: str) -> str:
    """Fetch one file from a Hugging Face repo into ``~/.sinatools``.

    The destination directory is created if it does not exist yet. The
    download itself is delegated to ``hf_hub_download`` with symlinks
    disabled, so the destination is requested to hold a real file.

    Args:
        repo_id: Hub repository identifier, e.g. ``"SinaLab/ALMA"``.
        filename: Name of the file inside that repository.

    Returns:
        Whatever ``hf_hub_download`` returns (the local path of the file).
    """
    sinatools_home = os.path.expanduser("~/.sinatools")
    os.makedirs(sinatools_home, exist_ok=True)
    return hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        local_dir=sinatools_home,
        local_dir_use_symlinks=False,
    )

# Resource files fetched at import time, as (repo_id, filename) pairs.
# NOTE(review): presumably the sinatools imports that follow expect these
# files under ~/.sinatools, which is why the downloads run first — confirm.
_STARTUP_DOWNLOADS = (
    ("SinaLab/ALMA", "lemmas_dic.pickle"),
    ("SinaLab/ArabGlossBERT", "two_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "three_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "four_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "five_grams.pickle"),
)

for _repo_id, _filename in _STARTUP_DOWNLOADS:
    download_file_from_hf(_repo_id, _filename)

from sinatools.morphology.morph_analyzer import analyze
from sinatools.utils.tokenizer import sentence_tokenizer

# Request body accepted by the POST /predict endpoint.
# Documented with comments rather than a class docstring, because Pydantic
# would publish a docstring as the model's description in the JSON schema.
class ALMARequest(BaseModel):
    # Raw input text; the handler splits it into sentences before analysis.
    text: str
    # Forwarded unchanged as the second argument of sinatools' analyze().
    # NOTE(review): accepted values are defined by sinatools — confirm there.
    language: str
    # Forwarded unchanged as the third argument of analyze().
    task: str
    # Forwarded (after str()) as the fourth argument of analyze().
    flag: str

@app.post("/predict")
def predict(request: ALMARequest):
    # Split the submitted text into sentences, run the sinatools analyzer on
    # each one, and return the per-sentence results in a JSON envelope.
    #
    # Kept as comments instead of a docstring on purpose: FastAPI would publish
    # a docstring as this operation's description in the OpenAPI schema.

    # Sentence boundaries: full stops, newlines, question marks and
    # exclamation marks are all enabled as split points.
    pieces = sentence_tokenizer(
        request.text,
        dot=True,
        new_line=True,
        question_mark=True,
        exclamation_mark=True,
    )

    # sentence_id is 1-based, following the order the tokenizer produced.
    per_sentence = [
        {
            "sentence_id": position,
            "sentence": piece,
            "lemmatizer_results": analyze(
                piece, request.language, request.task, str(request.flag)
            ),
        }
        for position, piece in enumerate(pieces, start=1)
    ]

    # Envelope: "statusCode" is an application-level code inside the body,
    # separate from the HTTP status passed to JSONResponse below.
    payload = {"resp": per_sentence, "statusText": "OK", "statusCode": 0}
    return JSONResponse(
        content=payload,
        media_type="application/json",
        status_code=200,
    )