"""FastAPI service exposing SinaTools morphological (ALMA) analysis.

At startup the service downloads the pickled lexicon resources from the
Hugging Face Hub into ``~/.sinatools`` (where sinatools expects them),
then serves a single ``/predict`` endpoint that splits the input text
into sentences and runs the morphological analyzer on each one.
"""

from fastapi import FastAPI
from fastapi.responses import JSONResponse
from huggingface_hub import hf_hub_download
import os
from pydantic import BaseModel

print("Version ---- 2")

app = FastAPI()


def download_file_from_hf(repo_id, filename):
    """Download *filename* from Hub repo *repo_id* into ``~/.sinatools``.

    Parameters
    ----------
    repo_id : str
        Hugging Face repository id, e.g. ``"SinaLab/ALMA"``.
    filename : str
        File to fetch from the repository.

    Returns
    -------
    str
        Local filesystem path of the downloaded file.
    """
    target_dir = os.path.expanduser("~/.sinatools")
    os.makedirs(target_dir, exist_ok=True)
    # local_dir_use_symlinks=False forces a real copy so sinatools can
    # open the file directly. NOTE: the flag is deprecated (and ignored)
    # on recent huggingface_hub versions, but kept for older ones.
    file_path = hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        local_dir=target_dir,
        local_dir_use_symlinks=False,
    )
    return file_path


# Lexicon resources that must exist locally before sinatools is imported.
_REQUIRED_FILES = [
    ("SinaLab/ALMA", "lemmas_dic.pickle"),
    ("SinaLab/ArabGlossBERT", "two_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "three_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "four_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "five_grams.pickle"),
]
for _repo_id, _filename in _REQUIRED_FILES:
    download_file_from_hf(_repo_id, _filename)

# Imported AFTER the downloads on purpose: these modules appear to load
# the pickles from ~/.sinatools at import time — do not move to the top.
from sinatools.morphology.morph_analyzer import analyze
from sinatools.utils.tokenizer import sentence_tokenizer


class ALMARequest(BaseModel):
    """Request payload for the ``/predict`` endpoint."""

    # Raw input text; split into sentences server-side.
    text: str
    # Language code forwarded verbatim to analyze().
    language: str
    # Analysis task name forwarded verbatim to analyze().
    task: str
    # Analyzer flag; always forwarded as a string.
    flag: str


@app.post("/predict")
def predict(request: ALMARequest):
    """Tokenize the request text into sentences and analyze each one.

    Returns a JSON envelope ``{"resp": [...], "statusText": "OK",
    "statusCode": 0}`` with HTTP 200, where each ``resp`` entry carries
    the 1-based sentence id, the sentence text, and the analyzer output.
    """
    sentences = sentence_tokenizer(
        request.text,
        dot=True,
        new_line=True,
        question_mark=True,
        exclamation_mark=True,
    )

    results_with_sentences = [
        {
            "sentence_id": sentence_id,
            "sentence": sentence,
            "lemmatizer_results": analyze(
                sentence, request.language, request.task, str(request.flag)
            ),
        }
        for sentence_id, sentence in enumerate(sentences, start=1)
    ]

    content = {
        "resp": results_with_sentences,
        "statusText": "OK",
        "statusCode": 0,
    }
    return JSONResponse(
        content=content,
        media_type="application/json",
        status_code=200,
    )