|
|
from fastapi import FastAPI |
|
|
from huggingface_hub import hf_hub_download |
|
|
import os |
|
|
from pydantic import BaseModel |
|
|
from fastapi.responses import JSONResponse |
|
|
|
|
|
# Banner written to stdout when the module is loaded.
# NOTE(review): presumably a manual build marker — confirm it is still wanted.
print("Version ---- 2")

# Application object; the /predict route further down is registered on it.
app = FastAPI()
|
|
|
|
|
def download_file_from_hf(repo_id, filename):
    """Download one file from a Hugging Face Hub repository into ``~/.sinatools``.

    The destination directory is created if it does not exist yet, and the
    download is requested with ``local_dir_use_symlinks=False``.

    Args:
        repo_id: Hub repository identifier, e.g. ``"SinaLab/ALMA"``.
        filename: Name of the file to fetch from that repository.

    Returns:
        The value returned by ``hf_hub_download`` for this file.
    """
    sinatools_home = os.path.expanduser("~/.sinatools")
    os.makedirs(sinatools_home, exist_ok=True)

    download_options = {
        "repo_id": repo_id,
        "filename": filename,
        "local_dir": sinatools_home,
        "local_dir_use_symlinks": False,
    }
    return hf_hub_download(**download_options)
|
|
|
|
|
# Resource files fetched into ~/.sinatools at module load, in this order.
# NOTE(review): presumably the sinatools imports below expect these files to
# be present already — confirm before moving this section.
_REQUIRED_HF_FILES = (
    ("SinaLab/ALMA", "lemmas_dic.pickle"),
    ("SinaLab/ArabGlossBERT", "two_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "three_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "four_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "five_grams.pickle"),
)

for _hf_repo, _hf_file in _REQUIRED_HF_FILES:
    download_file_from_hf(_hf_repo, _hf_file)
|
|
|
|
|
from sinatools.morphology.morph_analyzer import analyze |
|
|
from sinatools.utils.tokenizer import sentence_tokenizer |
|
|
|
|
|
# JSON body accepted by the POST /predict handler. Documented with comments
# rather than a docstring so the generated schema description is unchanged.
class ALMARequest(BaseModel):
    # Raw input text; the handler splits it into sentences.
    text: str
    # Passed through unchanged as the second argument of analyze().
    language: str
    # Passed through unchanged as the third argument of analyze().
    task: str
    # Passed to analyze() as its fourth argument after str() conversion.
    flag: str
|
|
|
|
|
@app.post("/predict")
def predict(request: ALMARequest):
    # Handler for POST /predict: tokenizes the submitted text into sentences,
    # runs analyze() on each sentence, and returns the per-sentence results.
    # Kept as comments (no docstring) so the published route description
    # stays as it was.
    body_text = request.text
    lang = request.language
    task_name = request.task
    flag_text = str(request.flag)

    # Every splitting switch the tokenizer is given here is turned on.
    sentence_list = sentence_tokenizer(
        body_text,
        dot=True,
        new_line=True,
        question_mark=True,
        exclamation_mark=True,
    )

    # sentence_id is 1-based and follows the tokenizer's output order.
    per_sentence = [
        {
            "sentence_id": position,
            "sentence": sentence_text,
            "lemmatizer_results": analyze(sentence_text, lang, task_name, flag_text),
        }
        for position, sentence_text in enumerate(sentence_list, start=1)
    ]

    payload = {"resp": per_sentence, "statusText": "OK", "statusCode": 0}
    return JSONResponse(
        content=payload,
        media_type="application/json",
        status_code=200,
    )
|
|
|