File size: 1,905 Bytes
29436df 7c0d80d 29436df f43b328 29436df 7c0d80d b8833f3 7c0d80d f43b328 7c0d80d 34a52a1 7c0d80d 34a52a1 f43b328 7c0d80d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
from fastapi import FastAPI
from huggingface_hub import hf_hub_download
import os
from pydantic import BaseModel
from fastapi.responses import JSONResponse
# Startup banner so deployment logs show which revision of this service booted.
print("Version ---- 2")
# Single FastAPI application instance; route handlers below register on it.
app = FastAPI()
def download_file_from_hf(repo_id, filename):
    """Download *filename* from the Hugging Face Hub repo *repo_id*.

    Files are materialized (no symlinks) under ``~/.sinatools``, which is
    created on first use. Returns the local filesystem path of the file.
    """
    cache_dir = os.path.expanduser("~/.sinatools")
    os.makedirs(cache_dir, exist_ok=True)
    # local_dir_use_symlinks=False forces a real copy so sinatools can open
    # the pickles directly from the cache directory.
    return hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        local_dir=cache_dir,
        local_dir_use_symlinks=False,
    )
# Pre-fetch the model artifacts at startup so the first request is not
# blocked on large downloads.
_STARTUP_ARTIFACTS = (
    ("SinaLab/ALMA", "lemmas_dic.pickle"),
    ("SinaLab/ArabGlossBERT", "two_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "three_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "four_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "five_grams.pickle"),
)
for _repo, _fname in _STARTUP_ARTIFACTS:
    download_file_from_hf(_repo, _fname)
# NOTE(review): these imports are intentionally placed AFTER the downloads —
# presumably sinatools loads the pickles above at import time; confirm before
# moving them to the top of the file.
from sinatools.morphology.morph_analyzer import analyze
from sinatools.utils.tokenizer import sentence_tokenizer
class ALMARequest(BaseModel):
    """Request payload for the /predict endpoint."""
    # Raw input text; split into sentences before analysis.
    text: str
    # Language code forwarded verbatim to sinatools' analyze().
    language: str
    # Analysis task name forwarded verbatim to analyze().
    task: str
    # Analyzer flag; coerced to str before being passed to analyze().
    flag: str
@app.post("/predict")
def predict(request: ALMARequest):
    """Split the request text into sentences and run the morphological
    analyzer on each one.

    Returns a JSON body of the form
    ``{"resp": [...], "statusText": "OK", "statusCode": 0}`` where each
    entry of ``resp`` carries a 1-based sentence id, the sentence text,
    and the analyzer output for that sentence.
    """
    # Tokenize on every supported sentence delimiter.
    sentences = sentence_tokenizer(
        request.text,
        dot=True,
        new_line=True,
        question_mark=True,
        exclamation_mark=True,
    )
    results_with_sentences = [
        {
            "sentence_id": idx,
            "sentence": sent,
            "lemmatizer_results": analyze(
                sent, request.language, request.task, str(request.flag)
            ),
        }
        for idx, sent in enumerate(sentences, start=1)
    ]
    return JSONResponse(
        content={
            "resp": results_with_sentences,
            "statusText": "OK",
            "statusCode": 0,
        },
        media_type="application/json",
        status_code=200,
    )
|