File size: 1,905 Bytes
29436df
7c0d80d
 
 
 
29436df
f43b328
29436df
 
7c0d80d
 
 
 
 
 
 
 
 
 
 
 
 
 
b8833f3
 
 
 
7c0d80d
 
f43b328
7c0d80d
 
 
34a52a1
 
 
7c0d80d
 
 
 
34a52a1
 
 
 
f43b328
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c0d80d
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from fastapi import FastAPI
from huggingface_hub import hf_hub_download
import os
from pydantic import BaseModel
from fastapi.responses import JSONResponse

# Startup marker written to stdout when this module is imported.
print("Version ---- 2")
# FastAPI application instance; the /predict route in this file is registered on it.
app = FastAPI()

def download_file_from_hf(repo_id, filename):
    """Download one file from a Hugging Face repo into ``~/.sinatools``.

    The destination directory is created first if it does not exist.

    Args:
        repo_id: Repository identifier passed through to ``hf_hub_download``.
        filename: Name of the file inside that repository.

    Returns:
        Whatever ``hf_hub_download`` returns for the downloaded file
        (the original code treats it as the local file path).
    """
    destination = os.path.expanduser("~/.sinatools")
    os.makedirs(destination, exist_ok=True)

    # local_dir_use_symlinks=False is kept exactly as the original passed it.
    return hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        local_dir=destination,
        local_dir_use_symlinks=False,
    )

# Fetch the pickle files at import time, in a fixed order.
# NOTE(review): presumably the sinatools imports that follow expect these
# files to already be on disk — confirm before reordering.
for _repo_id, _filename in (
    ("SinaLab/ALMA", "lemmas_dic.pickle"),
    ("SinaLab/ArabGlossBERT", "two_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "three_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "four_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "five_grams.pickle"),
):
    download_file_from_hf(_repo_id, _filename)

from sinatools.morphology.morph_analyzer import analyze
from sinatools.utils.tokenizer import sentence_tokenizer

class ALMARequest(BaseModel):
    """Request body accepted by the ``/predict`` endpoint."""

    # Input text; ``predict`` splits it into sentences before analysis.
    text: str
    # Forwarded unchanged to ``analyze`` as its second positional argument.
    language: str
    # Forwarded unchanged to ``analyze`` as its third positional argument.
    task: str
    # Passed to ``analyze`` as its fourth positional argument (after ``str()``).
    flag: str

@app.post("/predict")
def predict(request: ALMARequest):
    """Split the request text into sentences and run ``analyze`` on each one.

    Args:
        request: Parsed request body carrying ``text``, ``language``,
            ``task`` and ``flag``.

    Returns:
        A ``JSONResponse`` with HTTP status 200 whose body has ``resp``
        (one entry per sentence, with a 1-based ``sentence_id``, the sentence
        itself and the ``analyze`` output), ``statusText`` and ``statusCode``.
    """
    sentences = sentence_tokenizer(
        request.text,
        dot=True,
        new_line=True,
        question_mark=True,
        exclamation_mark=True,
    )

    # sentence_id is the position of the sentence in the tokenizer output,
    # counted from 1.
    per_sentence = [
        {
            "sentence_id": position,
            "sentence": sentence,
            "lemmatizer_results": analyze(
                sentence, request.language, request.task, str(request.flag)
            ),
        }
        for position, sentence in enumerate(sentences, start=1)
    ]

    payload = {"resp": per_sentence, "statusText": "OK", "statusCode": 0}
    return JSONResponse(
        content=payload,
        media_type="application/json",
        status_code=200,
    )