Spaces:
Sleeping
Sleeping
"""Streamlit app for extractive Arabic question answering.

The user enters a context passage and a question in Arabic. Both are
segmented with Farasa, fed to a BERT question-answering model, and the
highest-scoring token span of the input is decoded and shown as the answer.
"""

import os

import streamlit as st
import torch
from farasa.segmenter import FarasaSegmenter
from transformers import BertForQuestionAnswering, BertTokenizer

# Hub repository the model weights and tokenizer files are loaded from.
MODEL_ID = "MarioMamdouh121/arabic-qa-model"
# Upper bound on the tokenized (question + context) length passed to the model.
MAX_LENGTH = 512

# Kept for backward compatibility: nothing in this script reads it, since the
# model and tokenizer below are loaded from MODEL_ID rather than this path.
checkpoint_path = os.path.abspath("./checkpoint-2817")


@st.cache_resource
def _load_resources():
    """Load the QA model, its tokenizer and the Farasa segmenter once.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded objects avoids reloading them on each rerun.

    Returns:
        A ``(model, tokenizer, segmenter)`` tuple.
    """
    # NOTE(review): trust_remote_code allows running code shipped in the model
    # repo. The concrete Bert* classes presumably do not need it - confirm and
    # drop the flag if so.
    qa_model = BertForQuestionAnswering.from_pretrained(
        MODEL_ID,
        use_safetensors=True,
        trust_remote_code=True,
    )
    qa_tokenizer = BertTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    farasa_segmenter = FarasaSegmenter(interactive=False)
    return qa_model, qa_tokenizer, farasa_segmenter


model, tokenizer, segmenter = _load_resources()


def _extract_answer(question: str, context: str) -> str:
    """Return the answer span predicted for *question* within *context*.

    Args:
        question: Raw question text as typed by the user.
        context: Raw context passage as typed by the user.

    Returns:
        The decoded, desegmented answer text, or an empty string when the
        predicted end position precedes the start position or the selected
        span decodes to nothing (e.g. only special tokens).
    """
    # Preprocess: the inputs are segmented before tokenization and the decoded
    # answer is desegmented afterwards.
    context_proc = segmenter.segment(context)
    question_proc = segmenter.segment(question)

    inputs = tokenizer(
        question_proc,
        context_proc,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_LENGTH,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # A single example is encoded, so the argmax over the whole logits tensor
    # is the token position within that example.
    start_index = int(torch.argmax(outputs.start_logits))
    end_index = int(torch.argmax(outputs.end_logits))

    # Start and end are picked independently, so the span can be inverted;
    # slicing would then silently yield an empty answer.
    if end_index < start_index:
        return ""

    answer_tokens = inputs["input_ids"][0][start_index : end_index + 1]
    answer = tokenizer.decode(answer_tokens, skip_special_tokens=True)
    if not answer.strip():
        return ""
    return segmenter.desegment(answer).strip()


# Streamlit interface
st.title("Arabic Question Answering")
st.write("أدخل سياقًا وسؤالًا بالعربية واحصل على الجواب.")

context = st.text_area("السياق", height=150)
question = st.text_input("السؤال")

if st.button("احصل على الجواب"):
    if not context.strip() or not question.strip():
        # Tell the user why nothing happened instead of ignoring the click.
        st.warning("يرجى إدخال السياق والسؤال أولاً.")
    else:
        answer = _extract_answer(question, context)
        if answer:
            st.success(f"الجواب: {answer}")
        else:
            st.warning("لم يتم العثور على جواب في السياق.")