Spaces:
Runtime error
Runtime error
Update Sejarah.py
#2
by sengzi - opened
- Sejarah.py +14 -25
Sejarah.py
CHANGED
|
@@ -4,7 +4,7 @@ from haystack import Pipeline
|
|
| 4 |
from haystack.nodes import TextConverter, PreProcessor, BM25Retriever, FARMReader
|
| 5 |
from haystack.document_stores import InMemoryDocumentStore
|
| 6 |
from haystack.utils import print_answers
|
| 7 |
-
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 8 |
|
| 9 |
class Sejarah:
|
| 10 |
def __init__(self):
|
|
@@ -40,32 +40,26 @@ class Sejarah:
|
|
| 40 |
self.querying_pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
|
| 41 |
self.querying_pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])
|
| 42 |
|
| 43 |
-
#Malay to English Model
|
| 44 |
-
self.id_en_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-id-en")
|
| 45 |
-
self.id_en_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-id-en")
|
| 46 |
-
|
| 47 |
-
#English to Malay Model
|
| 48 |
-
self.en_id_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-id")
|
| 49 |
-
self.en_id_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-id")
|
| 50 |
-
|
| 51 |
|
| 52 |
def language_converter(self, content, lang, method):
|
| 53 |
|
| 54 |
-
content = content.lower()
|
| 55 |
-
|
| 56 |
if lang == "en":
|
| 57 |
if method == "question":
|
| 58 |
-
|
| 59 |
-
translation = self.en_id_model.generate(**tokenized_text)
|
| 60 |
-
content = self.en_id_tokenizer.batch_decode(translation, skip_special_tokens=True)[0]
|
| 61 |
|
|
|
|
|
|
|
| 62 |
else:
|
| 63 |
-
|
| 64 |
-
translation = self.id_en_model.generate(**tokenized_text)
|
| 65 |
-
content = self.id_en_tokenizer.batch_decode(translation, skip_special_tokens=True)[0]
|
| 66 |
|
| 67 |
-
return content
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
def interface(self, question):
|
| 70 |
language = self.detect_language(question)
|
| 71 |
|
|
@@ -82,9 +76,4 @@ class Sejarah:
|
|
| 82 |
answer = self.language_converter(result['answers'][0].answer, language, "answer")
|
| 83 |
context = self.language_converter(result['answers'][0].context, language, "answer")
|
| 84 |
|
| 85 |
-
return answer, context
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
def detect_language(self, content):
|
| 89 |
-
lang = langid.classify(content)
|
| 90 |
-
return lang[0]
|
|
|
|
| 4 |
from haystack.nodes import TextConverter, PreProcessor, BM25Retriever, FARMReader
|
| 5 |
from haystack.document_stores import InMemoryDocumentStore
|
| 6 |
from haystack.utils import print_answers
|
| 7 |
+
from deep_translator import GoogleTranslator
|
| 8 |
|
| 9 |
class Sejarah:
|
| 10 |
def __init__(self):
|
|
|
|
| 40 |
self.querying_pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
|
| 41 |
self.querying_pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
def language_converter(self, content, lang, method):
|
| 45 |
|
|
|
|
|
|
|
| 46 |
if lang == "en":
|
| 47 |
if method == "question":
|
| 48 |
+
new_content = GoogleTranslator(source='en', target='ms').translate(content)
|
|
|
|
|
|
|
| 49 |
|
| 50 |
+
if "when" in content:
|
| 51 |
+
new_content = new_content.replace("apabila","bila")
|
| 52 |
else:
|
| 53 |
+
new_content = GoogleTranslator(source='ms', target='en').translate(content)
|
|
|
|
|
|
|
| 54 |
|
| 55 |
+
return new_content
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def detect_language(self, content):
|
| 59 |
+
lang = langid.classify(content)
|
| 60 |
+
return lang[0]
|
| 61 |
+
|
| 62 |
+
|
| 63 |
def interface(self, question):
|
| 64 |
language = self.detect_language(question)
|
| 65 |
|
|
|
|
| 76 |
answer = self.language_converter(result['answers'][0].answer, language, "answer")
|
| 77 |
context = self.language_converter(result['answers'][0].context, language, "answer")
|
| 78 |
|
| 79 |
+
return answer, context
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|