Spaces:

Testys
/

YorubaCNN

Sleeping

App Files Files Community

Testys committed on Sep 19, 2024

Commit

5af3f8a

verified ·

1 Parent(s): 30b1f4c

changes based on stuffs

Browse files

Files changed (1) hide show

main.py +25 -20

main.py CHANGED Viewed

@@ -7,19 +7,26 @@ import pandas as pd
 import altair as alt
 # Load the Yoruba NER model
-ner_model_name = "./my_model/pytorch_model.bin"
-model_ner = "Testys/cnn_yor_ner"
-ner_tokenizer = AutoTokenizer.from_pretrained(model_ner)
-with open("./my_model/config.json", "r") as f:
-    ner_config = json.load(f)
-ner_model = CNNForNER(
-                      pretrained_model_name=ner_config["pretrained_model_name"],
-                      num_classes=ner_config["num_classes"]
-                      )
-ner_model.load_state_dict(torch.load(ner_model_name, map_location=torch.device('cpu')))
 ner_model.eval()
 # Load the Yoruba sentiment analysis model
 sentiment_model_name = "./sent_model/sent_pytorch_model.bin"
 model_sent = "Testys/cnn_sent_yor"
@@ -39,21 +46,19 @@ sentiment_model.eval()
 def analyze_text(text):
     # Tokenize input text for NER
-    ner_inputs = ner_tokenizer(text, max_length= 514, truncation= True, padding= "max_length", return_tensors="pt")
-    input_ids = ner_inputs['input_ids']
-    # Converting token IDs back to tokens
-    tokens = [ner_tokenizer.convert_ids_to_tokens(id) for id in input_ids.squeeze().tolist()]
     # Perform Named Entity Recognition
     with torch.no_grad():
         ner_outputs = ner_model(**ner_inputs)
-    ner_predictions = torch.argmax(ner_outputs, dim=-1)[0]
     ner_labels = ner_predictions.tolist()
-    ner_labels = [ner_config["id2labels"][str(label)] for label in ner_labels]
     #matching the tokens with the labels
     ner_labels = [f"{token}: {label}" for token, label in zip(tokens, ner_labels)]

 import altair as alt
 # Load the Yoruba NER model
+# ner_model_name = "./my_model/pytorch_model.bin"
+# model_ner = "Testys/cnn_yor_ner"
+# ner_tokenizer = AutoTokenizer.from_pretrained(model_ner)
+# with open("./my_model/config.json", "r") as f:
+#     ner_config = json.load(f)
+# ner_model = CNNForNER(
+#                       pretrained_model_name=ner_config["pretrained_model_name"],
+#                       num_classes=ner_config["num_classes"]
+#                       )
+# ner_model.load_state_dict(torch.load(ner_model_name, map_location=torch.device('cpu')))
+# ner_model.eval()
+ner_model = AutoModelForTokenClassification.from_pretrained("masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0")
+ner_tokenizer = AutoTokenizer.from_pretrained("masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0")  # bound as ner_tokenizer: analyze_text calls it by this name
+ner_config = ner_model.config  # analyze_text reads id2label from this config
 ner_model.eval()
 # Load the Yoruba sentiment analysis model
 sentiment_model_name = "./sent_model/sent_pytorch_model.bin"
 model_sent = "Testys/cnn_sent_yor"
 def analyze_text(text):
     # Tokenize input text for NER
+    ner_inputs = ner_tokenizer(text, return_tensors="pt")
     # Perform Named Entity Recognition
+    tokens = ner_tokenizer.convert_ids_to_tokens(ner_inputs.input_ids[0])
     with torch.no_grad():
         ner_outputs = ner_model(**ner_inputs)
+    print(ner_outputs)
+    ner_predictions = torch.argmax(ner_outputs.logits, dim=-1)[0]
     ner_labels = ner_predictions.tolist()
+    print(ner_labels)
+    ner_labels = [ner_config.id2label[label] for label in ner_labels]
     #matching the tokens with the labels
     ner_labels = [f"{token}: {label}" for token, label in zip(tokens, ner_labels)]