cccc committed on
Commit
e254324
·
1 Parent(s): 9b08af7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -7
app.py CHANGED
@@ -20,7 +20,7 @@ def readLMwords():
20
 
21
  def sentiment_analysis(sentence, model_name):
22
  model_name = "CCCC/"+model_name
23
- sentences = sentence.strip().split('\n')
24
  template = '{"placeholder":"text_a"} Shares are {"mask"}.'
25
  classes = ['positive', 'neutral', 'negative']
26
  positive,negative,neutral = readLMwords()
@@ -36,17 +36,19 @@ def sentiment_analysis(sentence, model_name):
36
  "CCCC/RoBERTa_English_FinancialNews_tuned":"roberta",
37
  }
38
 
39
- if 'Chinese' in modelname:
40
  tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-zh-en")
41
  model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-zh-en")
42
 
43
  translated_tokens = model.generate(
44
- **tokenizer(sentences, return_tensors="pt", padding=True)
45
  )
46
- sentences = []
47
  for t in translated_tokens:
48
- sentences.append(tokenizer.decode(t, skip_special_tokens=True))
49
-
 
 
50
 
51
  testdata = []
52
  for i,sentence in enumerate(sentences):
@@ -81,7 +83,7 @@ def sentiment_analysis(sentence, model_name):
81
  for step, inputs in enumerate(test_dataloader):
82
  logits = prompt_model(inputs)
83
  result.extend(torch.argmax(logits, dim=-1))
84
- output = '\n'.join([classes[i] for i in result])
85
  return str(output)
86
 
87
 
 
20
 
21
  def sentiment_analysis(sentence, model_name):
22
  model_name = "CCCC/"+model_name
23
+ raw_sentences = sentence.strip().split('\n')
24
  template = '{"placeholder":"text_a"} Shares are {"mask"}.'
25
  classes = ['positive', 'neutral', 'negative']
26
  positive,negative,neutral = readLMwords()
 
36
  "CCCC/RoBERTa_English_FinancialNews_tuned":"roberta",
37
  }
38
 
39
+ if 'Chinese' in model_name:
40
  tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-zh-en")
41
  model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-zh-en")
42
 
43
  translated_tokens = model.generate(
44
+ **tokenizer(raw_sentences, return_tensors="pt", padding=True)
45
  )
46
+ sentences_translated = []
47
  for t in translated_tokens:
48
+ sentences_translated.append(tokenizer.decode(t, skip_special_tokens=True))
49
+ sentences = sentences_translated
50
+ else:
51
+ sentences = raw_sentences
52
 
53
  testdata = []
54
  for i,sentence in enumerate(sentences):
 
83
  for step, inputs in enumerate(test_dataloader):
84
  logits = prompt_model(inputs)
85
  result.extend(torch.argmax(logits, dim=-1))
86
+ output = '\n'.join([f"{classes[res]}, {raw_sentences[i]}" for i,res in enumerate(result)])
87
  return str(output)
88
 
89