Spaces:

Ginidu2003
/

News_Classifier

Sleeping

App Files Community

Ginidu2003 committed on Apr 4

Commit

620df90

verified ·

1 Parent(s): 74452f9

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -13

app.py CHANGED Viewed

@@ -16,7 +16,7 @@ import io
 nltk.download('wordnet', quiet=True)
 nltk.download('punkt', quiet=True)
 nltk.download('punkt_tab', quiet=True)
 lemmatizer = WordNetLemmatizer()
@@ -80,13 +80,9 @@ from transformers import AutoTokenizer, AutoModelForQuestionAnswering
 qa_tokenizer = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2")
 qa_model = AutoModelForQuestionAnswering.from_pretrained("deepset/roberta-base-squad2")
-from wordcloud import WordCloud
-import matplotlib.pyplot as plt
-import io
 def answer_question(news_content, question):
     if not news_content.strip() or not question.strip():
-        return "Please enter both news content and a question."
     try:
         inputs = qa_tokenizer(question, news_content, return_tensors="pt", truncation=True, max_length=512)
@@ -97,21 +93,18 @@ def answer_question(news_content, question):
         start_idx = torch.argmax(outputs.start_logits)
         end_idx = torch.argmax(outputs.end_logits) + 1
-        # Clean answer - remove question repetition and special tokens
         answer = qa_tokenizer.decode(inputs.input_ids[0][start_idx:end_idx],
                                     skip_special_tokens=True,
                                     clean_up_tokenization_spaces=True)
         confidence = torch.max(torch.softmax(outputs.start_logits, dim=1)).item()
         # ====== Useful Statistics ======
         words = nltk.word_tokenize(news_content)
         sentences = nltk.sent_tokenize(news_content)
         word_count = len(words)
         sentence_count = len(sentences)
-        reading_time = round(word_count / 200, 2)  # ~200 words/minute
         stats = f"Word count: {word_count}\nSentences: {sentence_count}\nEstimated reading time: {reading_time} min"
@@ -122,10 +115,15 @@ def answer_question(news_content, question):
         ax.axis("off")
         buf = io.BytesIO()
         plt.savefig(buf, format="png")
         buf.seek(0)
-        return f"**Answer:** {answer.strip()}\n\n**Confidence:** {confidence:.2%}\n\n{stats}", buf
     except Exception as e:
-        return f"Error: {str(e)}"
 # ====================== GRADIO INTERFACE ======================
@@ -158,13 +156,14 @@ with gr.Blocks(title="Daily Mirror News Classifier") as demo:
             news_input = gr.Textbox(lines=12, label="Paste News Content", placeholder="Paste the full news article here...")
             question_input = gr.Textbox(label="Your Question", placeholder="e.g. What is the main topic?")
             qa_btn = gr.Button("🔍 Get Answer", variant="primary")
             qa_output = gr.Textbox(label="Answer + Stats", lines=8)
             wc_output = gr.Image(label="Word Cloud")
             qa_btn.click(
                 fn=answer_question,
                 inputs=[news_input, question_input],
-                outputs=[qa_output,wc_output]
             )
     gr.Markdown("Built for Text Analytics Assignment - Section 02")

 nltk.download('wordnet', quiet=True)
 nltk.download('punkt', quiet=True)
 nltk.download('punkt_tab', quiet=True)
+nltk.download('averaged_perceptron_tagger', quiet=True)
 lemmatizer = WordNetLemmatizer()
 qa_tokenizer = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2")
 qa_model = AutoModelForQuestionAnswering.from_pretrained("deepset/roberta-base-squad2")
 def answer_question(news_content, question):
     if not news_content.strip() or not question.strip():
+        return "Please enter both news content and a question.", None
     try:
         inputs = qa_tokenizer(question, news_content, return_tensors="pt", truncation=True, max_length=512)
         start_idx = torch.argmax(outputs.start_logits)
         end_idx = torch.argmax(outputs.end_logits) + 1
         answer = qa_tokenizer.decode(inputs.input_ids[0][start_idx:end_idx],
                                     skip_special_tokens=True,
                                     clean_up_tokenization_spaces=True)
         confidence = torch.max(torch.softmax(outputs.start_logits, dim=1)).item()
         # ====== Useful Statistics ======
         words = nltk.word_tokenize(news_content)
         sentences = nltk.sent_tokenize(news_content)
         word_count = len(words)
         sentence_count = len(sentences)
+        reading_time = round(word_count / 200, 2)
         stats = f"Word count: {word_count}\nSentences: {sentence_count}\nEstimated reading time: {reading_time} min"
         ax.axis("off")
         buf = io.BytesIO()
         plt.savefig(buf, format="png")
+        plt.close(fig)          # Important
         buf.seek(0)
+        full_output = f"**Answer:** {answer.strip()}\n\n**Confidence:** {confidence:.2%}\n\n{stats}"
+        return full_output, buf
     except Exception as e:
+        return f"Error: {str(e)}", None
 # ====================== GRADIO INTERFACE ======================
             news_input = gr.Textbox(lines=12, label="Paste News Content", placeholder="Paste the full news article here...")
             question_input = gr.Textbox(label="Your Question", placeholder="e.g. What is the main topic?")
             qa_btn = gr.Button("🔍 Get Answer", variant="primary")
             qa_output = gr.Textbox(label="Answer + Stats", lines=8)
             wc_output = gr.Image(label="Word Cloud")
             qa_btn.click(
                 fn=answer_question,
                 inputs=[news_input, question_input],
+                outputs=[qa_output, wc_output]
             )
     gr.Markdown("Built for Text Analytics Assignment - Section 02")