File size: 9,478 Bytes
9377e86
 
 
 
 
 
 
 
dba2bfc
9377e86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c93884
9377e86
2a296fb
9377e86
 
 
 
 
 
 
 
2a296fb
9377e86
 
 
 
 
9c93884
9377e86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2a296fb
9377e86
 
 
 
 
9c93884
9377e86
 
 
 
 
 
 
 
 
 
 
 
 
 
2a296fb
9377e86
 
6a74d4d
 
 
 
 
9377e86
 
 
 
 
 
 
 
2a296fb
9377e86
 
 
6a74d4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9377e86
 
 
6a74d4d
 
 
 
 
 
 
 
 
9c93884
6a74d4d
 
 
 
 
 
 
 
 
9377e86
 
 
6a74d4d
 
 
 
 
 
 
 
 
9c93884
6a74d4d
 
 
 
 
 
 
 
 
9377e86
 
2a296fb
2fb2ae4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import html as html_lib

# Sentiment-analysis pipeline. No model is given, so transformers falls back
# to its default English sentiment checkpoint (downloaded on first use).
classifier = pipeline("sentiment-analysis")

# Summarization checkpoint: BART large fine-tuned for news summarization.
sum_model_name = "facebook/bart-large-cnn"

# Tokenizer paired with the summarization checkpoint.
sum_tokenizer = AutoTokenizer.from_pretrained(sum_model_name)

# Seq2seq model used by summarize_text().
sum_model = AutoModelForSeq2SeqLM.from_pretrained(sum_model_name)

# Translation checkpoint: OPUS-MT, English -> Ukrainian.
trans_model_name = "Helsinki-NLP/opus-mt-en-uk"

# Tokenizer paired with the translation checkpoint.
trans_tokenizer = AutoTokenizer.from_pretrained(trans_model_name)

# Seq2seq model used by translate_en_to_uk().
trans_model = AutoModelForSeq2SeqLM.from_pretrained(trans_model_name)

def analyze_sentiment(text):
    """Classify the sentiment of ``text`` and return a ``{label: score}`` dict.

    Empty input and pipeline failures are reported through the same dict
    shape (score 0.0) so the Gradio ``Label`` component can always render
    the result.
    """
    # Reject empty or whitespace-only input early.
    if not text or not text.strip():
        return {"Error: Please enter the text for analysis": 0.0}

    try:
        # Fix: classify the raw text. The previous version HTML-escaped the
        # input first, which fed entities such as "&amp;" / "&#x27;" to the
        # model and distorted its predictions. gr.Label renders plain text,
        # not HTML, so escaping the model input is unnecessary here.
        result = classifier(text)[0]
        # pipeline() returns [{"label": ..., "score": ...}]; map it to the
        # {label: confidence} shape gr.Label expects.
        return {result["label"]: result["score"]}
    except Exception as e:
        # Surface a truncated error message instead of crashing the UI.
        return {f"Processing Error: {str(e)[:100]}": 0.0}

def summarize_text(text):
    """Generate a short abstractive summary of English ``text``.

    Returns the summary string, optionally prefixed with a truncation
    warning. Empty input and pipeline failures are reported as error
    strings so the Gradio output Textbox can always display the result.
    """
    # Reject empty or whitespace-only input early.
    if not text or not text.strip():
        return "Error: Please enter the text to summarize"

    # Fix: operate on the raw text. The previous version HTML-escaped the
    # input before tokenization, which injected entities such as "&amp;"
    # into the model input and inflated the word count used for the
    # truncation check below. The output Textbox shows plain text, so no
    # escaping is needed.
    words = text.split()
    warning = ""

    # Soft limit: keep only the first 500 words and tell the user. The
    # tokenizer's max_length=1024 below remains a hard safety net.
    if len(words) > 500:
        text = " ".join(words[:500])
        warning = "⚠️ Warning: The text was too long and has been truncated to the first 500 words\n\n"

    try:
        # Tokenize with hard truncation at the model's input limit.
        inputs = sum_tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)

        # Pass the full encoding (input_ids AND attention_mask) so padding
        # is handled correctly — consistent with translate_en_to_uk().
        summary_ids = sum_model.generate(
            **inputs,
            max_length=60,       # cap summary length in tokens
            min_length=20,       # avoid degenerate one-phrase summaries
            num_beams=4,         # beam search for higher-quality output
            early_stopping=True
        )

        # Decode generated token ids back into a readable string.
        summary = sum_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        return warning + summary
    except Exception as e:
        # Surface a truncated error message instead of crashing the UI.
        return f"Processing Error: {str(e)[:100]}"

def translate_en_to_uk(text):
    """Translate English ``text`` to Ukrainian with the OPUS-MT model.

    Returns the translated string. Empty input and pipeline failures are
    reported as error strings so the Gradio output Textbox can always
    display the result.
    """
    # Reject empty or whitespace-only input early.
    if not text or not text.strip():
        return "Error: Please enter the text to translate"

    try:
        # Fix: translate the raw text. The previous version HTML-escaped
        # the input first, which fed entities such as "&amp;" / "&#x27;"
        # to the model and corrupted the translation. The output Textbox
        # shows plain text, so escaping the model input is unnecessary.
        inputs = trans_tokenizer(text, return_tensors="pt", max_length=512, truncation=True)

        # Generate with the full encoding (ids + attention mask).
        translated_ids = trans_model.generate(**inputs)

        # Decode generated token ids back into the Ukrainian string.
        return trans_tokenizer.decode(translated_ids[0], skip_special_tokens=True)
    except Exception as e:
        # Surface a truncated error message instead of crashing the UI.
        return f"Processing Error: {str(e)[:100]}"

# Long-form English sample texts loaded by the example buttons on the
# Summarization tab. Kept at module level so the button lambdas can close
# over them.
ai_sample = "Artificial intelligence (AI) has become a transformative force in modern society, influencing everything from healthcare to transportation. In the medical field, AI algorithms can analyze complex data to assist doctors in diagnosing diseases more accurately and quickly. For instance, machine learning models are trained on thousands of medical images to detect early signs of conditions like cancer. Meanwhile, in transportation, autonomous vehicles use AI to navigate roads, avoid obstacles, and reduce the likelihood of accidents caused by human error."
space_sample = "The James Webb Space Telescope (JWST) is the largest and most powerful space science telescope ever built. Launched on December 25, 2021, it represents a major leap forward in our quest to understand the universe. Unlike its predecessor, the Hubble Space Telescope, which primarily observes visible light, the JWST is designed to detect infrared radiation. This capability allows it to peer through dense clouds of cosmic dust and observe the very first galaxies that formed after the Big Bang."
climate_sample = "Climate change is a long-term shift in global or regional climate patterns. Often climate change refers specifically to the rise in global temperatures from the mid-20th century to present. It is primarily caused by human activities, especially the burning of fossil fuels, which increases levels of heat-trapping greenhouse gases in Earth's atmosphere. The impacts are already being felt globally, including more frequent and severe weather events, rising sea levels, and shifts in wildlife populations."

# Build the Gradio Blocks UI: three tabs (sentiment / summarization /
# translation), each with its own input, output, action button, and examples.
with gr.Blocks(title="NLP Toolkit", theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown('# πŸ› οΈ NLP Toolkit: Text Analysis Tools')
    
    # One-paragraph description of what the app offers.
    gr.Markdown('A comprehensive AI-powered tool that allows you to analyze text sentiment, generate short summaries for long articles, and translate text from English to Ukrainian.')
    
    # Container for the three feature tabs.
    with gr.Tabs():
        # --- Tab 1: sentiment analysis ---
        with gr.Tab("Sentiment Analysis"):
            with gr.Column():
                input_text = gr.Textbox(label="Input Text", lines=3, placeholder="Type something in English... (e.g., I absolutely love this product!)")
                output_label = gr.Label(label="Analysis Result")
                btn_sentiment = gr.Button("Analyze Sentiment", variant='primary')
                
                # Button click -> analyze_sentiment(input) -> Label output.
                btn_sentiment.click(fn=analyze_sentiment, inputs=input_text, outputs=output_label)
                
                # Native gr.Examples works here because the samples are short;
                # cache_examples=False avoids running the model at build time.
                gr.Examples(
                    examples=[
                        ["I absolutely love this new update! It's fantastic."], 
                        ["I'm very disappointed with the service, it was terrible."],
                        ["The weather today is completely ordinary."]
                    ],
                    inputs=input_text,
                    cache_examples=False
                )
                
        # --- Tab 2: summarization ---
        with gr.Tab("Summarization"):
            with gr.Column():
                input_long = gr.Textbox(label="Long Text (EN)", lines=6, placeholder="Paste a long English article to summarize here...")
                output_summary = gr.Textbox(label="Short Summary", lines=3)
                btn_summarize = gr.Button("Generate Summary", variant="primary")
                
                # Button click -> summarize_text(input) -> summary Textbox.
                btn_summarize.click(fn=summarize_text, inputs=input_long, outputs=output_summary)
                
                # Custom example buttons are used instead of gr.Examples here
                # because the sample articles are too long to show as rows.
                gr.Markdown("### πŸ“‹ Click an example button below to load the text:")
                with gr.Row():
                    sum_ex1 = gr.Button("πŸ’‘ AI Force")
                    sum_ex2 = gr.Button("πŸš€ Space Telescope")
                    sum_ex3 = gr.Button("🌍 Climate Change")
                
                # Each button simply fills the input box with its sample text.
                sum_ex1.click(fn=lambda: ai_sample, outputs=input_long)
                sum_ex2.click(fn=lambda: space_sample, outputs=input_long)
                sum_ex3.click(fn=lambda: climate_sample, outputs=input_long)

        # --- Tab 3: English -> Ukrainian translation ---
        with gr.Tab("Translation (EN -> UK)"):
            with gr.Column():
                input_trans = gr.Textbox(label="English Text", lines=4, placeholder="Enter text in English to translate into Ukrainian...")
                output_trans = gr.Textbox(label="Ukrainian Translation", lines=4)
                btn_translate = gr.Button("Translate to Ukrainian", variant="primary")
                
                # Button click -> translate_en_to_uk(input) -> translation box.
                btn_translate.click(fn=translate_en_to_uk, inputs=input_trans, outputs=output_trans)
                
                # Same custom-button example pattern as the Summarization tab.
                gr.Markdown("### πŸ“‹ Click an example button below to load the text:")
                with gr.Row():
                    trans_ex1 = gr.Button("πŸ€– AI Future")
                    trans_ex2 = gr.Button("β˜• Coffee Order")
                    trans_ex3 = gr.Button("πŸ“Š Testing Phase")
                
                # Each button fills the translation input with a fixed sentence.
                trans_ex1.click(fn=lambda: "Artificial intelligence is rapidly changing the world.", outputs=input_trans)
                trans_ex2.click(fn=lambda: "I would like to order a cup of coffee, please.", outputs=input_trans)
                trans_ex3.click(fn=lambda: "The results of the recent testing phase exceeded all our initial expectations.", outputs=input_trans)

if __name__ == "__main__":
    # Start the Gradio server; debug=True prints tracebacks to the console,
    # which helps while developing locally.
    demo.launch(debug=True)