Kakaarot commited on
Commit
e622245
·
verified ·
1 Parent(s): b5ae9f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -0
app.py CHANGED
@@ -12,9 +12,19 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
12
  # AutoTokenizer helps in Text input -> Sentences -> Words -> Even subwords like ['un', 'break', 'able'] -> Integer IDs that the model expects.
13
  # And what's awesome is that tokens will be generated following the configuration and requirements of the model which we will be using.
14
 
 
 
 
15
 
16
  from wordcloud import WordCloud
 
 
 
 
 
17
  import matplotlib.pyplot as plt
 
 
18
  import torch
19
 
20
  # Cache the model and tokenizer to avoid reloading on every run
 
12
  # AutoTokenizer helps in Text input -> Sentences -> Words -> Even subwords like ['un', 'break', 'able'] -> Integer IDs that the model expects.
13
  # And what's awesome is that tokens will be generated following the configuration and requirements of the model which we will be using.
14
 
15
+ # AutoModelForCausalLM is a powerful and convenient class that serves as a high-level interface for loading pre-trained transformer models specifically designed for causal language modeling.
16
+ # The cooler part of these Auto* classes is that you don't need to know the exact class name of the model (like GPT2LMHeadModel, CTRLLMHeadModel, ReformerLMHeadModel, etc.).
17
+ # Meaning the AutoModelForCausalLM automatically determines the correct model architecture based on the pretrained_model_name_or_path you provide. AutoModelForCausalLM infers this from the configuration files associated with the pre-trained model.
18
 
19
  from wordcloud import WordCloud
20
+ # This will help us know which words have a large frequency. It creates a visual representation of the words used, known as a Word Cloud.
21
+ # The higher the frequency and importance, the larger the word will appear in the Word Cloud.
22
+ # Mostly it avoids stop words like "it", "is", "are", etc.
23
+
24
+
25
  import matplotlib.pyplot as plt
26
+ # This helps us plot. So wait till you see it.
27
+
28
  import torch
29
 
30
  # Cache the model and tokenizer to avoid reloading on every run