freak360 committed on
Commit
bb48231
·
verified ·
1 Parent(s): be38579

Upload 4 files

Browse files
Files changed (4) hide show
  1. main.py +67 -0
  2. model.pkl +3 -0
  3. requirements.txt +0 -0
  4. vectorizer.pkl +3 -0
main.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
+ from nltk.corpus import stopwords
3
+ from nltk.tokenize import word_tokenize
4
+ from nltk.stem import WordNetLemmatizer
5
+ from string import punctuation
6
+ import streamlit as st
7
+ import pickle
8
+
9
# Fetch the NLTK resources that preprocess_text relies on: the tokenizer
# models, the stopword lists and the WordNet data used by the lemmatizer.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

# NOTE(security): unpickling executes whatever code the pickle contains, so
# model.pkl and vectorizer.pkl must only ever come from a trusted source.
# The context managers close the file handles as soon as loading finishes
# (the previous bare open() calls left them to the garbage collector).
with open("model.pkl", "rb") as model_file:
    model = pickle.load(model_file)
with open("vectorizer.pkl", "rb") as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)

st.write("# Language Detection System")

# Raw user text; it is classified when the "Detect Language" button is pressed.
inputt = st.text_area("Enter text here")
18
+
19
def preprocess_text(text):
    """Return the cleaned, lemmatized tokens of *text* joined by single spaces.

    The text is tokenized with NLTK's ``word_tokenize``. A token is kept only
    when it is purely alphabetic (``str.isalpha``) and is not an English
    stopword; each kept token is lower-cased and lemmatized with
    ``WordNetLemmatizer``.

    Args:
        text: Raw input string.

    Returns:
        A single string with the surviving tokens separated by spaces; an
        empty string when no token survives.
    """
    # A set gives O(1) membership tests. Punctuation no longer needs to be
    # listed explicitly: a token that passes isalpha() can never be an ASCII
    # punctuation character, so the output is the same as before.
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    # NOTE(review): the stopword test runs on the original casing, so "The"
    # is kept while "the" is dropped. Left as-is because the pickled
    # vectorizer was presumably fitted on text processed this way - confirm
    # against the training code before changing it.
    # NOTE(review): str.isalpha() is False for tokens containing combining
    # marks (common in Indic scripts), so such words are discarded here -
    # verify this matches the training-time preprocessing.
    return ' '.join(
        lemmatizer.lemmatize(token.lower())
        for token in word_tokenize(text)
        if token.isalpha() and token not in stop_words
    )
28
+
29
# Numeric class label returned by the model -> language name shown to the user.
# Looking the prediction up here replaces seventeen independent `if` checks.
# Integer-valued NumPy scalars hash and compare equal to the matching Python
# int, so the model's output can be used as the key directly.
LANGUAGE_BY_LABEL = {
    1: "English",
    2: "Malayalam",
    3: "Hindi",
    4: "Tamil",
    5: "Portuguese",
    6: "French",
    7: "Dutch",
    8: "Spanish",
    9: "Greek",
    10: "Russian",
    11: "Danish",
    12: "Italian",
    13: "Turkish",
    14: "Swedish",
    15: "Arabic",
    16: "German",
    17: "Kannada",
}

if st.button("Detect Language"):
    if not inputt.strip():
        # Nothing to classify: avoid reporting a language for blank input.
        st.warning("Please enter some text first.")
    else:
        processed_text = preprocess_text(inputt)
        vectorized = vectorizer.transform([processed_text]).toarray()
        prediction = model.predict(vectorized)[0]

        language = LANGUAGE_BY_LABEL.get(prediction)
        if language is not None:
            st.header(language)
        else:
            # Previously an unmapped label silently displayed nothing.
            st.error(f"Unrecognized model output: {prediction!r}")
model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:592d90361198ed23c5f199437b72785cfe501826eafc8674459c0428d2044c5b
3
+ size 9600388
requirements.txt ADDED
Binary file (684 Bytes). View file
 
vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41002e32081486b6248c9f897dddbf62fd104cf723f80e44de5a4b7b4d0b7163
3
+ size 544652