Spaces:
Sleeping
Sleeping
| import nltk | |
| from nltk.corpus import stopwords | |
| from nltk.tokenize import word_tokenize | |
| from nltk.stem import WordNetLemmatizer | |
| from string import punctuation | |
| import streamlit as st | |
| import pickle | |
# Fetch the NLTK resources that preprocess_text relies on: the 'punkt'
# tokenizer models, the 'stopwords' corpus and the 'wordnet' corpus.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

# Load the pickled classifier and its fitted vectorizer. Context managers
# close the file handles as soon as unpickling finishes.
# NOTE: unpickling can execute arbitrary code, so these two files must only
# ever come from a trusted source.
with open("model.pkl", "rb") as model_file:
    model = pickle.load(model_file)
with open("vectorizer.pkl", "rb") as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)

# Page title and the free-text input whose language will be detected.
st.write("# Language Detection System")
inputt = st.text_area("Enter text here")
def preprocess_text(text: str) -> str:
    """Return *text* reduced to lowercased, lemmatized word tokens.

    The text is tokenized with NLTK's ``word_tokenize``; only tokens for
    which ``str.isalpha()`` is true are kept, English stop words are
    removed, and each remaining token is lowercased and lemmatized with
    ``WordNetLemmatizer``. The surviving tokens are joined with single
    spaces.

    Args:
        text: Raw text to normalize.

    Returns:
        The cleaned tokens joined by single spaces; an empty string when no
        token survives the filters.
    """
    # A set gives O(1) membership tests instead of scanning a list per token.
    # Punctuation needs no separate filter: isalpha() below already rejects
    # any token that contains a punctuation character.
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    # NOTE(review): isalpha() is false for tokens containing combining marks
    # (e.g. vowel signs in Indic scripts), so such words are dropped here --
    # confirm this matches the preprocessing used when the model was trained.
    word_tokens = [t for t in word_tokenize(text) if t.isalpha()]

    # The stop-word test intentionally runs on the token *before* lowercasing
    # (so capitalized stop words such as "The" are kept). This mirrors the
    # existing behavior; presumably the pickled vectorizer was fitted on text
    # processed the same way, so changing it could skew predictions.
    return ' '.join(
        lemmatizer.lemmatize(t.lower()) for t in word_tokens if t not in stop_words
    )
# Maps the numeric class label returned by the model to the language name
# shown to the user.
LANGUAGE_LABELS = {
    1: "English",
    2: "Malayalam",
    3: "Hindi",
    4: "Tamil",
    5: "Portuguese",
    6: "French",
    7: "Dutch",
    8: "Spanish",
    9: "Greek",
    10: "Russian",
    11: "Danish",
    12: "Italian",
    13: "Turkish",
    14: "Swedish",
    15: "Arabic",
    16: "German",
    17: "Kannada",
}

if st.button("Detect Language"):
    if not inputt.strip():
        # Nothing to classify: avoid showing a prediction made from an
        # all-zero feature vector.
        st.warning("Please enter some text.")
    else:
        processed_text = preprocess_text(inputt)
        # The vectorizer expects an iterable of documents, hence the list.
        vectorized = vectorizer.transform([processed_text]).toarray()
        prediction = model.predict(vectorized)[0]

        # One table lookup replaces the chain of independent `if` checks.
        language = LANGUAGE_LABELS.get(prediction)
        if language is not None:
            st.header(language)
        else:
            # Previously an unrecognized label rendered nothing at all.
            st.warning(f"Unrecognized language label: {prediction}")