"""Streamlit front end for a pickled language-detection model.

The script loads a fitted model and vectorizer from ``model.pkl`` and
``vectorizer.pkl``, reads text from a text area, preprocesses it with
NLTK, and displays the language name mapped to the predicted class id.
"""
import pickle
from string import punctuation

import nltk
import streamlit as st
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# NLTK resources used by word_tokenize, stopwords and WordNetLemmatizer.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

# SECURITY: pickle.load can execute arbitrary code from the file it reads;
# only ship model/vectorizer files that come from a trusted source.
# `with` closes the file handles that the bare open() calls used to leak.
with open("model.pkl", "rb") as model_file:
    model = pickle.load(model_file)
with open("vectorizer.pkl", "rb") as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)

# Class id returned by model.predict -> language name shown to the user.
LANGUAGE_BY_LABEL = {
    1: "English",
    2: "Malayalam",
    3: "Hindi",
    4: "Tamil",
    5: "Portuguese",
    6: "French",
    7: "Dutch",
    8: "Spanish",
    9: "Greek",
    10: "Russian",
    11: "Danish",
    12: "Italian",
    13: "Turkish",
    14: "Swedish",
    15: "Arabic",
    16: "German",
    17: "Kannada",
}

st.write("# Language Detection System")
inputt = st.text_area("Enter text here")


def preprocess_text(text):
    """Return *text* reduced to space-joined, lowercased, lemmatized words.

    Steps, in order:
      1. Tokenize with ``word_tokenize``.
      2. Keep only tokens for which ``str.isalpha()`` is true.
      3. Drop tokens found in the punctuation / English-stopword set.
      4. Lowercase and lemmatize the survivors.

    Args:
        text: Raw input string.

    Returns:
        The cleaned tokens joined by single spaces ("" if none survive).
    """
    # A set gives O(1) membership tests; contents match the original list.
    bad_tokens = set(punctuation) | set(stopwords.words('english'))
    lemma = WordNetLemmatizer()

    word_tokens = [t for t in word_tokenize(text) if t.isalpha()]
    # NOTE(review): the stopword check runs on the original-case token,
    # before lowercasing, so capitalised stopwords presumably slip through.
    # Left as-is because the pickled vectorizer was presumably fitted on
    # text preprocessed exactly this way -- confirm before changing.
    clean_tokens = [
        lemma.lemmatize(t.lower()) for t in word_tokens if t not in bad_tokens
    ]
    return ' '.join(clean_tokens)


if st.button("Detect Language"):
    if not inputt.strip():
        # Blank input would otherwise be vectorized and given a label.
        st.warning("Please enter some text first.")
    else:
        processed_text = preprocess_text(inputt)
        vectorized = vectorizer.transform([processed_text]).toarray()
        prediction = model.predict(vectorized)[0]

        language = LANGUAGE_BY_LABEL.get(prediction)
        if language is None:
            # The original if-chain silently showed nothing in this case.
            st.error(f"Unrecognized prediction: {prediction!r}")
        else:
            st.header(language)