Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| from transformers import MarianMTModel, MarianTokenizer | |
# Hugging Face model id of the English -> Urdu MarianMT checkpoint.
model_name = 'Helsinki-NLP/opus-mt-en-ur'

# Disabled alternative: take the model id from the environment instead of
# hard-coding it. Re-enabling it requires `import os`.
# MODEL_NAME = os.environ.get("model_name")
# if not MODEL_NAME:
#     raise ValueError("MODEL_NAME is not set. Please add it in the Hugging Face Secrets.")


@st.cache_resource
def _load_translation_model(name):
    """Load the MarianMT model and tokenizer for *name* once per server process.

    Streamlit re-executes the whole script on every user interaction;
    ``st.cache_resource`` keeps the loaded objects alive across those reruns
    so the weights are not reloaded each time.

    Args:
        name: Hugging Face model identifier.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    return (
        MarianMTModel.from_pretrained(name),
        MarianTokenizer.from_pretrained(name),
    )


model, tokenizer = _load_translation_model(model_name)
# Function to translate text from English to Urdu
def translate_text(text):
    """Translate a single English value to Urdu.

    Args:
        text: The value to translate. Usually a ``str``; ``None``, float NaN
            (how pandas represents an empty CSV cell) and blank strings are
            treated as "nothing to translate". Any other non-string value is
            converted with ``str()`` first.

    Returns:
        The decoded translation of the first generated sequence, or ``""``
        when there is nothing to translate.
    """
    # `text != text` is true only for NaN; checked without importing math.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if not isinstance(text, str):
        text = str(text)
    if not text.strip():
        return ""

    # truncation=True caps over-long cells at the tokenizer's maximum length
    # instead of feeding the model more tokens than it supports.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
# Streamlit app
st.title("Dataset Translator From English to Urdu For Chatbot")
# The instructions are body text, not a second page title.
st.write("Upload Csv file for translation into Urdu. Remember Csv file must contain Only Question and Answer Column")


@st.cache_data(show_spinner=False)
def _translate_csv(csv_bytes):
    """Parse uploaded CSV bytes and add the Urdu translation columns.

    The result is cached on the file contents, so the reruns Streamlit
    performs on each interaction (for example clicking the download button)
    do not translate the same file again.

    Args:
        csv_bytes: Raw bytes of the uploaded CSV file.

    Returns:
        The DataFrame with ``Question_Urdu`` and ``Answer_Urdu`` columns
        added, or ``None`` when the ``Question`` or ``Answer`` column is
        missing.

    Raises:
        Whatever ``pd.read_csv`` raises for unreadable input.
    """
    import io  # local import: only this helper needs it

    frame = pd.read_csv(io.BytesIO(csv_bytes))
    if 'Question' not in frame.columns or 'Answer' not in frame.columns:
        return None

    # Translate questions and answers
    frame['Question_Urdu'] = frame['Question'].apply(translate_text)
    frame['Answer_Urdu'] = frame['Answer'].apply(translate_text)
    return frame


# Upload CSV file
uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])

if uploaded_file:
    try:
        with st.spinner("Translating..."):
            data = _translate_csv(uploaded_file.getvalue())
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # Report an unreadable upload in the page instead of a raw traceback.
        st.error(f"Could not read the CSV file: {exc}")
    else:
        if data is None:
            st.error("CSV file must contain 'Question' and 'Answer' columns")
        else:
            # Display the translated dataframe
            st.write(data)

            # Provide option to download the translated CSV
            translated_file = data.to_csv(index=False)
            st.download_button(
                "Download Translated CSV",
                translated_file,
                "Diabetes_Translated_Urdu.csv",
                mime="text/csv",
            )