Spaces:

Obai33
/

MSC-Russian-Text-Generation

Runtime error

App Files Files Community

Obai33 committed on May 3, 2024

Commit

b5924f6

verified ·

1 Parent(s): ab5b302

initial commit

Browse files

Files changed (1) hide show

app.py +127 -0

app.py ADDED Viewed

	@@ -0,0 +1,127 @@

+# -*- coding: utf-8 -*-
+"""Copy of russian model testing.ipynb
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/1c9k49wiWEvDa1zxIw65pUAsuzMlFn-tq
+"""
+!pip install gradio
+!pip install translate
+import nltk
+from nltk.tokenize import word_tokenize
+from nltk.corpus import stopwords
+import pandas as pd
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras.preprocessing.text import Tokenizer
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
+from nltk.corpus import stopwords
+from nltk.stem import WordNetLemmatizer
+import nltk
+from nltk.translate.bleu_score import sentence_bleu
+nltk.download('stopwords')
+nltk.download('wordnet')
+nltk.download('punkt')
+url = 'https://raw.githubusercontent.com/Obai33/NLP_PoemGenerationDatasets/main/russianpoems.csv'
+text_data = pd.read_csv(url)
+# removing duplicates and missing values
+text_data.drop_duplicates(inplace = True)
+text_data.dropna(inplace = True)
+text_data
+text_data = text_data['text']
+text_data = text_data[500:700]
+# Tokenization and lowercasing
+tokenizer = Tokenizer()
+tokenizer.fit_on_texts(text_data)
+total_words = len(tokenizer.word_index) + 1
+input_sequences = []
+for line in text_data:
+	token_list = tokenizer.texts_to_sequences([line])[0]
+	for i in range(1, len(token_list)):
+		n_gram_sequence = token_list[:i+1]
+		input_sequences.append(n_gram_sequence)
+# pad sequences
+max_sequence_len = 100
+input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))
+# create predictors and label
+xs, labels = input_sequences[:,:-1],input_sequences[:,-1]
+ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)
+import requests
+# URL of the model
+url = 'https://github.com/Obai33/NLP_PoemGenerationDatasets/raw/main/modelrus1.h5'
+# Local file path to save the model
+local_filename = 'modelrus1.h5'
+# Download the model file
+response = requests.get(url)
+with open(local_filename, 'wb') as f:
+    f.write(response.content)
+# Load the pre-trained model
+model = tf.keras.models.load_model(local_filename)
+# Import the necessary library for translation
+import translate
+# Function to translate text to English
+def translate_to_english(text):
+    translator = translate.Translator(from_lang="ru", to_lang="en")
+    translated_text = translator.translate(text)
+    return translated_text
+def generate_russian_text(seed_text, next_words=50):
+    generated_text = seed_text
+    for _ in range(next_words):
+      token_list = tokenizer.texts_to_sequences([generated_text])[0]
+      token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
+      predicted = np.argmax(model.predict(token_list), axis=-1)
+      output_word = ""
+      for word, index in tokenizer.word_index.items():
+          if index == predicted:
+              output_word = word
+              break
+      generated_text += " " + output_word
+      '''
+        token_list = tokenizer.encode(generated_text, add_special_tokens=False)
+        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
+        predicted = np.argmax(model.predict(token_list), axis=-1)
+        output_word = tokenizer.decode(predicted[0])
+        generated_text += " " + output_word
+      '''
+    #reconnected_text = generated_text.replace(" ##", "")
+    t_text = translate_to_english(generated_text)
+    return generated_text, t_text
+import gradio as gr
+# Update Gradio interface to include both Arabic and English outputs
+iface = gr.Interface(
+    fn=generate_russian_text,
+    inputs="text",
+    outputs=["text", "text"],
+    title="Russian Poetry Generation",
+    description="Enter Russian text to generate a small poem.",
+    theme="compact"
+)
+# Run the interface
+iface.launch()