Spaces:
Runtime error
Runtime error
Tarandeep Singh commited on
Commit ·
16da076
1
Parent(s): 2b8afcb
first commit
Browse files- .gitignore +1 -0
- Dockerfile +14 -0
- README copy.md +7 -0
- app.py +35 -0
- modules/ngram_models_utils.py +121 -0
- requirements.txt +3 -0
- static/file_text.pkl +3 -0
- static/style.css +17 -0
- templates/.DS_Store +0 -0
- templates/bgimage.jpg +0 -0
- templates/index.html +94 -0
- templates/index_old.html +20 -0
- templates/result.html +10 -0
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
__pycache__/*
|
Dockerfile
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
|
| 2 |
+
# you will also find guides on how best to write your Dockerfile
|
| 3 |
+
|
| 4 |
+
FROM python:3.9
|
| 5 |
+
|
| 6 |
+
WORKDIR /code
|
| 7 |
+
|
| 8 |
+
COPY ./requirements.txt /code/requirements.txt
|
| 9 |
+
|
| 10 |
+
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
| 11 |
+
|
| 12 |
+
COPY . .
|
| 13 |
+
|
| 14 |
+
# The application object is a Flask (WSGI) app named `app` in the top-level
# app.py, so start it with the Flask CLI (already installed via
# requirements.txt) instead of uvicorn, which is neither installed nor able to
# serve a WSGI app directly. Host and port are unchanged.
CMD ["flask", "--app", "app", "run", "--host", "0.0.0.0", "--port", "7860"]
|
README copy.md
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
n_gram_app generates word sequences in the style of William Blake's poems, using an n-gram model built from the NLTK Gutenberg corpus file
|
| 2 |
+
blake-poems.txt
|
| 3 |
+
|
| 4 |
+
Steps to the UI:
|
| 5 |
+
1. Run app.py (for example, `python app.py`)
|
| 6 |
+
2. Open web browser on http://127.0.0.1:5000/
|
| 7 |
+
3. Enter inputs
|
app.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, render_template, request,jsonify
|
| 2 |
+
from modules.ngram_models_utils import generate_sentence, get_probability, preprocess_new, probability_helper, predict,create_ngrams
|
| 3 |
+
import pickle
|
| 4 |
+
from collections import Counter
|
| 5 |
+
|
| 6 |
+
app = Flask(__name__)

# Load the pickled corpus dictionary once at import time so every request
# reuses it. The path is relative to the current working directory.
# NOTE(review): values are presumably token lists produced by
# preprocess_new(); confirm against the script that wrote the pickle.
with open('static/file_text.pkl', 'rb') as pickle_file:
    data = pickle.load(pickle_file)

# Corpus used for generation, keyed by its Gutenberg file id.
blake = data['blake-poems.txt']
|
| 13 |
+
|
| 14 |
+
@app.route('/')
def index():
    """Serve the landing page that contains the sequence-generation form."""
    landing_template = 'index.html'
    return render_template(landing_template)
|
| 17 |
+
|
| 18 |
+
@app.route('/generate_sequence', methods=['POST'])
def generate_sequence():
    """
    Generate a word sequence from the submitted form and return it as JSON.

    Form fields read: initial_sequence (text), n_grams (int >= 1),
    sentence_length (int >= 0).
    return: {'sequence': <text>} on success, or {'error': <message>} with
            HTTP status 400 when a field is missing or invalid
    """
    try:
        initial_sequence = request.form['initial_sequence']
        n_grams = int(request.form['n_grams'])
        sentence_length = int(request.form['sentence_length'])
    except (KeyError, ValueError):
        # A missing field raises a KeyError subclass; a non-numeric value
        # raises ValueError. Both are client errors, not server errors.
        return jsonify({
            'error': 'initial_sequence, n_grams and sentence_length are '
                     'required; n_grams and sentence_length must be integers'
        }), 400

    if n_grams < 1 or sentence_length < 0:
        return jsonify({
            'error': 'n_grams must be at least 1 and sentence_length '
                     'must not be negative'
        }), 400

    # Probabilities depend on the requested n-gram size, so they are built
    # per request from the corpus loaded at import time.
    probs_blake = get_probability(blake, n_grams, type="smooth")
    output_sequence = generate_sentence(
        probs_blake, initial_sequence, n_grams, sentence_length
    )
    return jsonify({'sequence': output_sequence})
|
| 33 |
+
|
| 34 |
+
if __name__ == '__main__':
    import os

    # The interactive debugger lets anyone who can reach the server execute
    # arbitrary code, so it is off unless explicitly enabled with
    # FLASK_DEBUG=1 for local development.
    app.run(debug=os.environ.get('FLASK_DEBUG') == '1')
|
modules/ngram_models_utils.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import nltk, string
|
| 4 |
+
from nltk.corpus import gutenberg
|
| 5 |
+
from collections import Counter
|
| 6 |
+
from nltk.tokenize import word_tokenize
|
| 7 |
+
import random, string
|
| 8 |
+
|
| 9 |
+
def preprocess_new(text):
    """
    Normalise raw text into a list of lower-cased tokens.

    text: raw text to tokenise
    return: list of lower-cased tokens; every punctuation character except
            the comma is removed before tokenising
    """
    # Collapse all runs of whitespace (including newlines) into single spaces.
    # Work on the caller's text rather than a hard-coded corpus so the
    # function can preprocess any document it is given.
    text = ' '.join(text.split())

    # Remove punctuation except for commas
    punctuation_to_remove = string.punctuation.replace(',', '')
    text = text.translate(str.maketrans('', '', punctuation_to_remove))

    # Tokenize and lower case
    return [word.lower() for word in word_tokenize(text)]
|
| 21 |
+
|
| 22 |
+
def create_ngrams(tokens, n):
    """
    Build every contiguous n-gram of a token sequence.

    tokens: sequence of tokens
    n: n-gram size, must be at least 1
    return: list of n-length tuples in order of appearance; empty when there
            are fewer than n tokens
    raises: ValueError if n is smaller than 1
    """
    if n < 1:
        raise ValueError("n must be at least 1")
    # The last window starts at len(tokens) - n, hence the +1 on the
    # exclusive upper bound; without it the final n-gram is dropped.
    return [tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]
|
| 27 |
+
|
| 28 |
+
def probability_helper(sample,n):
    """
    Count the n-grams of a token sample and split each into context and
    predicted word.

    sample: list of tokens
    n: n-gram size
    return: tuple (df, df_sorted).
            df has one row per distinct n-gram with columns
            'sequence' (the first n-1 tokens joined by single spaces),
            'count' (occurrences of the n-gram) and 'nth_word' (last token).
            df_sorted holds the same rows ordered by 'nth_word' with two
            extra columns, 'sequence_id' and 'prediction_id', which number
            the rows 1..len in 'sequence' order and 'nth_word' order.
    """
    # get frequency of every distinct n-gram
    ngram_frequency = Counter(create_ngrams(sample, n))

    # Build the frame column by column so an empty sample yields an empty
    # frame with the expected columns instead of raising. Joining the context
    # tokens directly with spaces keeps ',' tokens intact in the key.
    df = pd.DataFrame({
        'sequence': [' '.join(ngram[:-1]) for ngram in ngram_frequency],
        'count': list(ngram_frequency.values()),
        'nth_word': [ngram[-1] for ngram in ngram_frequency],
    })

    # get ids for sequences and predictions
    df_sorted = df.sort_values(by='sequence')
    df_sorted['sequence_id'] = range(1, len(df_sorted) + 1)
    df_sorted = df_sorted.sort_values(by='nth_word')
    df_sorted['prediction_id'] = range(1, len(df_sorted) + 1)

    return df, df_sorted
|
| 66 |
+
|
| 67 |
+
def get_probability(sample,n,type = None):
    """
    Attach a conditional probability to every distinct n-gram of a sample.

    sample: list of tokens
    n: n-gram size
    type: None for plain relative frequency (count / total for the context),
          "smooth" for add-one smoothing ((count + 1) / (total + v))
    return: the sorted dataframe from probability_helper() with extra
            'total' and 'probability' columns
    raises: ValueError if type is neither None nor "smooth"
    """
    if type is not None and type != "smooth":
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError('type must be None or "smooth"')

    df, df_sorted = probability_helper(sample, n)

    # Total occurrences of each context across all of its continuations.
    totals = (
        df.groupby('sequence')['count']
        .sum()
        .reset_index()
        .rename(columns={'count': 'total'})
    )
    df_sorted = df_sorted.merge(totals, how='left', on='sequence')

    if type is None:
        df_sorted['probability'] = df_sorted['count'] / df_sorted['total']
    else:
        # prediction_id numbers the rows 1..len, so its maximum is the number
        # of distinct n-grams.
        # NOTE(review): textbook add-one smoothing adds the vocabulary size
        # here; confirm whether the distinct n-gram count is intended.
        v = df_sorted['prediction_id'].max()
        df_sorted['probability'] = (df_sorted['count'] + 1) / (df_sorted['total'] + v)

    return df_sorted
|
| 83 |
+
|
| 84 |
+
def predict(data, sequence):
    """
    Predict the next word for a context from the probabilities in data.

    data: result of get_probability()
    sequence: context string; leading and trailing whitespace is ignored
    return: the 'nth_word' with the highest probability among rows whose
            'sequence' equals the context, or a randomly chosen 'nth_word'
            from data when the context does not occur
    """
    subset = data[data['sequence'] == sequence.strip()]

    # Unseen context: fall back to a random known word. Checking explicitly
    # (rather than catching every exception) lets real errors, such as a
    # missing column, surface instead of being hidden behind a random word.
    if subset.empty:
        return random.choice(data['nth_word'].unique())

    # return the word with max probability
    return subset.iloc[subset['probability'].argmax()]['nth_word']
|
| 95 |
+
|
| 96 |
+
def generate_sentence(data, sequence, n,len ):
    """
    Extend a starting sequence by repeatedly predicting the next word.

    data: result of get_probability()
    sequence: starting text; should be n-1 words together
    n: n-gram size used to build data
    len: number of predictions to be made (the name shadows the builtin but
         is kept so existing callers keep working)
    return: the starting text followed by the predicted words; words are
            separated by spaces and a predicted ',' is attached directly to
            the preceding word
    """
    sentence = sequence.strip()

    # Keep the tokens separately from the rendered text: a comma is glued to
    # the previous word in the output, but it must remain its own token when
    # building the context for the next prediction.
    tokens = sentence.split()

    for _ in range(len):
        # The context is the last n-1 tokens. For n == 1 it must be empty;
        # a plain [-n+1:] slice would be [0:] and take the whole sentence.
        n_minus_1_sequence = ' '.join(tokens[-(n - 1):]) if n > 1 else ''
        next_word = predict(data, n_minus_1_sequence)
        tokens.append(next_word)

        if next_word == ',' or not sentence:
            # No separating space before a comma or before the first word.
            sentence += next_word
        else:
            sentence = sentence + ' ' + next_word

    return sentence
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
'''
|
| 116 |
+
files = gutenberg.fileids()
|
| 117 |
+
text = [gutenberg.raw(fileid) for fileid in gutenberg.fileids()]
|
| 118 |
+
file_text = dict(zip(files, text))
|
| 119 |
+
|
| 120 |
+
for key, value in file_text.items():
|
| 121 |
+
file_text[key] = preprocess_new(value)'''
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Flask==2.2.5
|
| 2 |
+
nltk==3.7
|
| 3 |
+
pandas==1.2.4
|
static/file_text.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0dc6993bcb5b17cd78fce735fcdef160ea75ab403d0fef8abfc6e668f6aa22bd
|
| 3 |
+
size 1712635
|
static/style.css
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
body {
|
| 2 |
+
font-family: Arial, sans-serif;
|
| 3 |
+
margin: 0;
|
| 4 |
+
padding: 0;
|
| 5 |
+
background-color: #f0f0f0;
|
| 6 |
+
color: #333;
|
| 7 |
+
}
|
| 8 |
+
|
| 9 |
+
h1 {
|
| 10 |
+
color: #007bff;
|
| 11 |
+
text-align: center;
|
| 12 |
+
}
|
| 13 |
+
|
| 14 |
+
h2 {
|
| 15 |
+
color: #6c757d;
|
| 16 |
+
text-align: center;
|
| 17 |
+
}
|
templates/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
templates/bgimage.jpg
ADDED
|
templates/index.html
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<title>Sequence Generator</title>
|
| 6 |
+
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
|
| 7 |
+
<style>
|
| 8 |
+
body {
|
| 9 |
+
font-family: 'Times New Roman', serif;
|
| 10 |
+
background-image: url('bgimg.jpg');
|
| 11 |
+
background-size: cover;
|
| 12 |
+
background-attachment: fixed;
|
| 13 |
+
margin: 0;
|
| 14 |
+
padding: 0;
|
| 15 |
+
display: flex;
|
| 16 |
+
justify-content: center;
|
| 17 |
+
align-items: center;
|
| 18 |
+
height: 100vh;
|
| 19 |
+
flex-direction: column;
|
| 20 |
+
color: #3e3e3e;
|
| 21 |
+
}
|
| 22 |
+
h1, h2 {
|
| 23 |
+
text-align: center;
|
| 24 |
+
color: #614532; /* Dark brown color for a rustic look */
|
| 25 |
+
}
|
| 26 |
+
form {
|
| 27 |
+
background-color: rgba(255, 255, 255, 0.8); /* Slightly transparent white */
|
| 28 |
+
padding: 20px;
|
| 29 |
+
border-radius: 8px;
|
| 30 |
+
margin-bottom: 20px;
|
| 31 |
+
}
|
| 32 |
+
input, button {
|
| 33 |
+
display: block;
|
| 34 |
+
width: 100%;
|
| 35 |
+
padding: 10px;
|
| 36 |
+
margin-top: 10px;
|
| 37 |
+
border-radius: 5px;
|
| 38 |
+
border: 1px solid #ddd;
|
| 39 |
+
background: rgba(255, 255, 255, 0.5);
|
| 40 |
+
}
|
| 41 |
+
button {
|
| 42 |
+
background-color: #8a5a44; /* Earthy tone */
|
| 43 |
+
color: white;
|
| 44 |
+
border: none;
|
| 45 |
+
cursor: pointer;
|
| 46 |
+
}
|
| 47 |
+
button:hover {
|
| 48 |
+
background-color: #7d4e3b;
|
| 49 |
+
}
|
| 50 |
+
#result {
|
| 51 |
+
display: none;
|
| 52 |
+
transition: opacity 1s ease-in-out;
|
| 53 |
+
background-color: rgba(255, 255, 255, 0.8); /* Slightly transparent white */
|
| 54 |
+
padding: 20px;
|
| 55 |
+
border-radius: 8px;
|
| 56 |
+
}
|
| 57 |
+
#result.show {
|
| 58 |
+
display: block;
|
| 59 |
+
opacity: 1;
|
| 60 |
+
}
|
| 61 |
+
</style>
|
| 62 |
+
</head>
|
| 63 |
+
<body>
|
| 64 |
+
<h1>Generate a William Blake-like Poem</h1>
|
| 65 |
+
<h2>Rediscover the Beauty of Romanticism Poetry</h2>
|
| 66 |
+
<form id="sequenceForm">
|
| 67 |
+
<input type="text" name="initial_sequence" placeholder="few words">
|
| 68 |
+
<input type="number" name="n_grams" placeholder="n-grams">
|
| 69 |
+
<input type="number" name="sentence_length" placeholder="Poem Length (in words)">
|
| 70 |
+
<button type="submit">Generate Poem</button>
|
| 71 |
+
</form>
|
| 72 |
+
<div id="result">
|
| 73 |
+
<h2>Your Generated Poem:</h2>
|
| 74 |
+
<p id="sequenceOutput"></p>
|
| 75 |
+
</div>
|
| 76 |
+
|
| 77 |
+
<script>
|
| 78 |
+
$(document).ready(function() {
    // Submit the form via AJAX and show the generated poem without reloading.
    $('#sequenceForm').on('submit', function(e) {
        e.preventDefault(); // Prevent the default form submission
        $.ajax({
            url: '/generate_sequence', // Flask endpoint
            type: 'POST',
            data: $(this).serialize(),
            success: function(response) {
                // .text() keeps the output from being interpreted as HTML
                $('#sequenceOutput').text(response.sequence);
                $('#result').addClass('show');
            },
            error: function(xhr) {
                // Show a message instead of failing silently; prefer the
                // server's own explanation when it sends one as JSON.
                var message = (xhr.responseJSON && xhr.responseJSON.error) ||
                    'Could not generate a poem. Please check your inputs and try again.';
                $('#sequenceOutput').text(message);
                $('#result').addClass('show');
            }
        });
    });
});
|
| 92 |
+
</script>
|
| 93 |
+
</body>
|
| 94 |
+
</html>
|
templates/index_old.html
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html>
|
| 3 |
+
<head>
|
| 4 |
+
<title>William Blake Poem Generator</title>
|
| 5 |
+
<!--<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">-->
|
| 6 |
+
</head>
|
| 7 |
+
<body>
|
| 8 |
+
<h1>Using n-grams to generate a william blake-like poem</h1>
|
| 9 |
+
<h2>A Simple Yet Effective Demonstration</h2>
|
| 10 |
+
</body>
|
| 11 |
+
<body>
|
| 12 |
+
<form action="/generate" method="post">
|
| 13 |
+
<input type="text" name="initial_sequence" placeholder="initial_sequence">
|
| 14 |
+
<input type="number" name="n_grams" placeholder="n-grams">
|
| 15 |
+
<input type="number" name="sentence_length" placeholder="Poem Length (in words)">
|
| 16 |
+
<!-- Add more input fields as necessary -->
|
| 17 |
+
<button type="submit">Generate Sentence</button>
|
| 18 |
+
</form>
|
| 19 |
+
</body>
|
| 20 |
+
</html>
|
templates/result.html
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html>
|
| 3 |
+
<head>
|
| 4 |
+
<title>Sequence Result</title>
|
| 5 |
+
</head>
|
| 6 |
+
<body>
|
| 7 |
+
<p>Generated Sequence: {{ sequence }}</p>
|
| 8 |
+
<a href="/">Try Again</a>
|
| 9 |
+
</body>
|
| 10 |
+
</html>
|