Spaces:

nytkng
/

poems_by_ngrams

Runtime error

App Files Files Community

Tarandeep Singh committed on Jan 27, 2024

Commit

16da076

1 Parent(s): 2b8afcb

first commit

Browse files

Files changed (13) hide show

.gitignore +1 -0
Dockerfile +14 -0
README copy.md +7 -0
app.py +35 -0
modules/ngram_models_utils.py +121 -0
requirements.txt +3 -0
static/file_text.pkl +3 -0
static/style.css +17 -0
templates/.DS_Store +0 -0
templates/bgimage.jpg +0 -0
templates/index.html +94 -0
templates/index_old.html +20 -0
templates/result.html +10 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ __pycache__/*

Dockerfile ADDED Viewed

	@@ -0,0 +1,14 @@

+# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+FROM python:3.9
+WORKDIR /code
+# Install dependencies before copying the sources so this layer stays cached
+# when only application code changes.
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+COPY . .
+# app.py exposes a Flask (WSGI) application object named `app`. There is no
+# app/main.py module and uvicorn is not installed by requirements.txt, so the
+# server is started through Flask's own CLI on the port Spaces expects.
+CMD ["flask", "--app", "app", "run", "--host", "0.0.0.0", "--port", "7860"]

README copy.md ADDED Viewed

	@@ -0,0 +1,7 @@

+n_gram_app predicts the next words of a sequence using n-gram models built from William Blake's poems; the text comes from
+the Gutenberg corpus file blake-poems.txt.
+Steps to the UI:
+1. Run app.py (`python app.py`)
+2. Open web browser on http://127.0.0.1:5000/
+3. Enter inputs

app.py ADDED Viewed

	@@ -0,0 +1,35 @@

+from flask import Flask, render_template, request,jsonify
+from modules.ngram_models_utils import generate_sentence, get_probability, preprocess_new, probability_helper, predict,create_ngrams
+import pickle
+from collections import Counter
+app  = Flask(__name__)
+# Load the data from the pickle file
+with open('static/file_text.pkl', 'rb') as pickle_file:
+    data = pickle.load(pickle_file)
+blake = data['blake-poems.txt']
+@app.route('/')
+def index():
+    """Serve the landing page that contains the poem-generation form."""
+    return render_template('index.html')
+@app.route('/generate_sequence', methods=['POST'])
+def generate_sequence():
+    # Replace 'input1', 'input2', etc. with your actual input field names
+    initial_sequence = request.form['initial_sequence']
+    n_grams = request.form['n_grams']
+    sentence_length = request.form['sentence_length']
+    n_grams, sentence_length = int(n_grams), int(sentence_length)
+    probs_blake  = get_probability(blake,n_grams ,type = "smooth")
+    # Here, call your function with the inputs
+    output_sequence = generate_sentence(probs_blake,initial_sequence, n_grams, sentence_length)
+    #return render_template('result.html', sequence=output_sequence)
+    return jsonify({'sequence': output_sequence})
+if __name__ == '__main__':
+    # Entry point when the file is executed directly (python app.py).
+    # debug=True turns on Flask's debug mode, so use this for local development only.
+    app.run(debug=True)

modules/ngram_models_utils.py ADDED Viewed

	@@ -0,0 +1,121 @@

+import pandas as pd
+import numpy as np
+import nltk, string
+from nltk.corpus import gutenberg
+from collections import Counter
+from nltk.tokenize import word_tokenize
+import random, string
+def preprocess_new(text):
+    text = ' '.join(gutenberg.raw('blake-poems.txt').split())
+    # Remove punctuation except for commas
+    punctuation_to_remove = string.punctuation.replace(',', '')  # Keep commas
+    translator = str.maketrans('', '', punctuation_to_remove)
+    text = text.translate(translator)
+    #text = text.translate(str.maketrans('', '', string.punctuation))
+    # Tokenize and lower case
+    tokens = word_tokenize(text)
+    tokens = [word.lower() for word in tokens]
+    return tokens
+def create_ngrams(tokens, n):
+    n_gram_tokens = []
+    for i in range(len(tokens)-n):
+        n_gram_tokens.append(tuple(tokens[i:i+n]))
+    return n_gram_tokens
+def probability_helper(sample,n):
+    """
+    sample: text sample
+    n: n-gram size
+    return: dataframe with probability
+    """
+    #get ngrams
+    ngrams_sample = create_ngrams(sample,n)
+    #get frequency
+    ngram_frequency = Counter([tuple(ngram) for ngram in ngrams_sample])
+    #ger probability
+    df = pd.DataFrame.from_dict(ngram_frequency, orient='index').reset_index()
+    df.columns = ['sequence',  'count']
+    #convert first column into 2 columns where first column has n-1 words, the second column has nth word
+    df['nth_word'] = df['sequence'].apply(lambda x: x[-1])
+    def get_sequence(tuple):
+        x = ''
+        for i in range(len(tuple)-1):
+            x+=(tuple[i])
+            x+=','
+        x = x[:-1]
+        x = x.replace(","," ")
+        return x
+    df['sequence'] = df['sequence'].apply(lambda x: get_sequence(x))
+    #get ids for sequences and predictions
+    df_sorted = df.sort_values(by='sequence')
+    df_sorted['sequence_id'] = range(1, len(df_sorted) + 1)
+    df_new = df_sorted
+    df_sorted = df_new.sort_values(by='nth_word')
+    df_sorted['prediction_id'] = range(1, len(df_sorted) + 1)
+    return df, df_sorted
+def get_probability(sample,n,type = None):
+    if type==None:
+        df, df_sorted = probability_helper(sample,n)
+        totals = df.groupby('sequence')['count'].sum().reset_index().rename(columns={'count':'total'})
+        df_sorted = df_sorted.merge(totals, how = 'left', on = 'sequence')
+        df_sorted['probability'] = df_sorted['count']/df_sorted['total']
+    elif type =="smooth":
+        df, df_sorted = probability_helper(sample,n)
+        v = df_sorted['prediction_id'].max()
+        totals = df.groupby('sequence')['count'].sum().reset_index().rename(columns={'count':'total'})
+        df_sorted = df_sorted.merge(totals, how = 'left', on = 'sequence')
+        df_sorted['probability'] = (df_sorted['count']+1)/(df_sorted['total'] + v)
+    return df_sorted
+def predict(data, sequence):
+    """this function generates predictions based on probabilities seen in the dataset"""
+    try:
+        subset = data[data['sequence']==sequence.strip()]
+        result = subset.iloc[subset['probability'].argmax()]['nth_word'] #return the word with max probability
+        #print("sequence detected")
+        return result
+    except:
+        result = random.choice(data['nth_word'].unique())
+        #print("sequence not detected")
+        return result
+def generate_sentence(data, sequence, n,len ):
+    """
+    data: result of get_probability()
+    sequence: should be n-1 words together
+    len: number of predictions to be made
+    """
+    sentence = sequence
+    sentence = sentence.strip()
+    for i in range(len):
+        n_minus_1_sequence = ' '.join(sentence.split(" ")[-n+1:])
+        #print(f'sequence number {i+1}: {n_minus_1_sequence}')
+        next_word = predict(data, n_minus_1_sequence)
+        if next_word!=',':
+            sentence = sentence + ' ' + next_word
+        else:
+            sentence+=next_word
+    return sentence
+'''
+files = gutenberg.fileids()
+text = [gutenberg.raw(fileid) for fileid in gutenberg.fileids()]
+file_text = dict(zip(files, text))
+for key, value in file_text.items():
+    file_text[key] = preprocess_new(value)'''

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+Flask==2.2.5
+nltk==3.7
+pandas==1.2.4

static/file_text.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0dc6993bcb5b17cd78fce735fcdef160ea75ab403d0fef8abfc6e668f6aa22bd
+size 1712635

static/style.css ADDED Viewed

	@@ -0,0 +1,17 @@

+body {
+    font-family: Arial, sans-serif;
+    margin: 0;
+    padding: 0;
+    background-color: #f0f0f0;
+    color: #333;
+}
+h1 {
+    color: #007bff;
+    text-align: center;
+}
+h2 {
+    color: #6c757d;
+    text-align: center;
+}

templates/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

templates/bgimage.jpg ADDED Viewed

templates/index.html ADDED Viewed

	@@ -0,0 +1,94 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <title>Sequence Generator</title>
+    <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
+    <style>
+        body {
+            font-family: 'Times New Roman', serif;
+            background-image: url('bgimg.jpg');
+            background-size: cover;
+            background-attachment: fixed;
+            margin: 0;
+            padding: 0;
+            display: flex;
+            justify-content: center;
+            align-items: center;
+            height: 100vh;
+            flex-direction: column;
+            color: #3e3e3e;
+        }
+        h1, h2 {
+            text-align: center;
+            color: #614532; /* Dark brown color for a rustic look */
+        }
+        form {
+            background-color: rgba(255, 255, 255, 0.8); /* Slightly transparent white */
+            padding: 20px;
+            border-radius: 8px;
+            margin-bottom: 20px;
+        }
+        input, button {
+            display: block;
+            width: 100%;
+            padding: 10px;
+            margin-top: 10px;
+            border-radius: 5px;
+            border: 1px solid #ddd;
+            background: rgba(255, 255, 255, 0.5);
+        }
+        button {
+            background-color: #8a5a44; /* Earthy tone */
+            color: white;
+            border: none;
+            cursor: pointer;
+        }
+        button:hover {
+            background-color: #7d4e3b;
+        }
+        #result {
+            display: none;
+            transition: opacity 1s ease-in-out;
+            background-color: rgba(255, 255, 255, 0.8); /* Slightly transparent white */
+            padding: 20px;
+            border-radius: 8px;
+        }
+        #result.show {
+            display: block;
+            opacity: 1;
+        }
+    </style>
+</head>
+<body>
+    <h1>Generate a William Blake-like Poem</h1>
+    <h2>Rediscover the Beauty of Romanticism Poetry</h2>
+    <form id="sequenceForm">
+        <input type="text" name="initial_sequence" placeholder="few words">
+        <input type="number" name="n_grams" placeholder="n-grams">
+        <input type="number" name="sentence_length" placeholder="Poem Length (in words)">
+        <button type="submit">Generate Poem</button>
+    </form>
+    <div id="result">
+        <h2>Your Generated Poem:</h2>
+        <p id="sequenceOutput"></p>
+    </div>
+    <script>
+        $(document).ready(function() {
+            $('#sequenceForm').on('submit', function(e) {
+                e.preventDefault(); // Prevent the default form submission
+                $.ajax({
+                    url: '/generate_sequence', // Your Flask endpoint
+                    type: 'POST',
+                    data: $(this).serialize(),
+                    success: function(response) {
+                        $('#sequenceOutput').text(response.sequence); // Assuming 'response.sequence' is the output
+                        $('#result').addClass('show');
+                    }
+                });
+            });
+        });
+    </script>
+</body>
+</html>

templates/index_old.html ADDED Viewed

	@@ -0,0 +1,20 @@

+<!DOCTYPE html>
+<html>
+    <head>
+        <title>William Blake Poem Generator</title>
+        <!--<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">-->
+    </head>
+    <!-- A document may contain only one body element; the heading and the form now share it. -->
+    <body>
+        <h1>Using n-grams to generate a william blake-like poem</h1>
+        <h2>A Simple Yet Effective Demonstration</h2>
+        <!-- The Flask route that accepts this POST is /generate_sequence. -->
+        <form action="/generate_sequence" method="post">
+            <input type="text" name="initial_sequence" placeholder="initial_sequence">
+            <input type="number" name="n_grams" placeholder="n-grams">
+            <input type="number" name="sentence_length" placeholder="Poem Length (in words)">
+            <!-- Add more input fields as necessary -->
+            <button type="submit">Generate Sentence</button>
+        </form>
+    </body>
+</html>

templates/result.html ADDED Viewed

	@@ -0,0 +1,10 @@

+<!DOCTYPE html>
+<html>
+<head>
+    <title>Sequence Result</title>
+</head>
+<body>
+    <p>Generated Sequence: {{ sequence }}</p>
+    <a href="/">Try Again</a>
+</body>
+</html>