handwritten_1 / demo_1.py
kpkishankrishna
1
47e726b
import os
import logging
import numpy as np
import svgwrite
import drawing
import lyrics
from rnn import rnn
import numpy as np
import svgwrite
import time
from scipy.signal import savgol_filter
import os
import cairosvg
import cv2
import shutil
import boto3
import requests
import moviepy.editor as mpe
class Hand(object):
    """Render text as an animated handwriting video with a spoken voice-over.

    The pipeline is: validate the text, sample pen strokes from the
    recurrent network, rasterise the growing SVG path into PNG frames,
    encode the frames into an MP4, fetch a narration MP3 and mux the two.

    AWS credentials are resolved through boto3's default credential chain
    (environment variables, shared credentials file, instance role, ...).
    Access keys must never be stored in this file; any key that was
    previously committed here should be considered leaked and rotated.
    """

    # Longest line accepted by ``write``.
    MAX_LINE_LENGTH = 75
    # Per-line values used when the caller does not supply ``biases``/``styles``.
    DEFAULT_BIAS = .75
    DEFAULT_STYLE = 9
    # A frame is written for every FRAME_STRIDE-th pen position.
    FRAME_STRIDE = 5
    # Frame rate of the intermediate (silent) video.
    VIDEO_FPS = 10
    # Endpoint that synthesises the narration and answers with the audio id.
    POLLY_URL = 'https://7eo46e4jad.execute-api.ap-south-1.amazonaws.com/polly_1'
    # Bucket the synthesised ``<audio id>.mp3`` is downloaded from.
    AUDIO_BUCKET = 'polly-aws-lambda'
    # (connect, read) timeout in seconds for the synthesis request.
    REQUEST_TIMEOUT = (10, 120)

    def __init__(self):
        """Build the stroke-generating network and call its ``restore()``."""
        # Presumably quiets TensorFlow's native logging -- must be set before
        # the network is constructed.
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
        self.nn = rnn(
            log_dir='logs',
            checkpoint_dir='checkpoints',
            prediction_dir='predictions',
            learning_rates=[.0001, .00005, .00002],
            batch_sizes=[32, 64, 64],
            patiences=[1500, 1000, 500],
            beta1_decays=[.9, .9, .9],
            validation_batch_size=32,
            optimizer='rms',
            num_training_steps=100000,
            warm_start_init_step=17900,
            regularization_constant=0.0,
            keep_prob=1.0,
            enable_parameter_averaging=False,
            min_steps_to_checkpoint=2000,
            log_interval=20,
            logging_level=logging.CRITICAL,
            grad_clip=10,
            lstm_size=400,
            output_mixture_components=20,
            attention_mixture_components=10
        )
        self.nn.restore()

    def write(self, script, lines, biases=None, styles=None, stroke_colors=None, stroke_widths=None):
        """Produce the narrated handwriting video for ``lines``.

        Args:
            script: Text sent to the speech endpoint for the voice-over.
            lines: Strings to hand-write, one per rendered line.
            biases: Optional per-line sampling bias; defaults to
                ``DEFAULT_BIAS`` for every line.
            styles: Optional per-line style id (selects the
                ``styles/style-<id>-*.npy`` priming files); defaults to
                ``DEFAULT_STYLE`` for every line.
            stroke_colors: Optional per-line stroke colour (default black).
            stroke_widths: Optional per-line stroke width (default 2).

        Returns:
            The integer timestamp naming the final ``video/<timestamp>.mp4``.

        Raises:
            ValueError: If a line is too long or has an unsupported character.
        """
        self._validate_lines(lines, set(drawing.alphabet), self.MAX_LINE_LENGTH)
        strokes = self._sample(lines, biases=biases, styles=styles)
        return self._draw(strokes, lines, script, stroke_colors=stroke_colors, stroke_widths=stroke_widths)

    @staticmethod
    def _validate_lines(lines, valid_char_set, max_length=75):
        """Raise ``ValueError`` for a line that is too long or uses a character outside ``valid_char_set``."""
        for line_num, line in enumerate(lines):
            if len(line) > max_length:
                raise ValueError(
                    (
                        "Each line must be at most {} characters. "
                        "Line {} contains {}"
                    ).format(max_length, line_num, len(line))
                )
            for char in line:
                if char not in valid_char_set:
                    raise ValueError(
                        (
                            "Invalid character {} detected in line {}. "
                            "Valid character set is {}"
                        ).format(char, line_num, valid_char_set)
                    )

    @staticmethod
    def _per_line(values, default, count, label):
        """Return ``values`` as a list of ``count`` items, or ``count`` copies of ``default`` when ``values`` is None."""
        if values is None:
            return [default]*count
        values = list(values)
        if len(values) != count:
            raise ValueError(
                "Expected {} {} (one per line), got {}".format(count, label, len(values))
            )
        return values

    def _sample(self, lines, biases=None, styles=None):
        """Sample one stroke sequence per line from the network.

        Caller-supplied ``biases`` and ``styles`` are honoured; previously
        they were silently replaced by the defaults. Every line is primed
        with its style's stored strokes and characters.

        Returns:
            A list with one array per line, all-zero rows removed.

        Raises:
            ValueError: If ``lines`` is empty or a per-line list has the
                wrong length.
        """
        num_samples = len(lines)
        if num_samples == 0:
            raise ValueError("At least one line is required")
        biases = self._per_line(biases, self.DEFAULT_BIAS, num_samples, 'biases')
        styles = self._per_line(styles, self.DEFAULT_STYLE, num_samples, 'styles')
        max_tsteps = 40*max(len(line) for line in lines)

        x_prime = np.zeros([num_samples, 1200, 3])
        x_prime_len = np.zeros([num_samples])
        chars = np.zeros([num_samples, 120])
        chars_len = np.zeros([num_samples])

        for i, (text, style) in enumerate(zip(lines, styles)):
            x_p = np.load('styles/style-{}-strokes.npy'.format(style))
            # tobytes() replaces tostring(), which NumPy deprecated and later removed.
            c_p = np.load('styles/style-{}-chars.npy'.format(style)).tobytes().decode('utf-8')
            # Priming text is the style's own characters followed by the line.
            c_p = np.array(drawing.encode_ascii(str(c_p) + " " + text))

            x_prime[i, :len(x_p), :] = x_p
            x_prime_len[i] = len(x_p)
            chars[i, :len(c_p)] = c_p
            chars_len[i] = len(c_p)

        [samples] = self.nn.session.run(
            [self.nn.sampled_sequence],
            feed_dict={
                self.nn.prime: True,
                self.nn.x_prime: x_prime,
                self.nn.x_prime_len: x_prime_len,
                self.nn.num_samples: num_samples,
                self.nn.sample_tsteps: max_tsteps,
                self.nn.c: chars,
                self.nn.c_len: chars_len,
                self.nn.bias: biases
            }
        )
        # Drop rows that are entirely zero.
        return [sample[~np.all(sample == 0.0, axis=1)] for sample in samples]

    def audio(self, script):
        """Synthesise ``script`` and download the MP3 to ``video/<id>.mp3``.

        Returns:
            The audio id: the response body of the synthesis endpoint, which
            is also the object name (without ``.mp3``) in ``AUDIO_BUCKET``.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
        """
        response = requests.get(
            url=self.POLLY_URL,
            params={'text': str(script)},
            timeout=self.REQUEST_TIMEOUT,
        )
        # Fail here rather than using an error page as an object name.
        response.raise_for_status()
        audio_id = response.text
        print(audio_id)

        # Credentials come from boto3's default chain, never from source code.
        s3 = boto3.client('s3')
        os.makedirs('video', exist_ok=True)
        with open(f'video/{audio_id}.mp3', 'wb') as f:
            s3.download_fileobj(self.AUDIO_BUCKET, f'{audio_id}.mp3', f)
        return audio_id

    def mix_video_audio(self, video_file, audio_file, final_file, fps=60):
        """Write ``final_file`` as ``video_file`` with ``audio_file`` as its sound track.

        Both input files are deleted once the output has been written; they
        are kept if writing fails.
        """
        my_clip = mpe.VideoFileClip(video_file)
        try:
            audio_background = mpe.AudioFileClip(audio_file)
            try:
                final_clip = my_clip.set_audio(audio_background)
                final_clip.write_videofile(final_file, fps=fps)
            finally:
                audio_background.close()
        finally:
            # Close the readers before the inputs are deleted below.
            my_clip.close()
        os.remove(video_file)
        os.remove(audio_file)

    @staticmethod
    def _frame_sort_key(name):
        """Return ``(line index, frame number)`` parsed from a ``<line>_<frame>.png`` file name."""
        stem = os.path.splitext(name)[0]
        line_part, _, frame_part = stem.partition('_')
        return (int(line_part), int(frame_part))

    def _save_frame(self, path_data, color, width, frame_dir, line_count, count, view_width, view_height):
        """Write the current path as an SVG in ``frame_dir`` and rasterise it into ``frame_dir/png``."""
        path = svgwrite.path.Path(path_data)
        path = path.stroke(color=color, width=width, linecap='round').fill("none")
        # The large offset keeps frame numbers at a constant digit count.
        name = 10000000+count
        svg_file = f'{frame_dir}/{line_count}_{name}.svg'
        dwg = svgwrite.Drawing(filename=svg_file)
        dwg.viewbox(width=view_width, height=view_height)
        dwg.add(dwg.rect(insert=(0, 0), size=(view_width, view_height), fill='white'))
        dwg.add(path)
        dwg.save()
        cairosvg.svg2png(url=svg_file, write_to=f'{frame_dir}/png/{line_count}_{name}.png')

    def _render_frames(self, strokes, lines, stroke_colors, stroke_widths, frame_dir, view_width, view_height, line_height):
        """Rasterise the progressively growing handwriting path into PNG frames."""
        initial_coord = np.array([0, -(3*line_height / 4)])
        line_count = 0
        # The path is cumulative, so every frame shows all ink drawn so far.
        p = "M{},{} ".format(0, 0)
        for offsets, line, color, width in zip(strokes, lines, stroke_colors, stroke_widths):
            if not line:
                # An empty line only advances the baseline.
                initial_coord[1] -= line_height
                continue

            offsets[:, :2] *= 1.5
            coords = drawing.offsets_to_coords(offsets)
            coords = drawing.denoise(coords)
            coords[:, :2] = drawing.align(coords[:, :2])
            coords[:, 1] *= -1
            coords[:, :2] -= coords[:, :2].min() + initial_coord
            # Centre the line horizontally in the view.
            coords[:, 0] += (view_width - coords[:, 0].max()) / 2

            prev_eos = 1.0
            last_index = len(coords) - 1
            for count, (x, y, eos) in enumerate(zip(*coords.T)):
                # Start a new sub-path after an end-of-stroke flag.
                p += '{}{},{} '.format('M' if prev_eos == 1.0 else 'L', x, y)
                prev_eos = eos
                # Also emit the last point so the end of the line is not lost.
                if count % self.FRAME_STRIDE == 0 or count == last_index:
                    self._save_frame(p, color, width, frame_dir, line_count, count, view_width, view_height)
            line_count += 1
            initial_coord[1] -= line_height

    def _encode_frames(self, png_dir, video_path, fps):
        """Encode the PNG frames of ``png_dir``, in drawing order, into ``video_path``."""
        # os.listdir order is arbitrary; sort numerically by line, then frame.
        images = sorted(os.listdir(png_dir), key=self._frame_sort_key)
        if not images:
            raise ValueError("No frames were rendered; every line is empty")

        first = cv2.imread(f'{png_dir}/{images[0]}')
        if first is None:
            raise ValueError(f"Could not read frame {images[0]}")
        height, width, channels = first.shape
        print('Width:', width)
        print('Height:', height)

        codec = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(video_path, codec, fps, (width, height))
        try:
            for i in images:
                img = cv2.imread(f'{png_dir}/{i}')
                out.write(img)
                print(i)
        finally:
            out.release()

    def _draw(self, strokes, lines, script, stroke_colors=None, stroke_widths=None):
        """Render ``strokes`` to frames, encode them, add narration and return the final timestamp.

        Returns:
            The integer timestamp naming ``video/<timestamp>.mp4``.

        Raises:
            ValueError: If no frame could be rendered (all lines empty).
        """
        folder = int(time.time())
        filename = f'image/{folder}'
        os.makedirs(f'{filename}/png')
        os.makedirs('video', exist_ok=True)

        stroke_colors = stroke_colors or ['black']*len(lines)
        stroke_widths = stroke_widths or [2]*len(lines)

        line_height = 60
        view_width = 1000
        view_height = line_height*(len(strokes) + 1)

        try:
            self._render_frames(
                strokes, lines, stroke_colors, stroke_widths,
                filename, view_width, view_height, line_height,
            )
            video = int(time.time())
            # The suffix keeps this path distinct from the final video even
            # when both timestamps fall within the same second; otherwise
            # mix_video_audio would delete its own output.
            silent_video = f"video/{video}_silent.mp4"
            self._encode_frames(f'{filename}/png', silent_video, self.VIDEO_FPS)
        finally:
            # Remove the temporary frames whether or not rendering succeeded.
            shutil.rmtree(filename, ignore_errors=True)

        audio_file = self.audio(script)
        final_time = int(time.time())
        self.mix_video_audio(silent_video, f"video/{audio_file}.mp3", f"video/{final_time}.mp4")
        return final_time
if __name__ == '__main__':
    # Usage demo: hand-write a few lines and narrate them with `script`.
    # `Hand.write` also accepts optional per-line `biases`, `styles`,
    # `stroke_colors` and `stroke_widths` lists; none are passed here.
    hand = Hand()

    lines = [
        "Hello Krishna",
        "This image is generated by local",
        "The one you found out the yesterday",
        "It is generated in my desktop",
    ]
    script = "Hello, how are you today. I am fine"

    hand.write(script=script, lines=lines)