# LyricGen / main.py
# thejagstudio's picture
# Update main.py
# aed24eb verified
import os
import io
import base64
from flask import Flask, request, render_template, jsonify
from google import genai
from google.genai import types
from werkzeug.utils import secure_filename
import requests
import time
import tempfile
# Flask application object; the route decorators below register against it.
app = Flask(__name__)

# Gemini client. The API key is taken from the environment only: a literal
# fallback key must never live in source control. Any key that was previously
# committed here should be treated as compromised and revoked.
# NOTE(review): when GEMINI_API_KEY is unset this passes api_key=None; the SDK
# is expected to try its own environment lookup and otherwise raise at
# startup -- confirm against the installed google-genai version.
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))

# Sampling / length settings shared by every generation request in this file.
generate_content_config = types.GenerateContentConfig(
    temperature=1.0,
    top_p=0.95,
    top_k=40,
    max_output_tokens=8192,
)

# Name of the Gemini model passed to generate_content_stream by the routes.
model_name = "gemini-3-flash-preview"
@app.route("/")
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    landing_page = render_template("index.html")
    return landing_page
# Accepted upload suffixes (compared against the lower-cased filename) and the
# MIME type declared to Gemini for each of them.
_AUDIO_MIME_TYPES = {
    ".mp3": "audio/mpeg",
    ".wav": "audio/wav",
    ".m4a": "audio/x-m4a",
    ".ogg": "audio/ogg",
}

# Prompt for "speech" mode: transcription laid out as paragraphs.
_AUDIO_SPEECH_PROMPT = """Extract the Gujarati speech content from the provided audio file and output it using the following HTML template. Format the content into meaningful paragraphs instead of line breaks. Ensure that each paragraph flows naturally and represents coherent thoughts or topics from the speech.
```
<html>
<head>
<title>Pravachan</title>
<meta http-equiv="content-type" content="text/html;charset=UTF-8" />
<link href="/static/simple.css" rel="stylesheet" type="text/css" />
<style>
</style>
</head>
<body>
<div class="main">
<div class="gtitlev3">
{Title of the audio content}
</div>
<div class="gpara">
<p>First paragraph of the speech content...</p>
<p>Second paragraph of the speech content...</p>
<p>Continue with more paragraphs as needed...</p>
</div>
<div class="chend">
*****
</div>
</div>
</body>
</html>
```"""

# Prompt for the default "music" mode: lyrics laid out one line per <br/>.
_AUDIO_MUSIC_PROMPT = """Extract the Gujarati lyrics from the provided audio file and output them using the following HTML template. Ensure that each line of the lyrics is correctly inserted into the template and that any placeholders are replaced with the appropriate content from the file.
```
<html>
<head>
<title>Bhaktisudha</title>
<meta http-equiv="content-type" content="text/html;charset=UTF-8" />
<link href="/static/simple.css" rel="stylesheet" type="text/css" />
<style>
</style>
</head>
<body>
<div class="main">
<div class="gtitlev3">
{Title of the file}
</div>
<div class="gpara">
Line1 <br/>
Line2 <br/>
</div>
<div class="chend">
*****
</div>
</div>
</body>
</html>
```"""


@app.route("/convert", methods=["POST"])
def convert_audio():
    """Transcribe an uploaded audio file into the HTML template via Gemini.

    Reads a multipart form containing a ``file`` part whose name ends in
    .mp3, .wav, .m4a or .ogg (case-insensitive) and an optional ``type``
    field. ``type == "speech"`` selects the paragraph prompt; any other
    value (default ``"music"``) selects the lyrics prompt.

    Returns:
        A JSON response ``{"lyrics": <text>}`` on success, where the text is
        the model output with Markdown code fences removed. On failure,
        ``{"error": <message>}`` with status 400 (missing file, empty
        filename, unsupported suffix) or 500 (any exception raised while
        saving, uploading or generating).
    """
    if "file" not in request.files:
        return jsonify({"error": "No file part"}), 400

    upload = request.files["file"]
    # Covers both an empty string and a missing (None) filename; previously a
    # None filename fell through every branch and the view returned nothing.
    if not upload.filename:
        return jsonify({"error": "No selected file"}), 400

    # Get the mode type from form data (defaults to music mode).
    mode_type = request.form.get("type", "music")

    lowered_name = upload.filename.lower()
    mime_type = next(
        (
            mime
            for suffix, mime in _AUDIO_MIME_TYPES.items()
            if lowered_name.endswith(suffix)
        ),
        None,
    )
    if mime_type is None:
        return jsonify({"error": "Unsupported file type"}), 400

    prompt = _AUDIO_SPEECH_PROMPT if mode_type == "speech" else _AUDIO_MUSIC_PROMPT

    try:
        # TemporaryDirectory removes the saved copy even when saving or
        # uploading raises, so failed requests no longer leak temp files.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = os.path.join(temp_dir, secure_filename(upload.filename))
            upload.save(temp_path)
            with open(temp_path, "rb") as audio_fh:
                uploaded_file = client.files.upload(
                    file=audio_fh,
                    config={"mime_type": mime_type},
                )

        # Collect streamed text. A chunk's .text can be None (e.g. a chunk
        # carrying no text parts), which used to break the "+=" accumulation.
        pieces = []
        for chunk in client.models.generate_content_stream(
            model=model_name,
            contents=[uploaded_file, prompt],
            config=generate_content_config,
        ):
            if chunk.text:
                pieces.append(chunk.text)
        response = "".join(pieces)

        # Strip the Markdown fences the model tends to wrap the HTML in.
        lyrics = response.replace("```html", "").replace("```", "")
        return jsonify({"lyrics": lyrics})
    except Exception as e:  # route boundary: report the failure as JSON
        app.logger.exception("Audio conversion failed")
        return jsonify({"error": str(e)}), 500
# Prompt for "speech" mode on a video URL: transcription laid out as paragraphs.
_YOUTUBE_SPEECH_PROMPT = """Extract the Gujarati speech content from the provided video and output it using the following HTML template. Format the content into meaningful paragraphs instead of line breaks. Ensure that each paragraph flows naturally and represents coherent thoughts or topics from the speech.
```
<html>
<head>
<title>Pravachan</title>
<meta http-equiv="content-type" content="text/html;charset=UTF-8" />
<link href="/static/simple.css" rel="stylesheet" type="text/css" />
<style>
</style>
</head>
<body>
<div class="main">
<div class="gtitlev3">
{Title of the video content}
</div>
<div class="gpara">
<p>First paragraph of the speech content...</p>
<p>Second paragraph of the speech content...</p>
<p>Continue with more paragraphs as needed...</p>
</div>
<div class="chend">
*****
</div>
</div>
</body>
</html>
```"""

# Prompt for the default "music" mode on a video URL: one lyric line per <br/>.
_YOUTUBE_MUSIC_PROMPT = """Extract the Gujarati lyrics from the provided video and output them using the following HTML template. Ensure that each line of the lyrics is correctly inserted into the template and that any placeholders are replaced with the appropriate content from the file.
```
<html>
<head>
<title>Bhaktisudha</title>
<meta http-equiv="content-type" content="text/html;charset=UTF-8" />
<link href="/static/simple.css" rel="stylesheet" type="text/css" />
<style>
</style>
</head>
<body>
<div class="main">
<div class="gtitlev3">
{Title of the file}
</div>
<div class="gpara">
Line1 <br/>
Line2 <br/>
</div>
<div class="chend">
*****
</div>
</div>
</body>
</html>
```"""


@app.route("/convert-youtube", methods=["POST"])
def convert_youtube():
    """Transcribe the video at a client-supplied URL into the HTML template.

    Reads a JSON object with a required ``url`` string and an optional
    ``type`` field. ``type == "speech"`` selects the paragraph prompt; any
    other value (default ``"music"``) selects the lyrics prompt. The URL is
    handed to Gemini as a file URI; this server does not fetch it itself.

    Returns:
        A JSON response ``{"lyrics": <text>}`` on success, where the text is
        the model output with Markdown code fences removed. On failure,
        ``{"error": <message>}`` with status 400 when no usable URL was
        supplied, or 500 when the Gemini request raises (matching the
        ``/convert`` route, which also reports such failures as 500).
    """
    # silent=True returns None for a missing/malformed JSON body instead of
    # raising; anything that is not a JSON object is treated as empty input.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    url = data.get("url")
    mode_type = data.get("type", "music")  # defaults to music mode

    if not isinstance(url, str) or not url.strip():
        return jsonify({"error": "No URL provided"}), 400

    prompt_text = (
        _YOUTUBE_SPEECH_PROMPT if mode_type == "speech" else _YOUTUBE_MUSIC_PROMPT
    )

    try:
        # Single user turn: the video reference followed by the instructions.
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part(
                        file_data=types.FileData(
                            file_uri=url,
                            mime_type="video/*",
                        )
                    ),
                    types.Part.from_text(text=prompt_text),
                ],
            ),
        ]

        # Collect streamed text. A chunk's .text can be None (e.g. a chunk
        # carrying no text parts), which used to break the "+=" accumulation.
        pieces = []
        for chunk in client.models.generate_content_stream(
            model=model_name,
            contents=contents,
            config=generate_content_config,
        ):
            if chunk.text:
                pieces.append(chunk.text)
        response = "".join(pieces)

        # Strip the Markdown fences the model tends to wrap the HTML in.
        lyrics = response.replace("```html", "").replace("```", "")
        return jsonify({"lyrics": lyrics})
    except Exception as e:  # route boundary: report the failure as JSON
        app.logger.exception("YouTube conversion failed")
        # Previously returned with the default 200 status, so clients could
        # not distinguish failure from success by status code.
        return jsonify({"error": str(e)}), 500
if __name__ == "__main__":
    # The server binds to every interface, so the interactive debugger must
    # not be on by default: it lets anyone who can reach the port execute
    # code. Debug mode is opt-in through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)