"""Flask service that asks Gemini to transcribe Gujarati audio or video.

Routes:
    GET  /                 renders ``index.html``.
    POST /convert          multipart upload (``file``, optional ``type``).
    POST /convert-youtube  JSON body (``url``, optional ``type``).

Both POST routes answer ``{"lyrics": <text>}`` on success and
``{"error": <message>}`` with a 4xx/5xx status on failure.
"""
import base64
import io
import os
import tempfile
import time

import requests
from flask import Flask, jsonify, render_template, request
from google import genai
from google.genai import types
from werkzeug.utils import secure_filename

# Initialize Flask app
app = Flask(__name__)

# Initialize Gemini client.
# The key is read from the environment only: a key committed to source
# control is readable by anyone with access to the repository.
# NOTE(review): the key that used to be hard-coded here as a fallback should
# be treated as leaked and revoked.
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))

# Set up generation config
generate_content_config = types.GenerateContentConfig(
    temperature=1.0,
    top_p=0.95,
    top_k=40,
    max_output_tokens=8192,
)

# Initialize the model
model_name = "gemini-3-flash-preview"

# Accepted upload suffixes and the MIME type sent to Gemini for each.
_AUDIO_MIME_TYPES = {
    ".mp3": "audio/mpeg",
    ".wav": "audio/wav",
    ".m4a": "audio/x-m4a",
    ".ogg": "audio/ogg",
}

# Prompt texts, kept byte-for-byte as they were sent before.
# NOTE(review): the prompts say "HTML template" but the template text below
# contains no tags -- confirm nothing was lost from the original prompts.
_AUDIO_SPEECH_PROMPT = (
    "Extract the Gujarati speech content from the provided audio file and "
    "output it using the following HTML template. \n"
    "Format the content into meaningful paragraphs instead of line breaks. "
    "Ensure that each paragraph flows naturally and represents coherent "
    "thoughts or topics from the speech. ``` Pravachan\n"
    "{Title of the audio content}\n"
    "\n"
    "First paragraph of the speech content...\n"
    "\n"
    "Second paragraph of the speech content...\n"
    "\n"
    "Continue with more paragraphs as needed...\n"
    "\n"
    "*****\n"
    "```"
)

_AUDIO_MUSIC_PROMPT = (
    "Extract the Gujarati lyrics from the provided audio file and output "
    "them using the following HTML template. Ensure that each line of the "
    "lyrics is correctly inserted into the template and that any "
    "placeholders are replaced with the appropriate content from the file. "
    "``` Bhaktisudha\n"
    "{Title of the file}\n"
    "Line1\n"
    "Line2\n"
    "*****\n"
    "```"
)

_VIDEO_SPEECH_PROMPT = (
    "Extract the Gujarati speech content from the provided video and output "
    "it using the following HTML template. Format the content into "
    "meaningful paragraphs instead of line breaks. Ensure that each "
    "paragraph flows naturally and represents coherent thoughts or topics "
    "from the speech. ``` Pravachan\n"
    "{Title of the video content}\n"
    "\n"
    "First paragraph of the speech content...\n"
    "\n"
    "Second paragraph of the speech content...\n"
    "\n"
    "Continue with more paragraphs as needed...\n"
    "\n"
    "*****\n"
    "```"
)

_VIDEO_MUSIC_PROMPT = (
    "Extract the Gujarati lyrics from the provided video and output them "
    "using the following HTML template. Ensure that each line of the lyrics "
    "is correctly inserted into the template and that any placeholders are "
    "replaced with the appropriate content from the file. ``` Bhaktisudha\n"
    "{Title of the file}\n"
    "Line1\n"
    "Line2\n"
    "*****\n"
    "```"
)


def _audio_mime_type(filename):
    """Return the MIME type for a supported audio filename, else ``None``."""
    lowered = filename.lower()
    for suffix, mime in _AUDIO_MIME_TYPES.items():
        if lowered.endswith(suffix):
            return mime
    return None


def _generate_text(contents):
    """Stream a Gemini response for *contents* and return the cleaned text.

    Chunks whose ``text`` is ``None`` are treated as empty so they cannot
    break the concatenation. Markdown code fences are removed from the
    result.
    """
    pieces = [
        chunk.text or ""
        for chunk in client.models.generate_content_stream(
            model=model_name,
            contents=contents,
            config=generate_content_config,
        )
    ]
    return "".join(pieces).replace("```html", "").replace("```", "")


@app.route("/")
def index():
    """Render the landing page."""
    return render_template("index.html")


@app.route("/convert", methods=["POST"])
def convert_audio():
    """Transcribe an uploaded audio file.

    Form fields:
        file: the audio upload (.mp3, .wav, .m4a or .ogg).
        type: ``"speech"`` for paragraph output; anything else (default
            ``"music"``) selects the lyrics prompt.

    Returns:
        ``{"lyrics": ...}`` on success; ``{"error": ...}`` with status 400
        for a missing/unsupported upload or 500 when processing fails.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file part"}), 400

    upload = request.files["file"]
    # Also covers a filename of None, which previously fell through the
    # handler without producing a response.
    if not upload.filename:
        return jsonify({"error": "No selected file"}), 400

    # Get the mode type from form data (defaults to music mode)
    mode_type = request.form.get("type", "music")

    mime_type = _audio_mime_type(upload.filename)
    if mime_type is None:
        return jsonify({"error": "Unsupported file type"}), 400

    try:
        # The context manager removes the directory and its contents even
        # when saving or uploading raises.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = os.path.join(temp_dir, secure_filename(upload.filename))
            upload.save(temp_path)

            # Upload file to Gemini with mime type
            with open(temp_path, "rb") as f:
                uploaded_file = client.files.upload(
                    file=f, config={"mime_type": mime_type}
                )

        if mode_type == "speech":
            prompt = _AUDIO_SPEECH_PROMPT
        else:
            prompt = _AUDIO_MUSIC_PROMPT

        lyrics = _generate_text([uploaded_file, prompt])
        return jsonify({"lyrics": lyrics})
    except Exception as e:
        # Top-level request boundary: log the traceback, report the message.
        app.logger.exception("Audio conversion failed")
        return jsonify({"error": str(e)}), 500


@app.route("/convert-youtube", methods=["POST"])
def convert_youtube():
    """Transcribe the video at a URL supplied in a JSON body.

    JSON fields:
        url: the video URL handed to Gemini as ``file_uri``.
        type: ``"speech"`` for paragraph output; anything else (default
            ``"music"``) selects the lyrics prompt.

    Returns:
        ``{"lyrics": ...}`` on success; ``{"error": ...}`` with status 400
        when no URL is given or 500 when processing fails.
    """
    # silent=True: a missing or malformed JSON body becomes a plain
    # "No URL provided" response instead of an exception message.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    url = data.get("url")
    mode_type = data.get("type", "music")  # default to music mode

    if not url:
        return jsonify({"error": "No URL provided"}), 400

    if mode_type == "speech":
        prompt_text = _VIDEO_SPEECH_PROMPT
    else:
        prompt_text = _VIDEO_MUSIC_PROMPT

    try:
        # Create content for Gemini
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part(
                        file_data=types.FileData(
                            file_uri=url,
                            mime_type="video/*",
                        )
                    ),
                    types.Part.from_text(text=prompt_text),
                ],
            ),
        ]

        lyrics = _generate_text(contents)
        return jsonify({"lyrics": lyrics})
    except Exception as e:
        app.logger.exception("Video conversion failed")
        # Status 500 matches convert_audio; this used to be sent as 200.
        return jsonify({"error": str(e)}), 500


if __name__ == "__main__":
    # The interactive debugger must not be on by default while the server
    # listens on all interfaces; opt in with FLASK_DEBUG=1 for local work.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in {"1", "true"}
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)