First paragraph of the speech content...
Second paragraph of the speech content...
Continue with more paragraphs as needed...
import os import io import base64 from flask import Flask, request, render_template, jsonify from google import genai from google.genai import types from werkzeug.utils import secure_filename import requests import time import tempfile # Initialize Flask app app = Flask(__name__) # Initialize Gemini client client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY", "AIzaSyCg9NGsLygb0sVKpviMkgV4eMPLd9nXW7w")) # Set up generation config generate_content_config = types.GenerateContentConfig( temperature=1.0, top_p=0.95, top_k=40, max_output_tokens=8192, ) # Initialize the model model_name = "gemini-3-flash-preview" @app.route("/") def index(): return render_template("index.html") @app.route("/convert", methods=["POST"]) def convert_audio(): if "file" not in request.files: return jsonify({"error": "No file part"}), 400 file = request.files["file"] if file.filename == "": return jsonify({"error": "No selected file"}), 400 # Get the mode type from form data mode_type = request.form.get("type", "music") # Default to music mode if file: try: # Determine mime type based on file extension mime_type = None if file.filename.lower().endswith('.mp3'): mime_type = 'audio/mpeg' elif file.filename.lower().endswith('.wav'): mime_type = 'audio/wav' elif file.filename.lower().endswith('.m4a'): mime_type = 'audio/x-m4a' elif file.filename.lower().endswith('.ogg'): mime_type = 'audio/ogg' else: return jsonify({"error": "Unsupported file type"}), 400 # Save the file temporarily temp_dir = tempfile.mkdtemp() temp_path = os.path.join(temp_dir, secure_filename(file.filename)) file.save(temp_path) # Upload file to Gemini with mime type with open(temp_path, 'rb') as f: uploaded_file = client.files.upload(file=f,config={'mime_type': mime_type}) # Clean up temporary file os.remove(temp_path) os.rmdir(temp_dir) # Choose prompt based on mode if mode_type == "speech": prompt = """Extract the Gujarati speech content from the provided audio file and output it using the following HTML template. Format the content into meaningful paragraphs instead of line breaks. Ensure that each paragraph flows naturally and represents coherent thoughts or topics from the speech. ```
First paragraph of the speech content...
Second paragraph of the speech content...
Continue with more paragraphs as needed...
First paragraph of the speech content...
Second paragraph of the speech content...
Continue with more paragraphs as needed...