Spaces:
Sleeping
Sleeping
| import os | |
| import io | |
| import base64 | |
| from flask import Flask, request, render_template, jsonify | |
| from google import genai | |
| from google.genai import types | |
| from werkzeug.utils import secure_filename | |
| import requests | |
| import time | |
| import tempfile | |
# Initialize Flask app
app = Flask(__name__)

# Initialize Gemini client.
# SECURITY: the API key is taken from the environment only. A key must never
# be committed to source control as a fallback value; any key that was
# previously embedded in this file should be treated as leaked and rotated.
_gemini_api_key = os.environ.get("GEMINI_API_KEY")
if not _gemini_api_key:
    # Fail fast with a clear message instead of failing on the first request.
    raise RuntimeError("GEMINI_API_KEY environment variable is not set")
client = genai.Client(api_key=_gemini_api_key)

# Generation settings shared by every Gemini request made in this module.
generate_content_config = types.GenerateContentConfig(
    temperature=1.0,
    top_p=0.95,
    top_k=40,
    max_output_tokens=8192,
)

# Name of the Gemini model used for all requests.
model_name = "gemini-3-flash-preview"
def index():
    """Render the ``index.html`` template and return the result."""
    # NOTE(review): no @app.route decorator is visible for this view in this
    # copy of the file -- confirm that it is registered with the Flask app.
    page = render_template("index.html")
    return page
def convert_audio():
    """Transcribe an uploaded audio file into an HTML page using Gemini.

    Reads the multipart upload ``file`` and the optional form field ``type``
    (``"speech"`` selects the paragraph template; anything else selects the
    lyrics template) from the current Flask request.

    Returns:
        A ``(json, status)`` pair or a JSON response:
        * ``{"lyrics": <html>}`` on success,
        * ``{"error": <message>}`` with status 400 for a missing file, an
          empty filename or an unsupported extension,
        * ``{"error": <message>}`` with status 500 for any failure while
          saving, uploading or generating.
    """
    # NOTE(review): no @app.route decorator is visible for this view in this
    # copy of the file -- confirm that it is registered with the Flask app.
    if "file" not in request.files:
        return jsonify({"error": "No file part"}), 400
    file = request.files["file"]
    # ``not`` also rejects a missing (None) filename, which would otherwise
    # crash on ``.lower()`` below.
    if not file.filename:
        return jsonify({"error": "No selected file"}), 400

    # Get the mode type from form data; default to music mode.
    mode_type = request.form.get("type", "music")

    # Determine mime type based on file extension.
    mime_by_extension = {
        ".mp3": "audio/mpeg",
        ".wav": "audio/wav",
        ".m4a": "audio/x-m4a",
        ".ogg": "audio/ogg",
    }
    lowered_name = file.filename.lower()
    mime_type = next(
        (
            mime
            for extension, mime in mime_by_extension.items()
            if lowered_name.endswith(extension)
        ),
        None,
    )
    if mime_type is None:
        return jsonify({"error": "Unsupported file type"}), 400

    try:
        # TemporaryDirectory removes the directory and its contents even when
        # saving or uploading raises, so no temp files are leaked on failure.
        with tempfile.TemporaryDirectory() as temp_dir:
            # secure_filename() can return "" (e.g. a name made only of
            # stripped characters); fall back to a fixed name so the path
            # never points at the directory itself.
            safe_name = secure_filename(file.filename) or "upload"
            temp_path = os.path.join(temp_dir, safe_name)
            file.save(temp_path)
            # Upload file to Gemini with mime type.
            with open(temp_path, 'rb') as f:
                uploaded_file = client.files.upload(
                    file=f,
                    config={'mime_type': mime_type},
                )

        # Choose prompt based on mode.
        if mode_type == "speech":
            prompt = """Extract the Gujarati speech content from the provided audio file and output it using the following HTML template. Format the content into meaningful paragraphs instead of line breaks. Ensure that each paragraph flows naturally and represents coherent thoughts or topics from the speech.
```
<html>
<head>
<title>Pravachan</title>
<meta http-equiv="content-type" content="text/html;charset=UTF-8" />
<link href="/static/simple.css" rel="stylesheet" type="text/css" />
<style>
</style>
</head>
<body>
<div class="main">
<div class="gtitlev3">
{Title of the audio content}
</div>
<div class="gpara">
<p>First paragraph of the speech content...</p>
<p>Second paragraph of the speech content...</p>
<p>Continue with more paragraphs as needed...</p>
</div>
<div class="chend">
*****
</div>
</div>
</body>
</html>
```"""
        else:  # Default to music mode
            prompt = """Extract the Gujarati lyrics from the provided audio file and output them using the following HTML template. Ensure that each line of the lyrics is correctly inserted into the template and that any placeholders are replaced with the appropriate content from the file.
```
<html>
<head>
<title>Bhaktisudha</title>
<meta http-equiv="content-type" content="text/html;charset=UTF-8" />
<link href="/static/simple.css" rel="stylesheet" type="text/css" />
<style>
</style>
</head>
<body>
<div class="main">
<div class="gtitlev3">
{Title of the file}
</div>
<div class="gpara">
Line1 <br/>
Line2 <br/>
</div>
<div class="chend">
*****
</div>
</div>
</body>
</html>
```"""

        # Generate response. A streamed chunk's ``text`` can be None when the
        # chunk carries no text parts, so skip those instead of concatenating
        # None onto a string.
        pieces = []
        for chunk in client.models.generate_content_stream(
            model=model_name,
            contents=[uploaded_file, prompt],
            config=generate_content_config,
        ):
            text = chunk.text
            if text:
                pieces.append(text)
        response = "".join(pieces)

        # Clean up the response: strip the Markdown code fences around the HTML.
        lyrics = response.replace("```html", "").replace("```", "")
        return jsonify({"lyrics": lyrics})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
def convert_youtube():
    """Transcribe a video referenced by URL into an HTML page using Gemini.

    Expects a JSON object body with ``url`` (required) and ``type``
    (optional; ``"speech"`` selects the paragraph template, anything else
    selects the lyrics template).

    Returns:
        A ``(json, status)`` pair or a JSON response:
        * ``{"lyrics": <html>}`` on success,
        * ``{"error": <message>}`` with status 400 when the body is not a
          JSON object or ``url`` is missing/empty,
        * ``{"error": <message>}`` with status 500 for any failure while
          generating.
    """
    # NOTE(review): no @app.route decorator is visible for this view in this
    # copy of the file -- confirm that it is registered with the Flask app.
    try:
        # silent=True yields None for a missing/invalid JSON body instead of
        # raising; anything that is not a JSON object is rejected up front so
        # ``.get`` below cannot fail on None or a list.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400
        url = data.get("url")
        mode_type = data.get("type", "music")  # Get mode type, default to music
        if not url:
            return jsonify({"error": "No URL provided"}), 400

        # Choose prompt based on mode.
        if mode_type == "speech":
            prompt_text = """Extract the Gujarati speech content from the provided video and output it using the following HTML template. Format the content into meaningful paragraphs instead of line breaks. Ensure that each paragraph flows naturally and represents coherent thoughts or topics from the speech.
```
<html>
<head>
<title>Pravachan</title>
<meta http-equiv="content-type" content="text/html;charset=UTF-8" />
<link href="/static/simple.css" rel="stylesheet" type="text/css" />
<style>
</style>
</head>
<body>
<div class="main">
<div class="gtitlev3">
{Title of the video content}
</div>
<div class="gpara">
<p>First paragraph of the speech content...</p>
<p>Second paragraph of the speech content...</p>
<p>Continue with more paragraphs as needed...</p>
</div>
<div class="chend">
*****
</div>
</div>
</body>
</html>
```"""
        else:  # Default to music mode
            prompt_text = """Extract the Gujarati lyrics from the provided video and output them using the following HTML template. Ensure that each line of the lyrics is correctly inserted into the template and that any placeholders are replaced with the appropriate content from the file.
```
<html>
<head>
<title>Bhaktisudha</title>
<meta http-equiv="content-type" content="text/html;charset=UTF-8" />
<link href="/static/simple.css" rel="stylesheet" type="text/css" />
<style>
</style>
</head>
<body>
<div class="main">
<div class="gtitlev3">
{Title of the file}
</div>
<div class="gpara">
Line1 <br/>
Line2 <br/>
</div>
<div class="chend">
*****
</div>
</div>
</body>
</html>
```"""

        # Create content for Gemini: the video reference followed by the prompt.
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part(
                        file_data=types.FileData(
                            file_uri=url,
                            mime_type="video/*",
                        )
                    ),
                    types.Part.from_text(text=prompt_text),
                ],
            ),
        ]

        # Generate response. A streamed chunk's ``text`` can be None when the
        # chunk carries no text parts, so skip those instead of concatenating
        # None onto a string.
        pieces = []
        for chunk in client.models.generate_content_stream(
            model=model_name,
            contents=contents,
            config=generate_content_config,
        ):
            text = chunk.text
            if text:
                pieces.append(text)
        response = "".join(pieces)

        # Clean up the response: strip the Markdown code fences around the HTML.
        lyrics = response.replace("```html", "").replace("```", "")
        return jsonify({"lyrics": lyrics})
    except Exception as e:
        # Report failures with a 500 status (previously an implicit 200), in
        # line with the error handling of the audio upload endpoint.
        return jsonify({"error": str(e)}), 500
if __name__ == "__main__":
    # SECURITY: Flask's debug mode enables the interactive Werkzeug debugger,
    # which permits arbitrary code execution. Because the server binds to all
    # interfaces (0.0.0.0), debug is off unless explicitly enabled through the
    # FLASK_DEBUG environment variable for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)