Spaces:

GoodML
/

dishDecode

Sleeping

App Files Files Community

GoodML committed on Nov 25, 2024

Commit

9dac3f4

verified ·

1 Parent(s): 04d660b

Update app.py

Browse files

Files changed (1) hide show

app.py +209 -209

app.py CHANGED Viewed

@@ -1,175 +1,16 @@
-import os
-import whisper
-import requests
-import asyncio
-import aiohttp  # For making async HTTP requests
-from quart import Quart, request, jsonify, render_template
-from dotenv import load_dotenv
-import warnings
-warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
-app = Quart(__name__)
-print("APP IS RUNNING, ANIKET")
-# Load the .env file
-load_dotenv()
-print("ENV LOADED, ANIKET")
-# Fetch the API key from the .env file
-API_KEY = os.getenv("FIRST_API_KEY")
-# Ensure the API key is loaded correctly
-if not API_KEY:
-    raise ValueError("API Key not found. Make sure it is set in the .env file.")
-GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
-GEMINI_API_KEY = API_KEY
-# Load Whisper AI model at startup
-print("Loading Whisper AI model..., ANIKET")
-whisper_model = whisper.load_model("base")  # Choose model size: tiny, base, small, medium, large
-print("Whisper AI model loaded successfully, ANIKET")
-@app.route("/", methods=["GET"])
-async def health_check():
-    return jsonify({"status": "success", "message": "API is running successfully!"}), 200
-@app.route("/mbsa")
-async def mbsa():
-    return await render_template("mbsa.html")
-@app.route('/process-audio', methods=['POST'])
-async def process_audio():
-    print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
-    if 'audio' not in request.files:
-        return jsonify({"error": "No audio file provided"}), 400
-    audio_file = request.files['audio']
-    print("AUDIO FILE NAME: ", audio_file)
-    try:
-        print("STARTING TRANSCRIPTION, ANIKET")
-        # Step 1: Transcribe the uploaded audio file asynchronously
-        transcription = await transcribe_audio(audio_file)
-        print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
-        if not transcription:
-            return jsonify({"error": "Audio transcription failed"}), 500
-        print("GOT THE transcription")
-        print("Starting the GEMINI REQUEST TO STRUCTURE IT")
-        # Step 2: Generate structured recipe information using Gemini API asynchronously
-        structured_data = await query_gemini_api(transcription)
-        print("GOT THE STRUCTURED DATA", structured_data)
-        # Step 3: Return the structured data
-        return jsonify(structured_data)
-    except Exception as e:
-        return jsonify({"error": str(e)}), 500
-async def transcribe_audio(audio_file):
-    """
-    Transcribe audio using Whisper AI (async function).
-    """
-    print("CAME IN THE transcribe audio function")
-    try:
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
-            audio_file.save(temp_audio_file.name)
-            print(f"Temporary audio file saved: {temp_audio_file.name}")
-            # Run Whisper transcription asynchronously
-            loop = asyncio.get_event_loop()
-            result = await loop.run_in_executor(None, whisper_model.transcribe, temp_audio_file.name)
-            print("THE RESULTS ARE", result)
-        return result.get("text", "").strip()
-    except Exception as e:
-        print(f"Error in transcription: {e}")
-        return None
-async def query_gemini_api(transcription):
-    """
-    Send transcription text to Gemini API and fetch structured recipe information (async function).
-    """
-    try:
-        # Define the structured prompt
-        prompt = (
-            "Analyze the provided cooking video transcription and extract the following structured information:\n"
-            "1. Recipe Name: Identify the name of the dish being prepared.\n"
-            "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
-            "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
-            "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
-            "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
-            "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
-            "7. Serving size: In count of people or portion size.\n"
-            "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
-            "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
-            f"Text: {transcription}\n"
-        )
-        # Prepare the payload and headers
-        payload = {
-            "contents": [
-                {
-                    "parts": [
-                        {"text": prompt}
-                    ]
-                }
-            ]
-        }
-        headers = {"Content-Type": "application/json"}
-        # Send request to Gemini API asynchronously
-        async with aiohttp.ClientSession() as session:
-            async with session.post(
-                f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
-                json=payload,
-                headers=headers,
-                timeout=60  # 60 seconds timeout for the request
-            ) as response:
-                response.raise_for_status()  # Raise error if response code is not 200
-                data = await response.json()
-        return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
-    except aiohttp.ClientError as e:
-        print(f"Error querying Gemini API: {e}")
-        return {"error": str(e)}
-if __name__ == '__main__':
-    app.run(debug=True)
-# # Above code is without polling and sleep
 # import os
 # import whisper
 # import requests
-# from flask import Flask, request, jsonify, render_template
-# import tempfile
 # import warnings
 # warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
-# app = Flask(__name__)
 # print("APP IS RUNNING, ANIKET")
-# # Gemini API settings
-# from dotenv import load_dotenv
 # # Load the .env file
 # load_dotenv()
@@ -185,73 +26,72 @@ if __name__ == '__main__':
 # GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
 # GEMINI_API_KEY = API_KEY
 # # Load Whisper AI model at startup
 # print("Loading Whisper AI model..., ANIKET")
 # whisper_model = whisper.load_model("base")  # Choose model size: tiny, base, small, medium, large
 # print("Whisper AI model loaded successfully, ANIKET")
-# # Define the "/" endpoint for health check
 # @app.route("/", methods=["GET"])
-# def health_check():
 #     return jsonify({"status": "success", "message": "API is running successfully!"}), 200
 # @app.route("/mbsa")
-# def mbsa():
-#     return render_template("mbsa.html")
 # @app.route('/process-audio', methods=['POST'])
-# def process_audio():
 #     print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
-#     """
-#     Flask endpoint to process audio:
-#     1. Transcribe provided audio file using Whisper AI.
-#     2. Send transcription to Gemini API for recipe information extraction.
-#     3. Return structured data in the response.
-#     """
 #     if 'audio' not in request.files:
 #         return jsonify({"error": "No audio file provided"}), 400
 #     audio_file = request.files['audio']
 #     print("AUDIO FILE NAME: ", audio_file)
 #     try:
 #         print("STARTING TRANSCRIPTION, ANIKET")
-#         # Step 1: Transcribe the uploaded audio file directly
-#         audio_file = request.files['audio']
-#         transcription = transcribe_audio(audio_file)
 #         print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
 #         if not transcription:
 #             return jsonify({"error": "Audio transcription failed"}), 500
 #         print("GOT THE transcription")
 #         print("Starting the GEMINI REQUEST TO STRUCTURE IT")
-#         # Step 2: Generate structured recipe information using Gemini API
-#         structured_data = query_gemini_api(transcription)
 #         print("GOT THE STRUCTURED DATA", structured_data)
 #         # Step 3: Return the structured data
 #         return jsonify(structured_data)
 #     except Exception as e:
 #         return jsonify({"error": str(e)}), 500
-# def transcribe_audio(audio_path):
 #     """
-#     Transcribe audio using Whisper AI.
 #     """
 #     print("CAME IN THE transcribe audio function")
 #     try:
-#         # Transcribe audio using Whisper AI
-#         print("Transcribing audio...")
-#         result = whisper_model.transcribe(audio_path)
-#         print("THE RESULTS ARE", result)
 #         return result.get("text", "").strip()
 #     except Exception as e:
@@ -259,9 +99,9 @@ if __name__ == '__main__':
 #         return None
-# def query_gemini_api(transcription):
 #     """
-#     Send transcription text to Gemini API and fetch structured recipe information.
 #     """
 #     try:
 #         # Define the structured prompt
@@ -291,21 +131,20 @@ if __name__ == '__main__':
 #         }
 #         headers = {"Content-Type": "application/json"}
-#         # Send request to Gemini API and wait for the response
-#         print("Querying Gemini API...")
-#         response = requests.post(
-#             f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
-#             json=payload,
-#             headers=headers,
-#             timeout=60  # 60 seconds timeout for the request
-#         )
-#         response.raise_for_status()
-#         # Extract and return the structured data
-#         data = response.json()
 #         return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
-#     except requests.exceptions.RequestException as e:
 #         print(f"Error querying Gemini API: {e}")
 #         return {"error": str(e)}
@@ -317,6 +156,167 @@ if __name__ == '__main__':
 # import os

 # import os
 # import whisper
 # import requests
+# import asyncio
+# import aiohttp  # For making async HTTP requests
+# from quart import Quart, request, jsonify, render_template
+# from dotenv import load_dotenv
 # import warnings
 # warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
+# app = Quart(__name__)
 # print("APP IS RUNNING, ANIKET")
 # # Load the .env file
 # load_dotenv()
 # GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
 # GEMINI_API_KEY = API_KEY
 # # Load Whisper AI model at startup
 # print("Loading Whisper AI model..., ANIKET")
 # whisper_model = whisper.load_model("base")  # Choose model size: tiny, base, small, medium, large
 # print("Whisper AI model loaded successfully, ANIKET")
 # @app.route("/", methods=["GET"])
+# async def health_check():
 #     return jsonify({"status": "success", "message": "API is running successfully!"}), 200
 # @app.route("/mbsa")
+# async def mbsa():
+#     return await render_template("mbsa.html")
 # @app.route('/process-audio', methods=['POST'])
+# async def process_audio():
 #     print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
 #     if 'audio' not in request.files:
 #         return jsonify({"error": "No audio file provided"}), 400
 #     audio_file = request.files['audio']
 #     print("AUDIO FILE NAME: ", audio_file)
 #     try:
 #         print("STARTING TRANSCRIPTION, ANIKET")
+#         # Step 1: Transcribe the uploaded audio file asynchronously
+#         transcription = await transcribe_audio(audio_file)
 #         print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
 #         if not transcription:
 #             return jsonify({"error": "Audio transcription failed"}), 500
 #         print("GOT THE transcription")
 #         print("Starting the GEMINI REQUEST TO STRUCTURE IT")
+#         # Step 2: Generate structured recipe information using Gemini API asynchronously
+#         structured_data = await query_gemini_api(transcription)
 #         print("GOT THE STRUCTURED DATA", structured_data)
 #         # Step 3: Return the structured data
 #         return jsonify(structured_data)
 #     except Exception as e:
 #         return jsonify({"error": str(e)}), 500
+# async def transcribe_audio(audio_file):
 #     """
+#     Transcribe audio using Whisper AI (async function).
 #     """
 #     print("CAME IN THE transcribe audio function")
 #     try:
+#         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
+#             audio_file.save(temp_audio_file.name)
+#             print(f"Temporary audio file saved: {temp_audio_file.name}")
+#             # Run Whisper transcription asynchronously
+#             loop = asyncio.get_event_loop()
+#             result = await loop.run_in_executor(None, whisper_model.transcribe, temp_audio_file.name)
+#             print("THE RESULTS ARE", result)
 #         return result.get("text", "").strip()
 #     except Exception as e:
 #         return None
+# async def query_gemini_api(transcription):
 #     """
+#     Send transcription text to Gemini API and fetch structured recipe information (async function).
 #     """
 #     try:
 #         # Define the structured prompt
 #         }
 #         headers = {"Content-Type": "application/json"}
+#         # Send request to Gemini API asynchronously
+#         async with aiohttp.ClientSession() as session:
+#             async with session.post(
+#                 f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
+#                 json=payload,
+#                 headers=headers,
+#                 timeout=60  # 60 seconds timeout for the request
+#             ) as response:
+#                 response.raise_for_status()  # Raise error if response code is not 200
+#                 data = await response.json()
 #         return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
+#     except aiohttp.ClientError as e:
 #         print(f"Error querying Gemini API: {e}")
 #         return {"error": str(e)}
+# Above code is without polling and sleep
+import os
+import whisper
+import requests
+from flask import Flask, request, jsonify, render_template
+import tempfile
+import warnings
+# Silence the one specific Whisper warning about FP16 being unavailable on CPU.
+warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
+# Flask application object that the route decorators below register against.
+app = Flask(__name__)
+print("APP IS RUNNING, ANIKET")
+# Environment / Gemini API settings
+from dotenv import load_dotenv
+# Load the .env file
+load_dotenv()
+print("ENV LOADED, ANIKET")
+# Fetch the API key from the FIRST_API_KEY environment variable
+API_KEY = os.getenv("FIRST_API_KEY")
+# Fail at import time if the key is missing or empty, so the app never starts without it
+if not API_KEY:
+    raise ValueError("API Key not found. Make sure it is set in the .env file.")
+# Endpoint and key used by query_gemini_api()
+GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
+GEMINI_API_KEY = API_KEY
+# Load the Whisper AI model once at startup; transcribe_audio() reuses this module-level instance
+print("Loading Whisper AI model..., ANIKET")
+whisper_model = whisper.load_model("base")  # Choose model size: tiny, base, small, medium, large
+print("Whisper AI model loaded successfully, ANIKET")
+# Health-check endpoint served at "/"
+@app.route("/", methods=["GET"])
+def health_check():
+    """Return a small JSON payload with HTTP 200 to show the API is up."""
+    status_payload = {"status": "success", "message": "API is running successfully!"}
+    return jsonify(status_payload), 200
+@app.route("/mbsa")
+def mbsa():
+    """Render the ``mbsa.html`` template and return the resulting page."""
+    page = render_template("mbsa.html")
+    return page
+@app.route('/process-audio', methods=['POST'])
+def process_audio():
+    """
+    Flask endpoint to process audio:
+    1. Transcribe provided audio file using Whisper AI.
+    2. Send transcription to Gemini API for recipe information extraction.
+    3. Return structured data in the response.
+    Responds with HTTP 400 when the request has no 'audio' file part, and with
+    HTTP 500 when transcription yields nothing or any step raises.
+    """
+    # The docstring must be the first statement of the function; it previously
+    # followed this print, which left process_audio.__doc__ empty.
+    print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
+    if 'audio' not in request.files:
+        return jsonify({"error": "No audio file provided"}), 400
+    # Looked up once here and reused below (the second lookup was redundant).
+    audio_file = request.files['audio']
+    print("AUDIO FILE NAME: ", audio_file)
+    try:
+        print("STARTING TRANSCRIPTION, ANIKET")
+        # Step 1: Transcribe the uploaded audio file directly
+        transcription = transcribe_audio(audio_file)
+        print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
+        # transcribe_audio returns None on failure and may return "" for silence;
+        # both are reported as a failed transcription.
+        if not transcription:
+            return jsonify({"error": "Audio transcription failed"}), 500
+        print("GOT THE transcription")
+        print("Starting the GEMINI REQUEST TO STRUCTURE IT")
+        # Step 2: Generate structured recipe information using Gemini API
+        structured_data = query_gemini_api(transcription)
+        print("GOT THE STRUCTURED DATA", structured_data)
+        # Step 3: Return the structured data
+        return jsonify(structured_data)
+    except Exception as e:
+        # Endpoint boundary: turn any unexpected failure into a JSON 500 response.
+        return jsonify({"error": str(e)}), 500
+def transcribe_audio(audio_path):
+    """
+    Transcribe audio using Whisper AI.
+    Args:
+        audio_path: Either a filesystem path, or an uploaded-file object that
+            exposes ``save(destination)`` (what process_audio passes in from
+            ``request.files``). The parameter name is kept for compatibility.
+    Returns:
+        The stripped transcription text, or None if anything fails.
+    """
+    print("CAME IN THE transcribe audio function")
+    temp_path = None
+    try:
+        if hasattr(audio_path, "save"):
+            # Whisper reads audio from a path, so an uploaded file object must
+            # be written to disk first; passing the object itself always failed.
+            fd, temp_path = tempfile.mkstemp(suffix=".wav")
+            os.close(fd)
+            audio_path.save(temp_path)
+            audio_source = temp_path
+        else:
+            audio_source = audio_path
+        # Transcribe audio using Whisper AI
+        print("Transcribing audio...")
+        result = whisper_model.transcribe(audio_source)
+        print("THE RESULTS ARE", result)
+        return result.get("text", "").strip()
+    except Exception as e:
+        print(f"Error in transcription: {e}")
+        return None
+    finally:
+        # Remove the spooled upload so temp files do not pile up across requests.
+        if temp_path is not None:
+            try:
+                os.remove(temp_path)
+            except OSError:
+                pass
+def query_gemini_api(transcription):
+    """
+    Send transcription text to Gemini API and fetch structured recipe information.
+    Args:
+        transcription: Transcribed text that is embedded into the prompt.
+    Returns:
+        The text of the first part of the first candidate, "No result found"
+        when the response carries no such text, or ``{"error": <message>}``
+        when the HTTP request fails.
+    """
+    try:
+        # Define the structured prompt
+        prompt = (
+            "Analyze the provided cooking video transcription and extract the following structured information:\n"
+            "1. Recipe Name: Identify the name of the dish being prepared.\n"
+            "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
+            "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
+            "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
+            "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
+            "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
+            "7. Serving size: In count of people or portion size.\n"
+            "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
+            "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
+            f"Text: {transcription}\n"
+        )
+        # Prepare the payload and headers
+        payload = {
+            "contents": [
+                {
+                    "parts": [
+                        {"text": prompt}
+                    ]
+                }
+            ]
+        }
+        # The key goes in a header rather than the URL: requests puts the URL
+        # into its error messages, which are printed and returned to the client
+        # below, so a key in the query string would leak.
+        headers = {
+            "Content-Type": "application/json",
+            "x-goog-api-key": GEMINI_API_KEY,
+        }
+        # Send request to Gemini API and wait for the response
+        print("Querying Gemini API...")
+        response = requests.post(
+            GEMINI_API_ENDPOINT,
+            json=payload,
+            headers=headers,
+            timeout=60  # 60 seconds timeout for the request
+        )
+        response.raise_for_status()
+        # Extract and return the structured data
+        data = response.json()
+        # "or [{}]" also covers a present-but-empty list, which would otherwise
+        # raise IndexError on [0].
+        candidates = data.get("candidates") or [{}]
+        parts = candidates[0].get("content", {}).get("parts") or [{}]
+        return parts[0].get("text", "No result found")
+    except requests.exceptions.RequestException as e:
+        print(f"Error querying Gemini API: {e}")
+        return {"error": str(e)}
+# Start Flask's built-in server only when this file is executed directly.
+# NOTE(review): debug=True enables Flask's debug mode; confirm this is not the
+# path used to serve the deployed app.
+if __name__ == '__main__':
+    app.run(debug=True)
 # import os