Spaces:

Mahmous
/

chatbot3

Sleeping

App Files Files Community

Mahmous committed on Nov 2, 2025

Commit

c3cdaaf

verified ·

1 Parent(s): 8d171d0

Update api.py

Browse files

Files changed (1) hide show

api.py +63 -38

api.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import os
 import traceback
-from flask import Flask, request, jsonify
 from flask_cors import CORS
 from dotenv import load_dotenv
 from langdetect import detect
@@ -8,37 +8,44 @@ from deep_translator import GoogleTranslator
 from sentence_transformers import SentenceTransformer
 from pinecone import Pinecone
 from openai import OpenAI
 # ---------- Config ----------
 DATASET_PATH = "data/coaching_millionaer_dataset.json"
 load_dotenv(override=True)
-# Load secrets from Hugging Face Space
-HF_TOKEN = os.getenv("HF_TOKEN")
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
 PINECONE_INDEX_NAME = "ebook"
-# ---------- App ----------
 app = Flask(__name__)
 CORS(app, resources={r"/ask": {"origins": "*"}})
-# ---------- LLM Client ----------
 client = None
 try:
-    if HF_TOKEN:
-        # Use Hugging Face Inference Provider
-        client = OpenAI(
-            base_url="https://router.huggingface.co/v1",
-            api_key=HF_TOKEN,
-        )
-        print("✅ Using Hugging Face Inference Provider (OpenAI-compatible API)")
-    elif OPENAI_API_KEY:
-        # Fallback to OpenAI if provided
-        client = OpenAI(api_key=OPENAI_API_KEY)
-        print("✅ Using OpenAI client directly")
-    else:
-        raise ValueError("No valid API key found. Set HF_TOKEN or OPENAI_API_KEY.")
 except Exception as e:
     print(f"❌ Failed to initialize LLM client: {e}")
     client = None
@@ -94,6 +101,7 @@ def detect_language(question: str) -> str:
     except Exception:
         return "unknown"
 def normalize_language(lang: str, text: str) -> str:
     if lang == "nl" and any(
         word in text.lower() for word in ["wer", "was", "wie", "javid", "coaching"]
@@ -101,6 +109,7 @@ def normalize_language(lang: str, text: str) -> str:
         return "de"
     return lang
 def system_prompt_book_only() -> str:
     return (
         "You are CoachingBot, a professional mentor trained on the book 'Coaching Millionär' by Javid Niazi-Hoffmann. "
@@ -111,6 +120,7 @@ def system_prompt_book_only() -> str:
         "Always respond in the same language as the user's question."
     )
 def system_prompt_fallback() -> str:
     return (
         "You are CoachingBot, a helpful business and life mentor. "
@@ -119,6 +129,7 @@ def system_prompt_fallback() -> str:
         "Do not invent book citations."
     )
 def format_answers(question: str, answer: str, results):
     pages = [f"Seite {r.get('page', '')}" for r in results if r.get("page")]
     source = ", ".join(pages) if pages else "No source"
@@ -191,10 +202,10 @@ def ask():
     if client is None:
         return jsonify(format_answers(question, "⚠️ No language model initialized.", results)), 200
-    # ---------- LLM Query ----------
     try:
         response = client.chat.completions.create(
-            model="openai/gpt-oss-120b:cerebras",  # Hugging Face model
             messages=[
                 {"role": "system", "content": sys_prompt},
                 {"role": "user", "content": user_content},
@@ -207,10 +218,8 @@ def ask():
         return jsonify(format_answers(question, f"⚠️ LLM call failed: {e}", results)), 200
     return jsonify(format_answers(question, answer, results))
-from flask import send_file
-import tempfile
 @app.route("/voice", methods=["POST"])
 def voice_chat():
     try:
@@ -218,39 +227,53 @@ def voice_chat():
         if not audio:
             return jsonify({"error": "No audio file uploaded"}), 400
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
             audio.save(tmp.name)
             audio_path = tmp.name
-        # Step 1️⃣: Transcribe using OpenAI Whisper or any STT
         transcription = client.audio.transcriptions.create(
             model="whisper-1",
-            file=open(audio_path, "rb")
         )
         text = transcription.text.strip()
         print(f"🎤 Transcribed: {text}")
-        # Step 2️⃣: Get mentoring answer from your existing /ask logic
-        data = {"question": text}
-        with app.test_request_context(json=data):
-            response = ask()
-        response_json = response.get_json()
         # Step 3️⃣: Optional TTS response
-        answer_text = response_json["answers"][0]["answer"]
         speech_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
         with client.audio.speech.with_streaming_response.create(
             model="gpt-4o-mini-tts",
             voice="alloy",
-            input=answer_text
         ) as speech:
             speech.stream_to_file(speech_file.name)
-        return jsonify({
-            "transcript": text,
-            "answer": answer_text,
-            "audio_url": f"/audio/{os.path.basename(speech_file.name)}"
-        })
     except Exception as e:
         traceback.print_exc()
         return jsonify({"error": str(e)}), 500
@@ -258,7 +281,9 @@ def voice_chat():
 @app.route("/audio/<filename>")
 def serve_audio(filename):
-    return send_file(os.path.join(tempfile.gettempdir(), filename), mimetype="audio/mpeg")
 # ---------- Run ----------
 if __name__ == "__main__":

 import os
 import traceback
+from flask import Flask, request, jsonify, send_file
 from flask_cors import CORS
 from dotenv import load_dotenv
 from langdetect import detect
 from sentence_transformers import SentenceTransformer
 from pinecone import Pinecone
 from openai import OpenAI
+import tempfile
 # ---------- Config ----------
 DATASET_PATH = "data/coaching_millionaer_dataset.json"
 load_dotenv(override=True)
+# Environment variables
+HF_TOKEN = os.getenv("HF_TOKEN")  # (commented logic below if you want to re-enable HF)
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
 PINECONE_INDEX_NAME = "ebook"
+# ---------- Flask App ----------
 app = Flask(__name__)
 CORS(app, resources={r"/ask": {"origins": "*"}})
+# ---------- LLM Client Setup ----------
 client = None
 try:
     # Legacy Hugging Face router setup, kept disabled for reference.
     # Restore the branch below in place of the OpenAI-only path to re-enable it:
     # if HF_TOKEN:
     #     client = OpenAI(
     #         base_url="https://router.huggingface.co/v1",
     #         api_key=HF_TOKEN,
     #     )
     #     print("✅ Using Hugging Face Inference Provider (OpenAI-compatible API)")
     # elif OPENAI_API_KEY:
     #     client = OpenAI(api_key=OPENAI_API_KEY)
     #     print("✅ Using OpenAI client directly")
     # else:
     #     raise ValueError("No valid API key found. Set HF_TOKEN or OPENAI_API_KEY.")
     # Active path: a single OpenAI client is used for transcription, chat and speech.
     if OPENAI_API_KEY:
         client = OpenAI(api_key=OPENAI_API_KEY)
         print("✅ Using OpenAI API for all tasks (Whisper, GPT, TTS)")
     else:
         # Raised only so the handler below reports the missing key like any other failure.
         raise ValueError("⚠️ Missing OPENAI_API_KEY in environment variables")
 except Exception as init_error:
     # Keep the module importable; code further down tests `client is None`.
     print(f"❌ Failed to initialize LLM client: {init_error}")
     client = None
     except Exception:
         return "unknown"
 def normalize_language(lang: str, text: str) -> str:
     if lang == "nl" and any(
         word in text.lower() for word in ["wer", "was", "wie", "javid", "coaching"]
         return "de"
     return lang
 def system_prompt_book_only() -> str:
     return (
         "You are CoachingBot, a professional mentor trained on the book 'Coaching Millionär' by Javid Niazi-Hoffmann. "
         "Always respond in the same language as the user's question."
     )
 def system_prompt_fallback() -> str:
     return (
         "You are CoachingBot, a helpful business and life mentor. "
         "Do not invent book citations."
     )
 def format_answers(question: str, answer: str, results):
     pages = [f"Seite {r.get('page', '')}" for r in results if r.get("page")]
     source = ", ".join(pages) if pages else "No source"
     if client is None:
         return jsonify(format_answers(question, "⚠️ No language model initialized.", results)), 200
+    # ---------- LLM Query (OpenAI) ----------
     try:
         response = client.chat.completions.create(
+            model="gpt-4o-mini",  # switched to OpenAI model
             messages=[
                 {"role": "system", "content": sys_prompt},
                 {"role": "user", "content": user_content},
         return jsonify(format_answers(question, f"⚠️ LLM call failed: {e}", results)), 200
     return jsonify(format_answers(question, answer, results))
+# ---------- Voice Chat ----------
 @app.route("/voice", methods=["POST"])
 def voice_chat():
     try:
         if not audio:
             return jsonify({"error": "No audio file uploaded"}), 400
+        # Save temporary audio
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
             audio.save(tmp.name)
             audio_path = tmp.name
+        # Step 1️⃣: Transcribe via OpenAI Whisper
         transcription = client.audio.transcriptions.create(
             model="whisper-1",
+            file=open(audio_path, "rb"),
         )
         text = transcription.text.strip()
         print(f"🎤 Transcribed: {text}")
+        # Step 2️⃣: Generate answer via GPT
+        response = client.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=[
+                {
+                    "role": "system",
+                    "content": (
+                        "You are CoachingBot, a professional mentor helping users improve their mindset, "
+                        "motivation, and business success. Be clear, empathetic, and practical."
+                    ),
+                },
+                {"role": "user", "content": text},
+            ],
+            max_tokens=700,
+        )
+        answer_text = response.choices[0].message.content.strip()
         # Step 3️⃣: Optional TTS response
         speech_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
         with client.audio.speech.with_streaming_response.create(
             model="gpt-4o-mini-tts",
             voice="alloy",
+            input=answer_text,
         ) as speech:
             speech.stream_to_file(speech_file.name)
+        return jsonify(
+            {
+                "transcript": text,
+                "answer": answer_text,
+                "audio_url": f"/audio/{os.path.basename(speech_file.name)}",
+            }
+        )
     except Exception as e:
         traceback.print_exc()
         return jsonify({"error": str(e)}), 500
 @app.route("/audio/<filename>")
 def serve_audio(filename):
     """Serve a generated speech file from the system temp directory.

     Args:
         filename: Base name of the file, taken from the URL path.

     Returns:
         The file as an ``audio/mpeg`` response. The request is aborted with
         404 when the name does not end in ``.mp3``, resolves outside the temp
         directory, or does not exist.
     """
     # Imported locally so this block does not rely on the module's import list.
     from flask import abort, send_from_directory

     # The temp directory also holds the uploaded ``.wav`` recordings; only the
     # ``.mp3`` files written by the speech step are meant to be downloadable.
     if not filename.endswith(".mp3"):
         abort(404)
     # send_from_directory joins the name safely and raises NotFound for an
     # escaping path or a missing file, instead of surfacing as a 500.
     return send_from_directory(
         tempfile.gettempdir(), filename, mimetype="audio/mpeg"
     )
 # ---------- Run ----------
 if __name__ == "__main__":