Mahmous committed on
Commit
7eb2ed6
·
verified ·
1 Parent(s): da73dc2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +178 -5
app.py CHANGED
@@ -1,7 +1,180 @@
1
- import gradio as gr
 
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import traceback
3
+ from flask import Flask, request, jsonify
4
+ from flask_cors import CORS
5
+ from dotenv import load_dotenv
6
+ from openai import OpenAI
7
+ from langdetect import detect
8
+ from googletrans import Translator
9
+ from sentence_transformers import SentenceTransformer
10
+ from pinecone import Pinecone
11
 
12
+ # ---------- Config ----------
13
DATASET_PATH = "data/coaching_millionaer_dataset.json"

# Load the .env file first so the key lookups below can see its values.
load_dotenv()

# Both keys are optional at import time; a missing key yields None here and
# is reported where the corresponding client is created.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")  # add this to your .env
PINECONE_INDEX_NAME = "ebook"

# ---------- App ----------
app = Flask(__name__)
# CORS is enabled for the /ask endpoint only, from any origin.
CORS(
    app,
    resources={r"/ask": {"origins": "*"}},
)
23
+
24
+ # ---------- OpenAI Client ----------
25
# Create the OpenAI client only when a key is configured; otherwise `client`
# stays None and a warning is printed once at startup.
client = OpenAI(api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None
if client is None:
    print("⚠️ OPENAI_API_KEY is missing in .env")
30
+
31
+ # ---------- Retriever ----------
32
class PineconeRetriever:
    """Look up book passages in a vector index by semantic similarity.

    The query text is embedded with the supplied embedder, the vector is sent
    to the index, and every match is flattened into a plain dict so callers
    never have to handle the index's response objects.

    Defined at module level (not inside the initialisation ``try``) so the
    class exists even when the Pinecone setup below fails.
    """

    def __init__(self, index, embedder):
        """Store the index handle and the embedding model.

        Args:
            index: Object exposing ``query(vector=..., top_k=...,
                include_metadata=...)``.
            embedder: Object exposing ``encode(text)`` whose result has a
                ``tolist()`` method.
        """
        self.index = index
        self.embedder = embedder

    def retrieve(self, query, top_k=10):
        """Return the matches for ``query`` as a list of dicts.

        Args:
            query: Text to search for.
            top_k: Maximum number of matches requested from the index.

        Returns:
            A list of ``{"context", "page", "score"}`` dicts in the order the
            index returned them. Missing metadata yields ``""`` for
            ``context`` and ``None`` for ``page``.
        """
        vector = self.embedder.encode(query).tolist()
        response = self.index.query(
            vector=vector, top_k=top_k, include_metadata=True
        )
        results = []
        for match in response.get("matches", []) or []:
            # A match can come back without metadata (missing or explicit
            # None); treat both as "no metadata" rather than crashing.
            meta = match.get("metadata") or {}
            results.append({
                "context": meta.get("context", ""),
                "page": meta.get("page"),
                "score": match.get("score", 0),
            })
        return results


# `retriever` stays None when initialisation fails; the failure is printed
# with a traceback instead of aborting the import.
retriever = None
try:
    if not PINECONE_API_KEY:
        raise ValueError("PINECONE_API_KEY missing in .env")

    pc = Pinecone(api_key=PINECONE_API_KEY)
    index = pc.Index(PINECONE_INDEX_NAME)
    embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    retriever = PineconeRetriever(index, embedder)
    print("✅ Pinecone retriever initialized successfully.")
except Exception as e:
    print("❌ Retriever initialization failed:", e)
    traceback.print_exc()
65
+
66
# NOTE(review): this Translator is created at import time but is not
# referenced by any code visible in this file — confirm it is still needed.
translator = Translator()
67
+
68
+ # ---------- Helpers ----------
69
def detect_language(question: str) -> str:
    """Return the detected language code of ``question``.

    The text is passed to ``detect`` unchanged (no translation). Any error
    raised by the detector is swallowed and reported as ``"unknown"``.
    """
    try:
        lang_code = detect(question)
    except Exception:
        return "unknown"
    return lang_code
75
+
76
def normalize_language(lang: str, text: str) -> str:
    """Correct a Dutch ("nl") detection that is really German.

    Short German questions such as 'wer is' can be detected as Dutch. When
    ``lang`` is ``"nl"`` and the text contains one of the German hint words,
    ``"de"`` is returned instead; every other ``lang`` is passed through.

    Args:
        lang: Language code produced by the detector.
        text: The original user text.

    Returns:
        ``"de"`` when the Dutch detection is overridden, otherwise ``lang``.
    """
    if lang != "nl":
        return lang

    lowered = text.lower()
    # Split on anything that is not a letter or digit, so punctuation
    # ("wer?") does not hide a hint word.
    words = set(
        "".join(ch if ch.isalnum() else " " for ch in lowered).split()
    )

    # The short function words must match as whole words: a substring test
    # flags ordinary Dutch such as "werk" or "wassen" as German.
    if words & {"wer", "was", "wie"}:
        return "de"
    # The longer, distinctive hints keep their substring match so compounds
    # like "coachingprogramm" still count.
    if any(stem in lowered for stem in ("javid", "coaching")):
        return "de"
    return lang
81
+
82
def system_prompt_book_only() -> str:
    """Build the system prompt used when book context was retrieved.

    The prompt is assembled from individual instruction sentences joined by a
    single space.
    """
    instructions = [
        "You are CoachingBot, a professional mentor trained on the book 'Coaching Millionär' by Javid Niazi-Hoffmann.",
        "Use only the provided book context to answer the question.",
        "If the user asks about people like Javid Niazi-Hoffmann, describe them factually using the book content.",
        "Mention page numbers where possible.",
        "If the context is not relevant, say you don’t have that information in the book and provide a general, helpful answer.",
        "Always respond in the same language as the user's question, even if the book content is in another language.",
    ]
    return " ".join(instructions)
91
+
92
def system_prompt_fallback() -> str:
    """Build the system prompt used when no book context is available.

    The prompt is assembled from individual instruction sentences joined by a
    single space.
    """
    instructions = [
        "You are CoachingBot, a helpful business and life mentor.",
        "The question cannot be answered from the book, so answer using your general coaching knowledge.",
        "Always respond in the same language as the user's question, even if the book content is in another language.",
        "Do not invent book citations.",
    ]
    return " ".join(instructions)
99
+
100
def format_answers(question: str, answer: str, results):
    """Wrap an answer in the JSON-serialisable response envelope.

    Args:
        question: The user's question, echoed back.
        answer: The text to return to the user.
        results: Iterable of retrieval dicts; the ``"page"`` and ``"score"``
            keys are read, both optional.

    Returns:
        ``{"answers": [{"question", "answer", "source", "bm25_score"}]}``
        where ``source`` lists the cited pages (or ``"No source"``) and
        ``bm25_score`` is the highest score in ``results`` (``0.0`` if none).
    """
    # Several retrieved chunks often come from the same page; dict.fromkeys
    # drops the repeats while keeping first-seen order, so the citation reads
    # "Seite 3" instead of "Seite 3, Seite 3".
    page_labels = list(
        dict.fromkeys(f"Seite {r['page']}" for r in results if r.get("page"))
    )
    source = ", ".join(page_labels) if page_labels else "No source"
    # A missing or None score counts as 0.0 so max() never compares None.
    top_score = max((r.get("score") or 0.0 for r in results), default=0.0)
    return {
        "answers": [
            {
                "question": question,
                "answer": answer,
                "source": source,
                "bm25_score": top_score,
            }
        ]
    }
105
+
106
+ # ---------- Routes ----------
107
@app.route("/", methods=["GET"])
def health():
    """Report service status and which dependencies were configured."""
    status = {
        "status": "running",
        "retriever_ready": bool(retriever),
        "openai_key_loaded": bool(OPENAI_API_KEY),
        "pinecone_key_loaded": bool(PINECONE_API_KEY),
        "index_name": PINECONE_INDEX_NAME,
    }
    return jsonify(status)
116
+
117
@app.route("/ask", methods=["POST", "OPTIONS"])
def ask():
    """Answer a question posted as JSON ``{"question": "..."}``.

    Flow: validate the request, detect the question's language (logged only),
    retrieve book passages, pick the book-only or fallback system prompt
    depending on whether any passage passed the score threshold, then ask the
    chat model.

    Returns:
        The ``format_answers`` envelope as JSON. Every error is also reported
        through that envelope with HTTP 200, so the client always receives
        the same response shape. ``OPTIONS`` requests get an empty 204.
    """
    if request.method == "OPTIONS":
        return ("", 204)

    try:
        data = request.get_json(force=True) or {}
        question = (data.get("question") or "").strip()
    except Exception:
        # Covers unparsable bodies as well as non-object JSON or a
        # non-string "question".
        return jsonify(format_answers("", "Invalid JSON request", [])), 200

    if not question:
        return jsonify(format_answers("", "Please enter a question.", [])), 200

    print(f"\n--- User Question ---\n{question}")

    # Detect and normalize language
    user_lang = normalize_language(detect_language(question), question)
    print(f"Detected language: {user_lang}")

    # Both dependencies are None when their startup initialisation failed.
    # Say so explicitly instead of surfacing an AttributeError on None.
    if retriever is None:
        return jsonify(format_answers(
            question, "Retriever error: retriever is not initialized", []
        )), 200
    if client is None:
        return jsonify(format_answers(
            question, "⚠️ OpenAI call failed: OPENAI_API_KEY is missing", []
        )), 200

    # Retrieve context
    context, results = "", []
    try:
        raw_results = retriever.retrieve(question)
        MIN_SCORE = 0.10  # Pinecone similarity scores are normalized (0–1)
        results = [r for r in raw_results if r.get("score", 0) >= MIN_SCORE]
        if results:
            context = "\n\n---\n\n".join(
                [f"(Seite {r['page']}) {r['context']}" for r in results]
            )
    except Exception as e:
        traceback.print_exc()
        return jsonify(format_answers(question, f"Retriever error: {e}", [])), 200

    # Build prompts
    if context:
        sys_prompt = system_prompt_book_only()
        user_content = f"Question: {question}\n\nBook context:\n{context}"
    else:
        sys_prompt = system_prompt_fallback()
        user_content = question

    # Query GPT
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": sys_prompt},
                {"role": "user", "content": user_content},
            ],
            max_tokens=700,
        )
        # The message content may be None; treat that as an empty answer
        # rather than reporting an OpenAI failure.
        answer = (response.choices[0].message.content or "").strip()
    except Exception as e:
        traceback.print_exc()
        return jsonify(format_answers(question, f"⚠️ OpenAI call failed: {e}", [])), 200

    return jsonify(format_answers(question, answer, results))
175
+
176
+ # ---------- Run ----------
177
if __name__ == "__main__":
    # Listen on $PORT when it is set, otherwise on 7860, on all interfaces.
    port = int(os.getenv("PORT", 7860))
    print(f"🚀 Server started on port {port}")
    app.run(host="0.0.0.0", port=port)