chikamov1 commited on
Commit
b8b0952
·
1 Parent(s): 20e415f

Integrate all chatbot logic into app.py and update Dockerfile

Browse files
Files changed (3) hide show
  1. Dockerfile +3 -3
  2. app.py +195 -11
  3. cgpcorp_api_chat.py +0 -45
Dockerfile CHANGED
@@ -22,10 +22,10 @@ COPY --chown=user requirements.txt .
22
  RUN pip install --no-cache-dir --upgrade pip && \
23
  pip install --no-cache-dir -r requirements.txt
24
 
25
- # Copy the application file
26
- # The app.py will download models from Hugging Face Hub, so no need to copy model dirs here.
27
  COPY --chown=user ./app.py /app/app.py
28
- COPY --chown=user ./cgpcorp_api_chat.py /app/cgpcorp_api_chat.py
 
29
  COPY --chown=user ./README.md /app/README.md
30
 
31
  # Command to run the application using uvicorn
 
22
  RUN pip install --no-cache-dir --upgrade pip && \
23
  pip install --no-cache-dir -r requirements.txt
24
 
25
+ # Copy the application file and other necessary files
 
26
  COPY --chown=user ./app.py /app/app.py
27
+ # The cgpcorp_api_chat.py file is no longer needed as its logic is integrated into app.py
28
+ # Removed: COPY --chown=user ./cgpcorp_api_chat.py /app/cgpcorp_api_chat.py
29
  COPY --chown=user ./README.md /app/README.md
30
 
31
  # Command to run the application using uvicorn
app.py CHANGED
@@ -1,18 +1,202 @@
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
- from cgpcorp_api_chat import translate_text
 
 
 
 
 
4
 
5
- app = FastAPI(title="CGP Corp Chatbot API")
 
6
 
7
- class TranslationRequest(BaseModel):
8
- text: str
9
- direction: str # "en-fr" or "fr-en"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  @app.get("/")
12
- def home():
13
- return {"message": " CGP Corp Bot API running on Hugging Face Spaces"}
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- @app.post("/translate")
16
- def translate(req: TranslationRequest):
17
- result = translate_text(req.text, req.direction)
18
- return {"translated_text": result}
 
1
import asyncio
import json
import os

import requests
import torch
from dotenv import load_dotenv
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
9
 
10
# --- Environment & configuration ---
# Pull GEMINI_API_KEY (and friends) from a local .env file when present.
load_dotenv()

# Hugging Face Hub repos holding the fine-tuned MarianMT translation models.
HF_EN_FR_REPO_ID = "cgpcorpbot/cgp_model_en-fr"
HF_FR_EN_REPO_ID = "cgpcorpbot/cgp_model_fr-en"

# Gemini REST endpoint (generateContent) and its API key; the key defaults
# to the empty string so a missing secret is detected at call time, not here.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
22
+
23
# --- MarianMT models and tokenizers (loaded once at application start) ---
try:
    # All inference happens on CPU inside the Space container.
    device = torch.device("cpu")

    print("Loading EN->FR model...")
    tokenizer_en_fr = AutoTokenizer.from_pretrained(HF_EN_FR_REPO_ID)
    model_en_fr = AutoModelForSeq2SeqLM.from_pretrained(HF_EN_FR_REPO_ID)
    model_en_fr = model_en_fr.to(device)
    # Wrap model + tokenizer in a translation pipeline for convenient calls.
    translator_en_fr = pipeline("translation", model=model_en_fr, tokenizer=tokenizer_en_fr, device=device)
    print(f"✅ MarianMT EN-FR Model loaded from Hugging Face Hub: {HF_EN_FR_REPO_ID} and moved to {device}")

    print("Loading FR->EN model...")
    tokenizer_fr_en = AutoTokenizer.from_pretrained(HF_FR_EN_REPO_ID)
    model_fr_en = AutoModelForSeq2SeqLM.from_pretrained(HF_FR_EN_REPO_ID)
    model_fr_en = model_fr_en.to(device)
    translator_fr_en = pipeline("translation", model=model_fr_en, tokenizer=tokenizer_fr_en, device=device)
    print(f"✅ MarianMT FR-EN Model loaded from Hugging Face Hub: {HF_FR_EN_REPO_ID} and moved to {device}")

except Exception as e:
    # Fail fast: the API is useless without the translation models.
    print(f"❌ Failed to load MarianMT models from Hugging Face Hub: {e}")
    raise RuntimeError(f"Failed to load translation models: {e}")
46
+
47
# --- Language Detection (Simplified) ---
def detect_language(text: str) -> str:
    """Guess whether *text* is French or English via keyword counting.

    Returns "french" when more than two of the common French stop-words
    appear as whole tokens in the input, otherwise "english".  Crude, but
    dependency-free; empty input yields "english".
    """
    french_keywords = {
        "le", "la", "les", "un", "une", "des", "est", "sont", "je", "tu",
        "il", "elle", "nous", "vous", "ils", "elles", "pas", "de", "du",
        "et", "à", "en", "que", "qui", "quoi", "comment", "où", "quand",
    }
    # Tokenize once into a set: the original re-split the text for every
    # keyword, making the check O(keywords * tokens).
    words = set(text.lower().split())
    french_word_count = len(french_keywords & words)

    if french_word_count > 2:
        return "french"
    return "english"
57
+
58
# --- MarianMT Translation with Gemini Fallback ---
async def translate_text_with_fallback(text: str, direction: str) -> str:
    """Translate *text* with the local MarianMT pipelines.

    `direction` is "en-fr" or "fr-en"; any other value returns an error
    string.  Empty/whitespace input returns "".  If the local pipeline
    raises, the Gemini API is used as a fallback.

    BUG FIX: this was previously a plain ``def`` that returned the
    *un-awaited coroutine* of ``gemini_translate_for_translation`` on the
    fallback path, while its callers already ``await`` it (awaiting the
    returned ``str`` raised TypeError).  It is now ``async`` and awaits
    the fallback, which matches every call site in this module.
    """
    if not text.strip():
        return ""

    try:
        if direction == "en-fr":
            translated_result = translator_en_fr(text, max_length=128)
            return translated_result[0]['translation_text'].strip()
        elif direction == "fr-en":
            translated_result = translator_fr_en(text, max_length=128)
            return translated_result[0]['translation_text'].strip()
        else:
            return "Invalid translation direction."
    except Exception as e:
        print(f"⚠️ Local MarianMT model failed for {direction}, falling back to Gemini for translation: {e}")
        # Await the async Gemini fallback so a string (not a coroutine)
        # is returned to the caller.
        return await gemini_translate_for_translation(text, direction)
82
+
83
async def gemini_translate_for_translation(text: str, direction: str) -> str:
    """Translate via the Gemini API when a local MarianMT model fails.

    This is strictly the *translation* fallback; conversational answers go
    through ``call_gemini_api_for_response``.  Returns a short English
    error string (never raises) when the key is missing or the call fails.
    """
    if not GEMINI_API_KEY:
        print("❌ Gemini API Key is missing for translation fallback.")
        return "API key missing for translation."

    target_lang = "French" if direction == "en-fr" else "English"
    # Ask Gemini to translate rather than answer.
    prompt = f"Translate the following text to {target_lang}: \"{text}\""

    payload = {"contents": [{"role": "user", "parts": [{"text": prompt}]}]}
    headers = {'Content-Type': 'application/json'}
    api_url_with_key = f"{GEMINI_API_URL}?key={GEMINI_API_KEY}"

    try:
        # Run the blocking requests call in a worker thread so the event
        # loop is not stalled; the original had no timeout, which could
        # hang the request forever.
        response = await asyncio.to_thread(
            requests.post, api_url_with_key, headers=headers, json=payload, timeout=30
        )
        response.raise_for_status()
        result = response.json()
        return result.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "Translation failed via Gemini.")
    except Exception as e:
        print(f"Gemini API translation fallback error: {e}")
        return "Gemini API translation error."
110
+
111
# --- Main Gemini API Call for Conversational Response ---
async def call_gemini_api_for_response(prompt: str) -> str:
    """Get a conversational answer from Gemini, always prompted in English.

    Primary response generator for the chatbot.  Returns a short error
    string (never raises) when the key is missing or the call fails.
    """
    if not GEMINI_API_KEY:
        print("❌ Gemini API Key is missing for main response.")
        return "API key missing."

    # Gemini is always prompted in English for consistent behavior;
    # translation back to French is handled by the caller.
    gemini_prompt = f"Answer the following question in English: {prompt}"
    payload = {"contents": [{"role": "user", "parts": [{"text": gemini_prompt}]}]}
    headers = {'Content-Type': 'application/json'}
    api_url_with_key = f"{GEMINI_API_URL}?key={GEMINI_API_KEY}"

    try:
        # Blocking HTTP in a worker thread keeps the event loop responsive;
        # the original call had no timeout and could hang indefinitely.
        response = await asyncio.to_thread(
            requests.post, api_url_with_key, headers=headers, json=payload, timeout=30
        )
        response.raise_for_status()
        result = response.json()
        return result.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No response from Gemini.")
    except Exception as e:
        print(f"Gemini API error for main response: {e}")
        return "Gemini API error for main response."
140
+
141
# --- Main Chatbot Response Logic ---
async def get_multilingual_chatbot_response(user_input: str) -> str:
    """End-to-end chatbot pipeline.

    Detects the input language, translates French input to English,
    queries Gemini (always in English), and translates the answer back
    to French when the user wrote in French.  Falls back to whatever
    intermediate text it has whenever a translation step fails.
    """
    detected_lang = detect_language(user_input)
    print(f"Detected language: {detected_lang.upper()}")

    # Sentinel error strings produced by the translation helpers.
    translation_errors = ("API key missing for translation.", "Gemini API translation error.")

    english_query = user_input
    if detected_lang == "french":
        print("Translating French query to English...")
        english_query = await translate_text_with_fallback(user_input, "fr-en")
        print(f"Translated query (EN): {english_query}")
        if not english_query.strip() or english_query in translation_errors:
            # Translation failed or came back empty — fall back to the raw input.
            english_query = user_input
            print("French to English translation resulted in empty string or error, using original input for Gemini.")

    # Conversational response from Gemini (always English).
    gemini_response_en = await call_gemini_api_for_response(english_query)
    print(f"Gemini response (EN): {gemini_response_en}")

    final_response = gemini_response_en
    gemini_errors = ("API key missing for main response.", "Gemini API error for main response.", "No response from Gemini.")
    # Translate back only for French input with a valid Gemini answer.
    if detected_lang == "french" and gemini_response_en not in gemini_errors:
        print("Translating English response back to French...")
        translated_back_fr = await translate_text_with_fallback(gemini_response_en, "en-fr")
        if translated_back_fr.strip() and translated_back_fr not in translation_errors:
            final_response = translated_back_fr
        else:
            print("English to French translation resulted in empty string or error, using English Gemini response.")
            final_response = gemini_response_en

    return final_response
178
+
179
# --- FastAPI Application ---
app = FastAPI()


class ChatRequest(BaseModel):
    # Raw user message, in English or French.
    user_input: str


@app.get("/")
async def root():
    """Health-check / landing endpoint."""
    return {"message": "Multilingual Chatbot API is running. Use /chat endpoint."}


@app.post("/chat")
async def chat_endpoint(request: ChatRequest):
    """Multilingual chat endpoint.

    Detects the language of the incoming message, routes it through the
    translation + Gemini pipeline, and replies in the caller's language.
    """
    message = request.user_input
    if not message:
        return {"response": "Please provide some input."}

    reply = await get_multilingual_chatbot_response(message)
    return {"response": reply}
 
 
cgpcorp_api_chat.py DELETED
@@ -1,45 +0,0 @@
1
- import os
2
- import requests
3
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
4
-
5
- # Load Hugging Face models
6
- MODEL_EN_FR = "cgpcorpbot/cgp_model_en-fr"
7
- MODEL_FR_EN = "cgpcorpbot/cgp_model_fr-en"
8
-
9
- print("Loading EN->FR model...")
10
- tokenizer_en_fr = AutoTokenizer.from_pretrained(MODEL_EN_FR)
11
- model_en_fr = AutoModelForSeq2SeqLM.from_pretrained(MODEL_EN_FR)
12
- translator_en_fr = pipeline("translation", model=model_en_fr, tokenizer=tokenizer_en_fr)
13
-
14
- print("Loading FR->EN model...")
15
- tokenizer_fr_en = AutoTokenizer.from_pretrained(MODEL_FR_EN)
16
- model_fr_en = AutoModelForSeq2SeqLM.from_pretrained(MODEL_FR_EN)
17
- translator_fr_en = pipeline("translation", model=model_fr_en, tokenizer=tokenizer_fr_en)
18
-
19
- # Gemini API key (set as HF secret in the Space)
20
- GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
21
-
22
- def translate_text(text, direction="en-fr"):
23
- """Translate using custom models, fallback to Gemini."""
24
- try:
25
- if direction == "en-fr":
26
- return translator_en_fr(text, max_length=512)[0]['translation_text']
27
- else:
28
- return translator_fr_en(text, max_length=512)[0]['translation_text']
29
- except Exception as e:
30
- print("⚠️ Local model failed, falling back to Gemini:", e)
31
- return gemini_translate(text, direction)
32
-
33
- def gemini_translate(text, direction="en-fr"):
34
- """Fallback using Gemini API."""
35
- if not GEMINI_API_KEY:
36
- return "Gemini API key not configured."
37
- target = "fr" if direction == "en-fr" else "en"
38
- response = requests.post(
39
- "https://api.gemini.com/translate",
40
- headers={"Authorization": f"Bearer {GEMINI_API_KEY}"},
41
- json={"q": text, "target": target}
42
- )
43
- if response.status_code == 200:
44
- return response.json().get("translatedText", "Translation failed.")
45
- return f"Gemini API Error: {response.text}"