Spaces:
Runtime error
Runtime error
Riddhi Bhagwat commited on
Commit ·
e5ddfb2
1
Parent(s): e72de4a
Revert "auto detection of language input"
Browse filesThis reverts commit 4b9fc1415d3348bfb68381cc37ebc89a7ecfb9ad.
- app/.DS_Store +0 -0
- app/app.py +5 -24
- app/lang_model_router.py +0 -35
app/.DS_Store
DELETED
|
Binary file (6.15 kB)
|
|
|
app/app.py
CHANGED
|
@@ -25,7 +25,6 @@ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
|
|
| 25 |
import threading
|
| 26 |
from collections import defaultdict
|
| 27 |
from datasets import load_dataset
|
| 28 |
-
from lang_model_router import detect_language_code, get_language_name_and_model
|
| 29 |
|
| 30 |
|
| 31 |
BASE_MODEL = os.getenv("MODEL", "google/gemma-3-12b-pt")
|
|
@@ -397,29 +396,12 @@ def respond(
|
|
| 397 |
language: str,
|
| 398 |
temperature: Optional[float] = None,
|
| 399 |
seed: Optional[int] = None,
|
| 400 |
-
auto_detect: bool = True,
|
| 401 |
) -> list:
|
| 402 |
-
"""Respond to the user message with system
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
user_input = msg["content"]
|
| 408 |
-
break
|
| 409 |
-
|
| 410 |
-
# Determine language
|
| 411 |
-
if auto_detect:
|
| 412 |
-
lang_code = detect_language_code(user_input)
|
| 413 |
-
language, _ = get_language_name_and_model(lang_code)
|
| 414 |
-
|
| 415 |
-
# Load system prompt
|
| 416 |
-
system_prompt = LANGUAGES.get(language, LANGUAGES["English"])
|
| 417 |
-
|
| 418 |
-
# Format message list with system prompt prepended
|
| 419 |
-
messages = [{"role": "system", "content": system_prompt}]
|
| 420 |
-
messages.extend(format_history_as_messages(history))
|
| 421 |
-
|
| 422 |
-
# Generate response
|
| 423 |
if ZERO_GPU:
|
| 424 |
content = call_pipeline(messages)
|
| 425 |
else:
|
|
@@ -434,7 +416,6 @@ def respond(
|
|
| 434 |
)
|
| 435 |
content = response.choices[0].message.content
|
| 436 |
|
| 437 |
-
# Add response to history
|
| 438 |
message = gr.ChatMessage(role="assistant", content=content)
|
| 439 |
history.append(message)
|
| 440 |
return history
|
|
|
|
| 25 |
import threading
|
| 26 |
from collections import defaultdict
|
| 27 |
from datasets import load_dataset
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
BASE_MODEL = os.getenv("MODEL", "google/gemma-3-12b-pt")
|
|
|
|
| 396 |
language: str,
|
| 397 |
temperature: Optional[float] = None,
|
| 398 |
seed: Optional[int] = None,
|
|
|
|
| 399 |
) -> list:
|
| 400 |
+
"""Respond to the user message with a system message
|
| 401 |
+
|
| 402 |
+
Return the history with the new message"""
|
| 403 |
+
messages = format_history_as_messages(history)
|
| 404 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 405 |
if ZERO_GPU:
|
| 406 |
content = call_pipeline(messages)
|
| 407 |
else:
|
|
|
|
| 416 |
)
|
| 417 |
content = response.choices[0].message.content
|
| 418 |
|
|
|
|
| 419 |
message = gr.ChatMessage(role="assistant", content=content)
|
| 420 |
history.append(message)
|
| 421 |
return history
|
app/lang_model_router.py
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
|
| 2 |
-
from langdetect import detect, DetectorFactory
|
| 3 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 4 |
-
import os
|
| 5 |
-
DetectorFactory.seed = 0
|
| 6 |
-
LANGUAGE_MAP = {
|
| 7 |
-
"en": {"name": "English", "model": "openai-community/gpt2"},
|
| 8 |
-
"fr": {"name": "French", "model": "dbddv01/gpt2-french-small"},
|
| 9 |
-
"es": {"name": "Spanish", "model": "datificate/gpt2-small-spanish"},
|
| 10 |
-
"de": {"name": "German", "model": "deepset/gbert-base"},
|
| 11 |
-
"hi": {"name": "Hindi", "model": "ai4bharat/indic-bert"},
|
| 12 |
-
"mr": {"name": "Marathi", "model": "ai4bharat/indic-bert"},
|
| 13 |
-
"ja": {"name": "Japanese", "model": "rinna/japanese-gpt2-medium"},
|
| 14 |
-
"zh-cn": {"name": "Chinese", "model": "uer/gpt2-chinese-cluecorpusswwm"},
|
| 15 |
-
"ru": {"name": "Russian", "model": "sberbank-ai/rugpt3small_based_on_gpt2"},
|
| 16 |
-
"pt": {"name": "Portuguese", "model": "pierreguillou/gpt2-small-portuguese"},
|
| 17 |
-
"it": {"name": "Italian", "model": "dbddv01/gpt2-italian"},
|
| 18 |
-
"nl": {"name": "Dutch", "model": "GroNLP/gpt2-small-dutch"}
|
| 19 |
-
}
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
def detect_language_code(text: str) -> str:
|
| 23 |
-
try:
|
| 24 |
-
return detect(text)
|
| 25 |
-
except Exception:
|
| 26 |
-
return "en" # fallback
|
| 27 |
-
|
| 28 |
-
def get_language_name_and_model(lang_code: str) -> tuple[str, str]:
|
| 29 |
-
return LANGUAGE_MAP.get(lang_code, LANGUAGE_MAP["en"])
|
| 30 |
-
|
| 31 |
-
def get_model_by_name(language_name: str) -> str:
|
| 32 |
-
for code, (name, model) in LANGUAGE_MAP.items():
|
| 33 |
-
if name.lower() == language_name.lower():
|
| 34 |
-
return model
|
| 35 |
-
return LANGUAGE_MODEL_MAP["en"][1]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|