Paul committed
Commit 0748ff8 · 1 Parent(s): 136f619
__pycache__/app.cpython-313.pyc CHANGED
Binary files a/__pycache__/app.cpython-313.pyc and b/__pycache__/app.cpython-313.pyc differ
 
app.py CHANGED
@@ -4,7 +4,7 @@ from typing import Dict, Any, Tuple
4
 
5
  from reply_service import get_reply_service
6
  from trigger_move_identifier import get_trigger_move_identifier
7
- from perplexity_service import get_perplexity_service
8
  from gemini_service import get_gemini_service, get_available_gemini_models
9
 
10
 
@@ -77,8 +77,8 @@ def parse_conversation(text: str) -> Tuple[str, str]:
77
  return male, female
78
 
79
 
80
- def run_full_pipeline(conversation: str, wingman_prompt: str = "", gemini_model_name: str = "gemini-2.5-flash") -> Dict[str, Any]:
81
- """Run trigger detector and generate replies from 5 models (3 prompt styles + Perplexity + Gemini)."""
82
  try:
83
  male, female = parse_conversation(conversation)
84
  identifier = get_trigger_move_identifier(
@@ -129,20 +129,20 @@ def run_full_pipeline(conversation: str, wingman_prompt: str = "", gemini_model_
129
  wingman_reply = ""
130
  wingman_error = str(exc)
131
 
132
- # Model 4 – Perplexity API
133
  try:
134
- perplexity_service = get_perplexity_service()
135
- # Format conversation for Perplexity: "Male: ... ||| Female: ..."
136
  formatted_conversation = f"Male: {male} ||| Female: {female}"
137
- perplexity_reply = perplexity_service.generate_reply(
138
  conversation=formatted_conversation,
139
  trigger=trigger,
140
  move=move,
141
  )
142
- perplexity_error = ""
143
  except Exception as exc:
144
- perplexity_reply = ""
145
- perplexity_error = str(exc)
146
 
147
  models_output["llama"] = {
148
  "label": "Model 1 – Prompt style: an toàn / nhẹ nhàng",
@@ -162,10 +162,10 @@ def run_full_pipeline(conversation: str, wingman_prompt: str = "", gemini_model_
162
  "error": wingman_error,
163
  }
164
 
165
- models_output["perplexity"] = {
166
- "label": "Model 4 – Perplexity API",
167
- "reply": perplexity_reply,
168
- "error": perplexity_error,
169
  }
170
 
171
  # Model 5 – Google Gemini API
@@ -265,7 +265,7 @@ with gr.Blocks(title=title) as demo:
265
 
266
  # Main Reply Suggestion Tab
267
  gr.Markdown("### 🎯 Generate AI Reply Suggestions (5 Models)")
268
- gr.Markdown("Nhập hội thoại và hệ thống sẽ chạy pipeline Trigger → Move → 5 models (3 prompt styles + Perplexity API + Gemini API).")
269
 
270
  with gr.Row():
271
  with gr.Column(scale=2):
@@ -298,6 +298,40 @@ with gr.Blocks(title=title) as demo:
298
  )
299
  gr.Markdown("Leave as-is for default behavior. Edits apply to Model 3 when its LoRA is used.")
300
 
301
  # Model 5 – Gemini Model Selection (using whitelist - 15 tested models)
302
  try:
303
  gemini_models = get_available_gemini_models(use_whitelist=True)
@@ -361,11 +395,11 @@ with gr.Blocks(title=title) as demo:
361
  placeholder="Reply từ mô hình Wingman LoRA (hoặc fallback prompt) sẽ xuất hiện tại đây.",
362
  )
363
 
364
- perplexity_box = gr.Textbox(
365
  lines=3,
366
- label="Model 4 – Perplexity API",
367
  interactive=False,
368
- placeholder="Reply từ Perplexity API sẽ xuất hiện tại đây.",
369
  )
370
 
371
  gemini_box = gr.Textbox(
@@ -375,9 +409,9 @@ with gr.Blocks(title=title) as demo:
375
  placeholder="Reply từ Gemini API sẽ xuất hiện tại đây.",
376
  )
377
 
378
- def generate_reply_with_extraction(conversation: str, wingman_prompt: str, gemini_model_name: str) -> Tuple[Dict[str, Any], str, str, str, str, str]:
379
  """Generate replies from five models."""
380
- result = run_full_pipeline(conversation, wingman_prompt, gemini_model_name)
381
  if "error" in result:
382
  error_msg = f"❌ {result['error']}"
383
  return result, error_msg, error_msg, error_msg, error_msg, error_msg
@@ -397,14 +431,14 @@ with gr.Blocks(title=title) as demo:
397
  extract_text("llama"),
398
  extract_text("pho"),
399
  extract_text("wingman"),
400
- extract_text("perplexity"),
401
  extract_text("gemini"),
402
  )
403
 
404
  reply_btn.click(
405
  generate_reply_with_extraction,
406
- inputs=[reply_in, wingman_prompt_in, gemini_model_dropdown],
407
- outputs=[reply_out, llama_box, pho_box, wingman_box, perplexity_box, gemini_box],
408
  api_name="reply"
409
  )
410
 
 
4
 
5
  from reply_service import get_reply_service
6
  from trigger_move_identifier import get_trigger_move_identifier
7
+ from openai_service import get_openai_service, get_available_models
8
  from gemini_service import get_gemini_service, get_available_gemini_models
9
 
10
 
 
77
  return male, female
78
 
79
 
80
+ def run_full_pipeline(conversation: str, wingman_prompt: str = "", gemini_model_name: str = "models/gemini-2.0-flash", openai_model_name: str = "gpt-4o-mini") -> Dict[str, Any]:
81
+ """Run trigger detector and generate replies from 5 models (3 prompt styles + OpenAI + Gemini)."""
82
  try:
83
  male, female = parse_conversation(conversation)
84
  identifier = get_trigger_move_identifier(
 
129
  wingman_reply = ""
130
  wingman_error = str(exc)
131
 
132
+ # Model 4 – OpenAI API
133
  try:
134
+ openai_service = get_openai_service(model_name=openai_model_name)
135
+ # Format conversation for OpenAI: "Male: ... ||| Female: ..."
136
  formatted_conversation = f"Male: {male} ||| Female: {female}"
137
+ openai_reply = openai_service.generate_reply(
138
  conversation=formatted_conversation,
139
  trigger=trigger,
140
  move=move,
141
  )
142
+ openai_error = ""
143
  except Exception as exc:
144
+ openai_reply = ""
145
+ openai_error = str(exc)
146
 
147
  models_output["llama"] = {
148
  "label": "Model 1 – Prompt style: an toàn / nhẹ nhàng",
 
162
  "error": wingman_error,
163
  }
164
 
165
+ models_output["openai"] = {
166
+ "label": f"Model 4 – OpenAI API ({openai_model_name})",
167
+ "reply": openai_reply,
168
+ "error": openai_error,
169
  }
170
 
171
  # Model 5 – Google Gemini API
 
265
 
266
  # Main Reply Suggestion Tab
267
  gr.Markdown("### 🎯 Generate AI Reply Suggestions (5 Models)")
268
+ gr.Markdown("Nhập hội thoại và hệ thống sẽ chạy pipeline Trigger → Move → 5 models (3 prompt styles + OpenAI API + Gemini API).")
269
 
270
  with gr.Row():
271
  with gr.Column(scale=2):
 
298
  )
299
  gr.Markdown("Leave as-is for default behavior. Edits apply to Model 3 when its LoRA is used.")
300
 
301
+ # Model 4 – OpenAI Model Selection (with search)
302
+ try:
303
+ # Load all available models from cache
304
+ all_model_ids = get_available_models(prefix_filters=["gpt-", "o1-", "o3-"])
305
+
306
+ # Create choices with (label, value) format for better display
307
+ openai_dropdown_choices = []
308
+ openai_model_choices = []
309
+
310
+ for model_id in all_model_ids:
311
+ # Use model_id as both label and value, but format nicely
312
+ label = model_id.replace("gpt-", "GPT-").replace("o1-", "O1-").replace("o3-", "O3-")
313
+ openai_dropdown_choices.append((label, model_id))
314
+ openai_model_choices.append(model_id)
315
+
316
+ default_openai_model = "gpt-4o-mini" if "gpt-4o-mini" in openai_model_choices else (openai_model_choices[0] if openai_model_choices else "gpt-4o-mini")
317
+
318
+ print(f"✓ Loaded {len(openai_model_choices)} OpenAI models for dropdown")
319
+ except Exception as e:
320
+ print(f"⚠ Error loading OpenAI models: {e}")
321
+ openai_dropdown_choices = [("gpt-4o-mini (default - API key may be missing)", "gpt-4o-mini")]
322
+ openai_model_choices = ["gpt-4o-mini"]
323
+ default_openai_model = "gpt-4o-mini"
324
+
325
+ openai_model_dropdown = gr.Dropdown(
326
+ choices=openai_dropdown_choices,
327
+ value=default_openai_model,
328
+ label="Model 4 – Select OpenAI Model",
329
+ info=f"Search and choose from {len(openai_model_choices)} available OpenAI models",
330
+ interactive=True,
331
+ filterable=True, # Enable search/filter functionality
332
+ scale=1,
333
+ )
334
+
335
  # Model 5 – Gemini Model Selection (using whitelist - 15 tested models)
336
  try:
337
  gemini_models = get_available_gemini_models(use_whitelist=True)
 
395
  placeholder="Reply từ mô hình Wingman LoRA (hoặc fallback prompt) sẽ xuất hiện tại đây.",
396
  )
397
 
398
+ openai_box = gr.Textbox(
399
  lines=3,
400
+ label="Model 4 – OpenAI API",
401
  interactive=False,
402
+ placeholder="Reply từ OpenAI API sẽ xuất hiện tại đây.",
403
  )
404
 
405
  gemini_box = gr.Textbox(
 
409
  placeholder="Reply từ Gemini API sẽ xuất hiện tại đây.",
410
  )
411
 
412
+ def generate_reply_with_extraction(conversation: str, wingman_prompt: str, openai_model_name: str, gemini_model_name: str) -> Tuple[Dict[str, Any], str, str, str, str, str]:
413
  """Generate replies from five models."""
414
+ result = run_full_pipeline(conversation, wingman_prompt, gemini_model_name, openai_model_name)
415
  if "error" in result:
416
  error_msg = f"❌ {result['error']}"
417
  return result, error_msg, error_msg, error_msg, error_msg, error_msg
 
431
  extract_text("llama"),
432
  extract_text("pho"),
433
  extract_text("wingman"),
434
+ extract_text("openai"),
435
  extract_text("gemini"),
436
  )
437
 
438
  reply_btn.click(
439
  generate_reply_with_extraction,
440
+ inputs=[reply_in, wingman_prompt_in, openai_model_dropdown, gemini_model_dropdown],
441
+ outputs=[reply_out, llama_box, pho_box, wingman_box, openai_box, gemini_box],
442
  api_name="reply"
443
  )
444
 
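Taken together, the app.py changes swap the Perplexity slot for OpenAI: run_full_pipeline now accepts both a gemini_model_name and an openai_model_name, the new Model 4 dropdown is populated from the cached model list, and the click handler forwards both dropdown values. The following is only a rough usage sketch for driving the pipeline without the Gradio UI; it assumes OPENAI_API_KEY and the Gemini credentials are configured, that importing app does not launch the interface, and that the conversation text follows the Male:/Female: format parse_conversation expects. The exact top-level keys of the returned dict are not visible in this hunk, so the sketch just prints the whole result.

    # Hypothetical usage sketch, not part of the commit.
    from app import run_full_pipeline

    result = run_full_pipeline(
        conversation="Male: Tối nay anh có lịch đột xuất.\nFemale: Thế mai được không?",
        wingman_prompt="",                              # keep the default Model 3 behavior
        gemini_model_name="models/gemini-2.0-flash",
        openai_model_name="gpt-4o-mini",
    )
    print(result)  # per-model entries carry "label", "reply" and "error" fields
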
config/ai_models.json ADDED
@@ -0,0 +1,512 @@
1
+ [
2
+ {
3
+ "id": "gpt-4-0613",
4
+ "created": 1686588896,
5
+ "owned_by": "openai"
6
+ },
7
+ {
8
+ "id": "gpt-4",
9
+ "created": 1687882411,
10
+ "owned_by": "openai"
11
+ },
12
+ {
13
+ "id": "gpt-3.5-turbo",
14
+ "created": 1677610602,
15
+ "owned_by": "openai"
16
+ },
17
+ {
18
+ "id": "gpt-5.1-codex-mini",
19
+ "created": 1763007109,
20
+ "owned_by": "system"
21
+ },
22
+ {
23
+ "id": "gpt-5.1-chat-latest",
24
+ "created": 1762547951,
25
+ "owned_by": "system"
26
+ },
27
+ {
28
+ "id": "gpt-5.1-2025-11-13",
29
+ "created": 1762800353,
30
+ "owned_by": "system"
31
+ },
32
+ {
33
+ "id": "gpt-5.1",
34
+ "created": 1762800673,
35
+ "owned_by": "system"
36
+ },
37
+ {
38
+ "id": "gpt-5.1-codex",
39
+ "created": 1762988221,
40
+ "owned_by": "system"
41
+ },
42
+ {
43
+ "id": "davinci-002",
44
+ "created": 1692634301,
45
+ "owned_by": "system"
46
+ },
47
+ {
48
+ "id": "babbage-002",
49
+ "created": 1692634615,
50
+ "owned_by": "system"
51
+ },
52
+ {
53
+ "id": "gpt-3.5-turbo-instruct",
54
+ "created": 1692901427,
55
+ "owned_by": "system"
56
+ },
57
+ {
58
+ "id": "gpt-3.5-turbo-instruct-0914",
59
+ "created": 1694122472,
60
+ "owned_by": "system"
61
+ },
62
+ {
63
+ "id": "dall-e-3",
64
+ "created": 1698785189,
65
+ "owned_by": "system"
66
+ },
67
+ {
68
+ "id": "dall-e-2",
69
+ "created": 1698798177,
70
+ "owned_by": "system"
71
+ },
72
+ {
73
+ "id": "gpt-4-1106-preview",
74
+ "created": 1698957206,
75
+ "owned_by": "system"
76
+ },
77
+ {
78
+ "id": "gpt-3.5-turbo-1106",
79
+ "created": 1698959748,
80
+ "owned_by": "system"
81
+ },
82
+ {
83
+ "id": "tts-1-hd",
84
+ "created": 1699046015,
85
+ "owned_by": "system"
86
+ },
87
+ {
88
+ "id": "tts-1-1106",
89
+ "created": 1699053241,
90
+ "owned_by": "system"
91
+ },
92
+ {
93
+ "id": "tts-1-hd-1106",
94
+ "created": 1699053533,
95
+ "owned_by": "system"
96
+ },
97
+ {
98
+ "id": "text-embedding-3-small",
99
+ "created": 1705948997,
100
+ "owned_by": "system"
101
+ },
102
+ {
103
+ "id": "text-embedding-3-large",
104
+ "created": 1705953180,
105
+ "owned_by": "system"
106
+ },
107
+ {
108
+ "id": "gpt-4-0125-preview",
109
+ "created": 1706037612,
110
+ "owned_by": "system"
111
+ },
112
+ {
113
+ "id": "gpt-4-turbo-preview",
114
+ "created": 1706037777,
115
+ "owned_by": "system"
116
+ },
117
+ {
118
+ "id": "gpt-3.5-turbo-0125",
119
+ "created": 1706048358,
120
+ "owned_by": "system"
121
+ },
122
+ {
123
+ "id": "gpt-4-turbo",
124
+ "created": 1712361441,
125
+ "owned_by": "system"
126
+ },
127
+ {
128
+ "id": "gpt-4-turbo-2024-04-09",
129
+ "created": 1712601677,
130
+ "owned_by": "system"
131
+ },
132
+ {
133
+ "id": "gpt-4o",
134
+ "created": 1715367049,
135
+ "owned_by": "system"
136
+ },
137
+ {
138
+ "id": "gpt-4o-2024-05-13",
139
+ "created": 1715368132,
140
+ "owned_by": "system"
141
+ },
142
+ {
143
+ "id": "gpt-4o-mini-2024-07-18",
144
+ "created": 1721172717,
145
+ "owned_by": "system"
146
+ },
147
+ {
148
+ "id": "gpt-4o-mini",
149
+ "created": 1721172741,
150
+ "owned_by": "system"
151
+ },
152
+ {
153
+ "id": "gpt-4o-2024-08-06",
154
+ "created": 1722814719,
155
+ "owned_by": "system"
156
+ },
157
+ {
158
+ "id": "chatgpt-4o-latest",
159
+ "created": 1723515131,
160
+ "owned_by": "system"
161
+ },
162
+ {
163
+ "id": "gpt-4o-realtime-preview-2024-10-01",
164
+ "created": 1727131766,
165
+ "owned_by": "system"
166
+ },
167
+ {
168
+ "id": "gpt-4o-audio-preview-2024-10-01",
169
+ "created": 1727389042,
170
+ "owned_by": "system"
171
+ },
172
+ {
173
+ "id": "gpt-4o-audio-preview",
174
+ "created": 1727460443,
175
+ "owned_by": "system"
176
+ },
177
+ {
178
+ "id": "gpt-4o-realtime-preview",
179
+ "created": 1727659998,
180
+ "owned_by": "system"
181
+ },
182
+ {
183
+ "id": "omni-moderation-latest",
184
+ "created": 1731689265,
185
+ "owned_by": "system"
186
+ },
187
+ {
188
+ "id": "omni-moderation-2024-09-26",
189
+ "created": 1732734466,
190
+ "owned_by": "system"
191
+ },
192
+ {
193
+ "id": "gpt-4o-realtime-preview-2024-12-17",
194
+ "created": 1733945430,
195
+ "owned_by": "system"
196
+ },
197
+ {
198
+ "id": "gpt-4o-audio-preview-2024-12-17",
199
+ "created": 1734034239,
200
+ "owned_by": "system"
201
+ },
202
+ {
203
+ "id": "gpt-4o-mini-realtime-preview-2024-12-17",
204
+ "created": 1734112601,
205
+ "owned_by": "system"
206
+ },
207
+ {
208
+ "id": "gpt-4o-mini-audio-preview-2024-12-17",
209
+ "created": 1734115920,
210
+ "owned_by": "system"
211
+ },
212
+ {
213
+ "id": "o1-2024-12-17",
214
+ "created": 1734326976,
215
+ "owned_by": "system"
216
+ },
217
+ {
218
+ "id": "o1",
219
+ "created": 1734375816,
220
+ "owned_by": "system"
221
+ },
222
+ {
223
+ "id": "gpt-4o-mini-realtime-preview",
224
+ "created": 1734387380,
225
+ "owned_by": "system"
226
+ },
227
+ {
228
+ "id": "gpt-4o-mini-audio-preview",
229
+ "created": 1734387424,
230
+ "owned_by": "system"
231
+ },
232
+ {
233
+ "id": "o3-mini",
234
+ "created": 1737146383,
235
+ "owned_by": "system"
236
+ },
237
+ {
238
+ "id": "o3-mini-2025-01-31",
239
+ "created": 1738010200,
240
+ "owned_by": "system"
241
+ },
242
+ {
243
+ "id": "gpt-4o-2024-11-20",
244
+ "created": 1739331543,
245
+ "owned_by": "system"
246
+ },
247
+ {
248
+ "id": "gpt-4o-search-preview-2025-03-11",
249
+ "created": 1741388170,
250
+ "owned_by": "system"
251
+ },
252
+ {
253
+ "id": "gpt-4o-search-preview",
254
+ "created": 1741388720,
255
+ "owned_by": "system"
256
+ },
257
+ {
258
+ "id": "gpt-4o-mini-search-preview-2025-03-11",
259
+ "created": 1741390858,
260
+ "owned_by": "system"
261
+ },
262
+ {
263
+ "id": "gpt-4o-mini-search-preview",
264
+ "created": 1741391161,
265
+ "owned_by": "system"
266
+ },
267
+ {
268
+ "id": "gpt-4o-transcribe",
269
+ "created": 1742068463,
270
+ "owned_by": "system"
271
+ },
272
+ {
273
+ "id": "gpt-4o-mini-transcribe",
274
+ "created": 1742068596,
275
+ "owned_by": "system"
276
+ },
277
+ {
278
+ "id": "o1-pro-2025-03-19",
279
+ "created": 1742251504,
280
+ "owned_by": "system"
281
+ },
282
+ {
283
+ "id": "o1-pro",
284
+ "created": 1742251791,
285
+ "owned_by": "system"
286
+ },
287
+ {
288
+ "id": "gpt-4o-mini-tts",
289
+ "created": 1742403959,
290
+ "owned_by": "system"
291
+ },
292
+ {
293
+ "id": "o3-2025-04-16",
294
+ "created": 1744133301,
295
+ "owned_by": "system"
296
+ },
297
+ {
298
+ "id": "o4-mini-2025-04-16",
299
+ "created": 1744133506,
300
+ "owned_by": "system"
301
+ },
302
+ {
303
+ "id": "o3",
304
+ "created": 1744225308,
305
+ "owned_by": "system"
306
+ },
307
+ {
308
+ "id": "o4-mini",
309
+ "created": 1744225351,
310
+ "owned_by": "system"
311
+ },
312
+ {
313
+ "id": "gpt-4.1-2025-04-14",
314
+ "created": 1744315746,
315
+ "owned_by": "system"
316
+ },
317
+ {
318
+ "id": "gpt-4.1",
319
+ "created": 1744316542,
320
+ "owned_by": "system"
321
+ },
322
+ {
323
+ "id": "gpt-4.1-mini-2025-04-14",
324
+ "created": 1744317547,
325
+ "owned_by": "system"
326
+ },
327
+ {
328
+ "id": "gpt-4.1-mini",
329
+ "created": 1744318173,
330
+ "owned_by": "system"
331
+ },
332
+ {
333
+ "id": "gpt-4.1-nano-2025-04-14",
334
+ "created": 1744321025,
335
+ "owned_by": "system"
336
+ },
337
+ {
338
+ "id": "gpt-4.1-nano",
339
+ "created": 1744321707,
340
+ "owned_by": "system"
341
+ },
342
+ {
343
+ "id": "gpt-image-1",
344
+ "created": 1745517030,
345
+ "owned_by": "system"
346
+ },
347
+ {
348
+ "id": "codex-mini-latest",
349
+ "created": 1746673257,
350
+ "owned_by": "system"
351
+ },
352
+ {
353
+ "id": "gpt-4o-realtime-preview-2025-06-03",
354
+ "created": 1748907838,
355
+ "owned_by": "system"
356
+ },
357
+ {
358
+ "id": "gpt-4o-audio-preview-2025-06-03",
359
+ "created": 1748908498,
360
+ "owned_by": "system"
361
+ },
362
+ {
363
+ "id": "o4-mini-deep-research",
364
+ "created": 1749685485,
365
+ "owned_by": "system"
366
+ },
367
+ {
368
+ "id": "gpt-4o-transcribe-diarize",
369
+ "created": 1750798887,
370
+ "owned_by": "system"
371
+ },
372
+ {
373
+ "id": "o4-mini-deep-research-2025-06-26",
374
+ "created": 1750866121,
375
+ "owned_by": "system"
376
+ },
377
+ {
378
+ "id": "gpt-5-chat-latest",
379
+ "created": 1754073306,
380
+ "owned_by": "system"
381
+ },
382
+ {
383
+ "id": "gpt-5-2025-08-07",
384
+ "created": 1754075360,
385
+ "owned_by": "system"
386
+ },
387
+ {
388
+ "id": "gpt-5",
389
+ "created": 1754425777,
390
+ "owned_by": "system"
391
+ },
392
+ {
393
+ "id": "gpt-5-mini-2025-08-07",
394
+ "created": 1754425867,
395
+ "owned_by": "system"
396
+ },
397
+ {
398
+ "id": "gpt-5-mini",
399
+ "created": 1754425928,
400
+ "owned_by": "system"
401
+ },
402
+ {
403
+ "id": "gpt-5-nano-2025-08-07",
404
+ "created": 1754426303,
405
+ "owned_by": "system"
406
+ },
407
+ {
408
+ "id": "gpt-5-nano",
409
+ "created": 1754426384,
410
+ "owned_by": "system"
411
+ },
412
+ {
413
+ "id": "gpt-audio-2025-08-28",
414
+ "created": 1756256146,
415
+ "owned_by": "system"
416
+ },
417
+ {
418
+ "id": "gpt-realtime",
419
+ "created": 1756271701,
420
+ "owned_by": "system"
421
+ },
422
+ {
423
+ "id": "gpt-realtime-2025-08-28",
424
+ "created": 1756271773,
425
+ "owned_by": "system"
426
+ },
427
+ {
428
+ "id": "gpt-audio",
429
+ "created": 1756339249,
430
+ "owned_by": "system"
431
+ },
432
+ {
433
+ "id": "gpt-5-codex",
434
+ "created": 1757527818,
435
+ "owned_by": "system"
436
+ },
437
+ {
438
+ "id": "gpt-image-1-mini",
439
+ "created": 1758845821,
440
+ "owned_by": "system"
441
+ },
442
+ {
443
+ "id": "gpt-5-pro-2025-10-06",
444
+ "created": 1759469707,
445
+ "owned_by": "system"
446
+ },
447
+ {
448
+ "id": "gpt-5-pro",
449
+ "created": 1759469822,
450
+ "owned_by": "system"
451
+ },
452
+ {
453
+ "id": "gpt-audio-mini",
454
+ "created": 1759512027,
455
+ "owned_by": "system"
456
+ },
457
+ {
458
+ "id": "gpt-audio-mini-2025-10-06",
459
+ "created": 1759512137,
460
+ "owned_by": "system"
461
+ },
462
+ {
463
+ "id": "gpt-5-search-api",
464
+ "created": 1759514629,
465
+ "owned_by": "system"
466
+ },
467
+ {
468
+ "id": "gpt-realtime-mini",
469
+ "created": 1759517133,
470
+ "owned_by": "system"
471
+ },
472
+ {
473
+ "id": "gpt-realtime-mini-2025-10-06",
474
+ "created": 1759517175,
475
+ "owned_by": "system"
476
+ },
477
+ {
478
+ "id": "sora-2",
479
+ "created": 1759708615,
480
+ "owned_by": "system"
481
+ },
482
+ {
483
+ "id": "sora-2-pro",
484
+ "created": 1759708663,
485
+ "owned_by": "system"
486
+ },
487
+ {
488
+ "id": "gpt-5-search-api-2025-10-14",
489
+ "created": 1760043960,
490
+ "owned_by": "system"
491
+ },
492
+ {
493
+ "id": "gpt-3.5-turbo-16k",
494
+ "created": 1683758102,
495
+ "owned_by": "openai-internal"
496
+ },
497
+ {
498
+ "id": "tts-1",
499
+ "created": 1681940951,
500
+ "owned_by": "openai-internal"
501
+ },
502
+ {
503
+ "id": "whisper-1",
504
+ "created": 1677532384,
505
+ "owned_by": "openai-internal"
506
+ },
507
+ {
508
+ "id": "text-embedding-ada-002",
509
+ "created": 1671217299,
510
+ "owned_by": "openai-internal"
511
+ }
512
+ ]
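
config/ai_models.json is a snapshot of client.models.list(): one record per model with id, created (a Unix timestamp) and owned_by. Below is a small sketch of how such a snapshot can be read and narrowed to chat-style ids with the same prefix filter the Model 4 dropdown uses; the path is relative to the repo root and nothing beyond the fields above is assumed.

    # Sketch: filter the cached snapshot the way get_available_models does.
    import json
    from pathlib import Path

    models = json.loads(Path("config/ai_models.json").read_text(encoding="utf-8"))
    chat_ids = [m["id"] for m in models if m["id"].startswith(("gpt-", "o1-", "o3-"))]
    print(f"{len(chat_ids)} of {len(models)} cached models match the chat prefixes")
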
finetune_model.py CHANGED
@@ -76,7 +76,7 @@ def prepare_training_data(df, use_history=True, persona="default"):
76
  """
77
  training_data = []
78
  conversation_history = []
79
-
80
  has_clean_reply = {"conversation", "trigger", "move", "male_reply"}.issubset(set(df.columns))
81
 
82
  if has_clean_reply:
@@ -102,20 +102,20 @@ def prepare_training_data(df, use_history=True, persona="default"):
102
  # Fallback: dùng dữ liệu gốc (kém lý tưởng hơn)
103
  trigger_cols = [col for col in df.columns if col.startswith("trigger_")]
104
  move_cols = [col for col in df.columns if col.startswith("move_")]
105
-
106
  for _, row in df.iterrows():
107
  user_text = str(row["user_text"]) if pd.notna(row.get("user_text")) else ""
108
  partner_text = str(row["partner_text"]) if pd.notna(row.get("partner_text")) else ""
109
-
110
  if not partner_text or partner_text.strip() == "_":
111
  continue
112
-
113
  active_triggers = get_active_labels(row, trigger_cols)
114
  active_moves = get_active_labels(row, move_cols)
115
-
116
  trigger = active_triggers[0] if active_triggers[0] != "none" else "neutral"
117
  move = active_moves[0] if active_moves[0] != "none" else "neutral"
118
-
119
  if use_history and conversation_history:
120
  history_str = "\n".join(conversation_history)
121
  if user_text and user_text.strip() != "_":
@@ -128,27 +128,27 @@ def prepare_training_data(df, use_history=True, persona="default"):
128
  conversation = f"Male: {user_text} ||| Female: {partner_text}"
129
  else:
130
  conversation = f"Female: {partner_text}"
131
-
132
  prompt = build_instruction(conversation, trigger, move, persona)
133
  response = partner_text.strip()
134
-
135
  training_data.append(
136
  {
137
- "instruction": prompt,
138
- "input": "",
139
  "output": response,
140
  }
141
  )
142
-
143
  if user_text and user_text.strip() != "_":
144
  conversation_history.append(f"Male: {user_text}")
145
  if partner_text and partner_text.strip() != "_":
146
  conversation_history.append(f"Female: {partner_text}")
147
-
148
  max_history = 4
149
  if len(conversation_history) > max_history:
150
  conversation_history = conversation_history[-max_history:]
151
-
152
  return training_data
153
 
154
 
@@ -268,18 +268,18 @@ def main():
268
  use_quantization = False
269
  quant_config = None
270
  if args.model_arch == "causal":
271
- try:
272
- import bitsandbytes as bnb
273
  quant_config = BitsAndBytesConfig(
274
- load_in_4bit=True,
275
- bnb_4bit_quant_type="nf4",
276
- bnb_4bit_compute_dtype=torch.float16,
277
- bnb_4bit_use_double_quant=True,
278
- )
279
- use_quantization = True
280
- print("4-bit quantization enabled")
281
- except (ImportError, ModuleNotFoundError) as e:
282
- print(f"Warning: BitsAndBytesConfig not available ({e}), loading model without quantization...")
283
 
284
  model = None
285
  last_error = None
@@ -313,15 +313,15 @@ def main():
313
  if use_quantization:
314
  try:
315
  model = load_base_model(use_quant=True)
316
- print("Model loaded with 4-bit quantization")
317
- except Exception as e:
318
- last_error = e
319
- print(f"Failed to load with quantization: {e}")
320
  model = None
321
- if model is None:
322
- try:
323
  model = load_base_model(use_quant=False)
324
- print("Model loaded without quantization (may use more memory)")
325
  except Exception as e:
326
  if last_error:
327
  print(f"Original error: {last_error}")
@@ -356,14 +356,14 @@ def main():
356
 
357
  # Configure LoRA
358
  if args.model_arch == "causal":
359
- lora_config = LoraConfig(
360
- r=16,
361
- lora_alpha=32,
362
- target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
363
- lora_dropout=0.05,
364
- bias="none",
365
- task_type="CAUSAL_LM",
366
- )
367
  else:
368
  lora_config = LoraConfig(
369
  r=16,
@@ -416,7 +416,7 @@ def main():
416
  training_args = TrainingArguments(
417
  eval_strategy="steps",
418
  **training_common_kwargs,
419
- )
420
 
421
  # Create Trainer
422
  trainer = Trainer(
 
76
  """
77
  training_data = []
78
  conversation_history = []
79
+
80
  has_clean_reply = {"conversation", "trigger", "move", "male_reply"}.issubset(set(df.columns))
81
 
82
  if has_clean_reply:
 
102
  # Fallback: dùng dữ liệu gốc (kém lý tưởng hơn)
103
  trigger_cols = [col for col in df.columns if col.startswith("trigger_")]
104
  move_cols = [col for col in df.columns if col.startswith("move_")]
105
+
106
  for _, row in df.iterrows():
107
  user_text = str(row["user_text"]) if pd.notna(row.get("user_text")) else ""
108
  partner_text = str(row["partner_text"]) if pd.notna(row.get("partner_text")) else ""
109
+
110
  if not partner_text or partner_text.strip() == "_":
111
  continue
112
+
113
  active_triggers = get_active_labels(row, trigger_cols)
114
  active_moves = get_active_labels(row, move_cols)
115
+
116
  trigger = active_triggers[0] if active_triggers[0] != "none" else "neutral"
117
  move = active_moves[0] if active_moves[0] != "none" else "neutral"
118
+
119
  if use_history and conversation_history:
120
  history_str = "\n".join(conversation_history)
121
  if user_text and user_text.strip() != "_":
 
128
  conversation = f"Male: {user_text} ||| Female: {partner_text}"
129
  else:
130
  conversation = f"Female: {partner_text}"
131
+
132
  prompt = build_instruction(conversation, trigger, move, persona)
133
  response = partner_text.strip()
134
+
135
  training_data.append(
136
  {
137
+ "instruction": prompt,
138
+ "input": "",
139
  "output": response,
140
  }
141
  )
142
+
143
  if user_text and user_text.strip() != "_":
144
  conversation_history.append(f"Male: {user_text}")
145
  if partner_text and partner_text.strip() != "_":
146
  conversation_history.append(f"Female: {partner_text}")
147
+
148
  max_history = 4
149
  if len(conversation_history) > max_history:
150
  conversation_history = conversation_history[-max_history:]
151
+
152
  return training_data
153
 
154
 
 
268
  use_quantization = False
269
  quant_config = None
270
  if args.model_arch == "causal":
271
+ try:
272
+ import bitsandbytes as bnb
273
  quant_config = BitsAndBytesConfig(
274
+ load_in_4bit=True,
275
+ bnb_4bit_quant_type="nf4",
276
+ bnb_4bit_compute_dtype=torch.float16,
277
+ bnb_4bit_use_double_quant=True,
278
+ )
279
+ use_quantization = True
280
+ print("4-bit quantization enabled")
281
+ except (ImportError, ModuleNotFoundError) as e:
282
+ print(f"Warning: BitsAndBytesConfig not available ({e}), loading model without quantization...")
283
 
284
  model = None
285
  last_error = None
 
313
  if use_quantization:
314
  try:
315
  model = load_base_model(use_quant=True)
316
+ print("Model loaded with 4-bit quantization")
317
+ except Exception as e:
318
+ last_error = e
319
+ print(f"Failed to load with quantization: {e}")
320
  model = None
321
+ if model is None:
322
+ try:
323
  model = load_base_model(use_quant=False)
324
+ print("Model loaded without quantization (may use more memory)")
325
  except Exception as e:
326
  if last_error:
327
  print(f"Original error: {last_error}")
 
356
 
357
  # Configure LoRA
358
  if args.model_arch == "causal":
359
+ lora_config = LoraConfig(
360
+ r=16,
361
+ lora_alpha=32,
362
+ target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
363
+ lora_dropout=0.05,
364
+ bias="none",
365
+ task_type="CAUSAL_LM",
366
+ )
367
  else:
368
  lora_config = LoraConfig(
369
  r=16,
 
416
  training_args = TrainingArguments(
417
  eval_strategy="steps",
418
  **training_common_kwargs,
419
+ )
420
 
421
  # Create Trainer
422
  trainer = Trainer(
finetuned_reply_service.py CHANGED
@@ -86,12 +86,12 @@ class FinetunedReplyService:
86
 
87
  if self.model_arch == "causal" and torch.cuda.is_available():
88
  try:
89
- bnb_config = BitsAndBytesConfig(
90
- load_in_4bit=True,
91
- bnb_4bit_quant_type="nf4",
92
- bnb_4bit_compute_dtype=torch.float16,
93
- bnb_4bit_use_double_quant=True,
94
- )
95
  quant_kwargs["quantization_config"] = bnb_config
96
  except Exception as exc:
97
  print(f"Warning: Could not enable 4-bit quantization ({exc}). Falling back to full precision.")
@@ -100,7 +100,7 @@ class FinetunedReplyService:
100
  if self.model_arch == "encoder_decoder":
101
  model = EncoderDecoderModel.from_encoder_decoder_pretrained(
102
  self.base_model_name,
103
- self.base_model_name,
104
  tie_encoder_decoder=True,
105
  )
106
  model.config.decoder_start_token_id = getattr(self.tokenizer, "bos_token_id", self.tokenizer.cls_token_id)
@@ -108,7 +108,7 @@ class FinetunedReplyService:
108
  model.config.vocab_size = model.config.encoder.vocab_size
109
  return model.to(self.device)
110
  kwargs = dict(
111
- trust_remote_code=True,
112
  torch_dtype=dtype,
113
  device_map=device_map,
114
  token=self.api_token,
@@ -130,8 +130,8 @@ class FinetunedReplyService:
130
  has_lora = os.path.exists(adapter_config)
131
  if has_lora:
132
  try:
133
- print(f"Loading fine-tuned weights from {self.finetuned_model_path}")
134
- self.model = PeftModel.from_pretrained(base_model, self.finetuned_model_path)
135
  except FileNotFoundError as exc:
136
  print(f"Adapter files incomplete ({exc}). Falling back to base model.")
137
  self.model = base_model
@@ -185,7 +185,7 @@ class FinetunedReplyService:
185
  attention_mask=inputs.get("attention_mask"),
186
  )
187
  else:
188
- inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
189
  generate_kwargs = dict(**inputs)
190
 
191
  with torch.no_grad():
 
86
 
87
  if self.model_arch == "causal" and torch.cuda.is_available():
88
  try:
89
+ bnb_config = BitsAndBytesConfig(
90
+ load_in_4bit=True,
91
+ bnb_4bit_quant_type="nf4",
92
+ bnb_4bit_compute_dtype=torch.float16,
93
+ bnb_4bit_use_double_quant=True,
94
+ )
95
  quant_kwargs["quantization_config"] = bnb_config
96
  except Exception as exc:
97
  print(f"Warning: Could not enable 4-bit quantization ({exc}). Falling back to full precision.")
 
100
  if self.model_arch == "encoder_decoder":
101
  model = EncoderDecoderModel.from_encoder_decoder_pretrained(
102
  self.base_model_name,
103
+ self.base_model_name,
104
  tie_encoder_decoder=True,
105
  )
106
  model.config.decoder_start_token_id = getattr(self.tokenizer, "bos_token_id", self.tokenizer.cls_token_id)
 
108
  model.config.vocab_size = model.config.encoder.vocab_size
109
  return model.to(self.device)
110
  kwargs = dict(
111
+ trust_remote_code=True,
112
  torch_dtype=dtype,
113
  device_map=device_map,
114
  token=self.api_token,
 
130
  has_lora = os.path.exists(adapter_config)
131
  if has_lora:
132
  try:
133
+ print(f"Loading fine-tuned weights from {self.finetuned_model_path}")
134
+ self.model = PeftModel.from_pretrained(base_model, self.finetuned_model_path)
135
  except FileNotFoundError as exc:
136
  print(f"Adapter files incomplete ({exc}). Falling back to base model.")
137
  self.model = base_model
 
185
  attention_mask=inputs.get("attention_mask"),
186
  )
187
  else:
188
+ inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
189
  generate_kwargs = dict(**inputs)
190
 
191
  with torch.no_grad():
openai_models_cache.json ADDED
@@ -0,0 +1,512 @@
1
+ [
2
+ {
3
+ "id": "gpt-4-0613",
4
+ "created": 1686588896,
5
+ "owned_by": "openai"
6
+ },
7
+ {
8
+ "id": "gpt-4",
9
+ "created": 1687882411,
10
+ "owned_by": "openai"
11
+ },
12
+ {
13
+ "id": "gpt-3.5-turbo",
14
+ "created": 1677610602,
15
+ "owned_by": "openai"
16
+ },
17
+ {
18
+ "id": "gpt-5.1-codex-mini",
19
+ "created": 1763007109,
20
+ "owned_by": "system"
21
+ },
22
+ {
23
+ "id": "gpt-5.1-chat-latest",
24
+ "created": 1762547951,
25
+ "owned_by": "system"
26
+ },
27
+ {
28
+ "id": "gpt-5.1-2025-11-13",
29
+ "created": 1762800353,
30
+ "owned_by": "system"
31
+ },
32
+ {
33
+ "id": "gpt-5.1",
34
+ "created": 1762800673,
35
+ "owned_by": "system"
36
+ },
37
+ {
38
+ "id": "gpt-5.1-codex",
39
+ "created": 1762988221,
40
+ "owned_by": "system"
41
+ },
42
+ {
43
+ "id": "davinci-002",
44
+ "created": 1692634301,
45
+ "owned_by": "system"
46
+ },
47
+ {
48
+ "id": "babbage-002",
49
+ "created": 1692634615,
50
+ "owned_by": "system"
51
+ },
52
+ {
53
+ "id": "gpt-3.5-turbo-instruct",
54
+ "created": 1692901427,
55
+ "owned_by": "system"
56
+ },
57
+ {
58
+ "id": "gpt-3.5-turbo-instruct-0914",
59
+ "created": 1694122472,
60
+ "owned_by": "system"
61
+ },
62
+ {
63
+ "id": "dall-e-3",
64
+ "created": 1698785189,
65
+ "owned_by": "system"
66
+ },
67
+ {
68
+ "id": "dall-e-2",
69
+ "created": 1698798177,
70
+ "owned_by": "system"
71
+ },
72
+ {
73
+ "id": "gpt-4-1106-preview",
74
+ "created": 1698957206,
75
+ "owned_by": "system"
76
+ },
77
+ {
78
+ "id": "gpt-3.5-turbo-1106",
79
+ "created": 1698959748,
80
+ "owned_by": "system"
81
+ },
82
+ {
83
+ "id": "tts-1-hd",
84
+ "created": 1699046015,
85
+ "owned_by": "system"
86
+ },
87
+ {
88
+ "id": "tts-1-1106",
89
+ "created": 1699053241,
90
+ "owned_by": "system"
91
+ },
92
+ {
93
+ "id": "tts-1-hd-1106",
94
+ "created": 1699053533,
95
+ "owned_by": "system"
96
+ },
97
+ {
98
+ "id": "text-embedding-3-small",
99
+ "created": 1705948997,
100
+ "owned_by": "system"
101
+ },
102
+ {
103
+ "id": "text-embedding-3-large",
104
+ "created": 1705953180,
105
+ "owned_by": "system"
106
+ },
107
+ {
108
+ "id": "gpt-4-0125-preview",
109
+ "created": 1706037612,
110
+ "owned_by": "system"
111
+ },
112
+ {
113
+ "id": "gpt-4-turbo-preview",
114
+ "created": 1706037777,
115
+ "owned_by": "system"
116
+ },
117
+ {
118
+ "id": "gpt-3.5-turbo-0125",
119
+ "created": 1706048358,
120
+ "owned_by": "system"
121
+ },
122
+ {
123
+ "id": "gpt-4-turbo",
124
+ "created": 1712361441,
125
+ "owned_by": "system"
126
+ },
127
+ {
128
+ "id": "gpt-4-turbo-2024-04-09",
129
+ "created": 1712601677,
130
+ "owned_by": "system"
131
+ },
132
+ {
133
+ "id": "gpt-4o",
134
+ "created": 1715367049,
135
+ "owned_by": "system"
136
+ },
137
+ {
138
+ "id": "gpt-4o-2024-05-13",
139
+ "created": 1715368132,
140
+ "owned_by": "system"
141
+ },
142
+ {
143
+ "id": "gpt-4o-mini-2024-07-18",
144
+ "created": 1721172717,
145
+ "owned_by": "system"
146
+ },
147
+ {
148
+ "id": "gpt-4o-mini",
149
+ "created": 1721172741,
150
+ "owned_by": "system"
151
+ },
152
+ {
153
+ "id": "gpt-4o-2024-08-06",
154
+ "created": 1722814719,
155
+ "owned_by": "system"
156
+ },
157
+ {
158
+ "id": "chatgpt-4o-latest",
159
+ "created": 1723515131,
160
+ "owned_by": "system"
161
+ },
162
+ {
163
+ "id": "gpt-4o-realtime-preview-2024-10-01",
164
+ "created": 1727131766,
165
+ "owned_by": "system"
166
+ },
167
+ {
168
+ "id": "gpt-4o-audio-preview-2024-10-01",
169
+ "created": 1727389042,
170
+ "owned_by": "system"
171
+ },
172
+ {
173
+ "id": "gpt-4o-audio-preview",
174
+ "created": 1727460443,
175
+ "owned_by": "system"
176
+ },
177
+ {
178
+ "id": "gpt-4o-realtime-preview",
179
+ "created": 1727659998,
180
+ "owned_by": "system"
181
+ },
182
+ {
183
+ "id": "omni-moderation-latest",
184
+ "created": 1731689265,
185
+ "owned_by": "system"
186
+ },
187
+ {
188
+ "id": "omni-moderation-2024-09-26",
189
+ "created": 1732734466,
190
+ "owned_by": "system"
191
+ },
192
+ {
193
+ "id": "gpt-4o-realtime-preview-2024-12-17",
194
+ "created": 1733945430,
195
+ "owned_by": "system"
196
+ },
197
+ {
198
+ "id": "gpt-4o-audio-preview-2024-12-17",
199
+ "created": 1734034239,
200
+ "owned_by": "system"
201
+ },
202
+ {
203
+ "id": "gpt-4o-mini-realtime-preview-2024-12-17",
204
+ "created": 1734112601,
205
+ "owned_by": "system"
206
+ },
207
+ {
208
+ "id": "gpt-4o-mini-audio-preview-2024-12-17",
209
+ "created": 1734115920,
210
+ "owned_by": "system"
211
+ },
212
+ {
213
+ "id": "o1-2024-12-17",
214
+ "created": 1734326976,
215
+ "owned_by": "system"
216
+ },
217
+ {
218
+ "id": "o1",
219
+ "created": 1734375816,
220
+ "owned_by": "system"
221
+ },
222
+ {
223
+ "id": "gpt-4o-mini-realtime-preview",
224
+ "created": 1734387380,
225
+ "owned_by": "system"
226
+ },
227
+ {
228
+ "id": "gpt-4o-mini-audio-preview",
229
+ "created": 1734387424,
230
+ "owned_by": "system"
231
+ },
232
+ {
233
+ "id": "o3-mini",
234
+ "created": 1737146383,
235
+ "owned_by": "system"
236
+ },
237
+ {
238
+ "id": "o3-mini-2025-01-31",
239
+ "created": 1738010200,
240
+ "owned_by": "system"
241
+ },
242
+ {
243
+ "id": "gpt-4o-2024-11-20",
244
+ "created": 1739331543,
245
+ "owned_by": "system"
246
+ },
247
+ {
248
+ "id": "gpt-4o-search-preview-2025-03-11",
249
+ "created": 1741388170,
250
+ "owned_by": "system"
251
+ },
252
+ {
253
+ "id": "gpt-4o-search-preview",
254
+ "created": 1741388720,
255
+ "owned_by": "system"
256
+ },
257
+ {
258
+ "id": "gpt-4o-mini-search-preview-2025-03-11",
259
+ "created": 1741390858,
260
+ "owned_by": "system"
261
+ },
262
+ {
263
+ "id": "gpt-4o-mini-search-preview",
264
+ "created": 1741391161,
265
+ "owned_by": "system"
266
+ },
267
+ {
268
+ "id": "gpt-4o-transcribe",
269
+ "created": 1742068463,
270
+ "owned_by": "system"
271
+ },
272
+ {
273
+ "id": "gpt-4o-mini-transcribe",
274
+ "created": 1742068596,
275
+ "owned_by": "system"
276
+ },
277
+ {
278
+ "id": "o1-pro-2025-03-19",
279
+ "created": 1742251504,
280
+ "owned_by": "system"
281
+ },
282
+ {
283
+ "id": "o1-pro",
284
+ "created": 1742251791,
285
+ "owned_by": "system"
286
+ },
287
+ {
288
+ "id": "gpt-4o-mini-tts",
289
+ "created": 1742403959,
290
+ "owned_by": "system"
291
+ },
292
+ {
293
+ "id": "o3-2025-04-16",
294
+ "created": 1744133301,
295
+ "owned_by": "system"
296
+ },
297
+ {
298
+ "id": "o4-mini-2025-04-16",
299
+ "created": 1744133506,
300
+ "owned_by": "system"
301
+ },
302
+ {
303
+ "id": "o3",
304
+ "created": 1744225308,
305
+ "owned_by": "system"
306
+ },
307
+ {
308
+ "id": "o4-mini",
309
+ "created": 1744225351,
310
+ "owned_by": "system"
311
+ },
312
+ {
313
+ "id": "gpt-4.1-2025-04-14",
314
+ "created": 1744315746,
315
+ "owned_by": "system"
316
+ },
317
+ {
318
+ "id": "gpt-4.1",
319
+ "created": 1744316542,
320
+ "owned_by": "system"
321
+ },
322
+ {
323
+ "id": "gpt-4.1-mini-2025-04-14",
324
+ "created": 1744317547,
325
+ "owned_by": "system"
326
+ },
327
+ {
328
+ "id": "gpt-4.1-mini",
329
+ "created": 1744318173,
330
+ "owned_by": "system"
331
+ },
332
+ {
333
+ "id": "gpt-4.1-nano-2025-04-14",
334
+ "created": 1744321025,
335
+ "owned_by": "system"
336
+ },
337
+ {
338
+ "id": "gpt-4.1-nano",
339
+ "created": 1744321707,
340
+ "owned_by": "system"
341
+ },
342
+ {
343
+ "id": "gpt-image-1",
344
+ "created": 1745517030,
345
+ "owned_by": "system"
346
+ },
347
+ {
348
+ "id": "codex-mini-latest",
349
+ "created": 1746673257,
350
+ "owned_by": "system"
351
+ },
352
+ {
353
+ "id": "gpt-4o-realtime-preview-2025-06-03",
354
+ "created": 1748907838,
355
+ "owned_by": "system"
356
+ },
357
+ {
358
+ "id": "gpt-4o-audio-preview-2025-06-03",
359
+ "created": 1748908498,
360
+ "owned_by": "system"
361
+ },
362
+ {
363
+ "id": "o4-mini-deep-research",
364
+ "created": 1749685485,
365
+ "owned_by": "system"
366
+ },
367
+ {
368
+ "id": "gpt-4o-transcribe-diarize",
369
+ "created": 1750798887,
370
+ "owned_by": "system"
371
+ },
372
+ {
373
+ "id": "o4-mini-deep-research-2025-06-26",
374
+ "created": 1750866121,
375
+ "owned_by": "system"
376
+ },
377
+ {
378
+ "id": "gpt-5-chat-latest",
379
+ "created": 1754073306,
380
+ "owned_by": "system"
381
+ },
382
+ {
383
+ "id": "gpt-5-2025-08-07",
384
+ "created": 1754075360,
385
+ "owned_by": "system"
386
+ },
387
+ {
388
+ "id": "gpt-5",
389
+ "created": 1754425777,
390
+ "owned_by": "system"
391
+ },
392
+ {
393
+ "id": "gpt-5-mini-2025-08-07",
394
+ "created": 1754425867,
395
+ "owned_by": "system"
396
+ },
397
+ {
398
+ "id": "gpt-5-mini",
399
+ "created": 1754425928,
400
+ "owned_by": "system"
401
+ },
402
+ {
403
+ "id": "gpt-5-nano-2025-08-07",
404
+ "created": 1754426303,
405
+ "owned_by": "system"
406
+ },
407
+ {
408
+ "id": "gpt-5-nano",
409
+ "created": 1754426384,
410
+ "owned_by": "system"
411
+ },
412
+ {
413
+ "id": "gpt-audio-2025-08-28",
414
+ "created": 1756256146,
415
+ "owned_by": "system"
416
+ },
417
+ {
418
+ "id": "gpt-realtime",
419
+ "created": 1756271701,
420
+ "owned_by": "system"
421
+ },
422
+ {
423
+ "id": "gpt-realtime-2025-08-28",
424
+ "created": 1756271773,
425
+ "owned_by": "system"
426
+ },
427
+ {
428
+ "id": "gpt-audio",
429
+ "created": 1756339249,
430
+ "owned_by": "system"
431
+ },
432
+ {
433
+ "id": "gpt-5-codex",
434
+ "created": 1757527818,
435
+ "owned_by": "system"
436
+ },
437
+ {
438
+ "id": "gpt-image-1-mini",
439
+ "created": 1758845821,
440
+ "owned_by": "system"
441
+ },
442
+ {
443
+ "id": "gpt-5-pro-2025-10-06",
444
+ "created": 1759469707,
445
+ "owned_by": "system"
446
+ },
447
+ {
448
+ "id": "gpt-5-pro",
449
+ "created": 1759469822,
450
+ "owned_by": "system"
451
+ },
452
+ {
453
+ "id": "gpt-audio-mini",
454
+ "created": 1759512027,
455
+ "owned_by": "system"
456
+ },
457
+ {
458
+ "id": "gpt-audio-mini-2025-10-06",
459
+ "created": 1759512137,
460
+ "owned_by": "system"
461
+ },
462
+ {
463
+ "id": "gpt-5-search-api",
464
+ "created": 1759514629,
465
+ "owned_by": "system"
466
+ },
467
+ {
468
+ "id": "gpt-realtime-mini",
469
+ "created": 1759517133,
470
+ "owned_by": "system"
471
+ },
472
+ {
473
+ "id": "gpt-realtime-mini-2025-10-06",
474
+ "created": 1759517175,
475
+ "owned_by": "system"
476
+ },
477
+ {
478
+ "id": "sora-2",
479
+ "created": 1759708615,
480
+ "owned_by": "system"
481
+ },
482
+ {
483
+ "id": "sora-2-pro",
484
+ "created": 1759708663,
485
+ "owned_by": "system"
486
+ },
487
+ {
488
+ "id": "gpt-5-search-api-2025-10-14",
489
+ "created": 1760043960,
490
+ "owned_by": "system"
491
+ },
492
+ {
493
+ "id": "gpt-3.5-turbo-16k",
494
+ "created": 1683758102,
495
+ "owned_by": "openai-internal"
496
+ },
497
+ {
498
+ "id": "tts-1",
499
+ "created": 1681940951,
500
+ "owned_by": "openai-internal"
501
+ },
502
+ {
503
+ "id": "whisper-1",
504
+ "created": 1677532384,
505
+ "owned_by": "openai-internal"
506
+ },
507
+ {
508
+ "id": "text-embedding-ada-002",
509
+ "created": 1671217299,
510
+ "owned_by": "openai-internal"
511
+ }
512
+ ]
openai_service.py ADDED
@@ -0,0 +1,250 @@
1
+ """
2
+ Service for generating replies using OpenAI API.
3
+ """
4
+ import os
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Optional, List, Dict, Any
8
+ from openai import OpenAI
9
+
10
+ # Reuse the same system prompt from Perplexity service
11
+ SYSTEM_PROMPT = """
12
+ Bạn là một wingman AI tinh tế, chuyên giúp Nam soạn 1 tin nhắn trả lời duy nhất trong hội thoại hẹn hò tiếng Việt. Bạn luôn nhìn từ góc nhìn của Nam, xưng "anh" và gọi đối phương là "em".
13
+
14
+ Bạn được cung cấp:
15
+
16
+ - HỘI THOẠI: đoạn hội thoại gần nhất giữa Nam (Male) và Nữ (Female), phân tách các tin bằng ký hiệu "|||".
17
+
18
+ - TRIGGER: intent hiện tại (ví dụ: neutral, positive, negative, confused...).
19
+
20
+ - MOVE: chiến lược hiện tại (ví dụ: escalate, hold, de-escalate, tease, comfort...).
21
+
22
+ Nhiệm vụ của bạn:
23
+
24
+ - Dựa trên HỘI THOẠI + TRIGGER + MOVE, hãy chọn một hướng phản hồi tự nhiên, duyên dáng, đúng chiến lược (không quá đẩy hay quá lùi so với MOVE).
25
+
26
+ - Ưu tiên giữ mạch cảm xúc nhất quán với hội thoại, tránh tạo thông tin fact mới về thế giới bên ngoài hoặc về hai người.
27
+
28
+ QUY TẮC CỨNG:
29
+
30
+ - Chỉ trả về đúng 1 câu duy nhất.
31
+
32
+ - Tối đa 25 từ tiếng Việt.
33
+
34
+ - Lịch sự, ấm áp, thân thiện; không phán xét, không thô lỗ.
35
+
36
+ - Không giải thích meta (không nói về "prompt", "AI", "chiến lược", "MOVE", "TRIGGER"...).
37
+
38
+ - Không lặp lại nguyên văn câu của đối phương.
39
+
40
+ - Không thêm fact mới (chỉ dựa trên những gì có trong hội thoại, hoặc các câu nói chung chung, không cụ thể hóa thông tin chưa có).
41
+
42
+ Khi TRIGGER hoặc MOVE có vẻ mâu thuẫn với HỘI THOẠI:
43
+
44
+ - Hãy ưu tiên sự an toàn và mềm mại.
45
+
46
+ - Có thể hỏi lại nhẹ nhàng để làm rõ, nhưng vẫn giữ frame chủ động, tự tin của Nam.
47
+
48
+ PHONG CÁCH:
49
+
50
+ - Ấm áp, tự tin nhưng không tự cao.
51
+
52
+ - Có thể dùng từ đệm tự nhiên (nha, nhé, ạ, dạ) khi phù hợp với ngữ cảnh.
53
+
54
+ - Phản chiếu cảm xúc của đối phương.
55
+
56
+ - Giữ mạch trò chuyện mở để còn đất tăng tương tác về sau.
57
+
58
+ Nếu vì bất kỳ lý do gì bạn không thể tuân thủ tất cả quy tắc trên:
59
+
60
+ - Hãy ưu tiên vẫn trả về đúng 1 câu, ≤25 từ, không chứa meta, không chứa thông tin fact mới.
61
+ """.strip()
62
+
63
+ MODELS_CACHE_PATH = Path(__file__).resolve().parent / "config" / "ai_models.json"
64
+ ENV = os.getenv("APP_ENV", "development").lower()
65
+
66
+
67
+ def get_openai_client() -> OpenAI:
68
+ """Get OpenAI client with API key from environment."""
69
+ api_key = os.getenv("OPENAI_API_KEY")
70
+ if not api_key:
71
+ raise ValueError(
72
+ "OpenAI API key is required.\n\n"
73
+ "Set environment variable:\n"
74
+ " export OPENAI_API_KEY=sk-...\n\n"
75
+ "Or add to .env file."
76
+ )
77
+ return OpenAI(api_key=api_key)
78
+
79
+
80
+ def fetch_and_cache_models_if_needed() -> List[Dict[str, Any]]:
81
+ """
82
+ Local/dev:
83
+ - If cache file does not exist -> call OpenAI models.list() and save to JSON
84
+ - If cache file exists -> just read
85
+ Production:
86
+ - Never call OpenAI models.list(), only read from JSON
87
+ """
88
+ # If cache exists, always use it
89
+ if MODELS_CACHE_PATH.exists():
90
+ with MODELS_CACHE_PATH.open("r", encoding="utf-8") as f:
91
+ return json.load(f)
92
+
93
+ # If production and cache missing: fail fast
94
+ if ENV in ("production", "prod"):
95
+ raise RuntimeError(
96
+ f"Models cache not found at {MODELS_CACHE_PATH}. "
97
+ f"Generate it in development before deploying."
98
+ )
99
+
100
+ # Dev/test: call OpenAI and write cache
101
+ client = get_openai_client()
102
+ models = client.models.list() # returns a ModelList object
103
+ data = []
104
+ for m in models.data:
105
+ data.append(
106
+ {
107
+ "id": m.id,
108
+ "created": getattr(m, "created", None),
109
+ "owned_by": getattr(m, "owned_by", None),
110
+ }
111
+ )
112
+ MODELS_CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
113
+ with MODELS_CACHE_PATH.open("w", encoding="utf-8") as f:
114
+ json.dump(data, f, ensure_ascii=False, indent=2)
115
+ return data
116
+
117
+
118
+ def get_available_models(prefix_filters: Optional[List[str]] = None) -> List[str]:
119
+ """
120
+ Return list of model ids from cache (generate cache in dev if needed).
121
+ Optional: filter by prefix, e.g. ["gpt-4", "gpt-3.5"].
122
+ """
123
+ models = fetch_and_cache_models_if_needed()
124
+ ids = [m["id"] for m in models]
125
+ if prefix_filters:
126
+ filtered = []
127
+ for mid in ids:
128
+ if any(mid.startswith(p) for p in prefix_filters):
129
+ filtered.append(mid)
130
+ return filtered
131
+ return ids
132
+
133
+
134
+ def create_chat_response(
135
+ model: str,
136
+ user_message: str,
137
+ ) -> str:
138
+ """
139
+ Use chosen model to create a simple text response via Responses API.
140
+ """
141
+ client = get_openai_client()
142
+ resp = client.responses.create(
143
+ model=model,
144
+ input=user_message,
145
+ )
146
+ # The Responses API returns structured output
147
+ # resp.output[0].content[0].text holds the text string directly
148
+ if resp.output and len(resp.output) > 0:
149
+ first_output = resp.output[0]
150
+ if first_output.content and len(first_output.content) > 0:
151
+ first_content = first_output.content[0]
152
+ if hasattr(first_content, 'text'):
153
+ text = first_content.text
154
+ # text is already a plain string
155
+ return str(text) if text else ""
156
+
157
+ # Fallback: use output_text if available
158
+ if hasattr(resp, 'output_text') and resp.output_text:
159
+ return str(resp.output_text)
160
+
161
+ raise ValueError("No text found in response")
162
+
163
+
164
+ class OpenAIReplyService:
165
+ """Service for generating replies using OpenAI API."""
166
+
167
+ def __init__(self, api_key: Optional[str] = None, model_name: str = "gpt-4o-mini"):
168
+ """
169
+ Initialize OpenAI service.
170
+
171
+ Args:
172
+ api_key: OpenAI API key. If None, will try to get from OPENAI_API_KEY env var.
173
+ model_name: Model name to use (default: "gpt-4o-mini")
174
+ """
175
+ if api_key:
176
+ os.environ["OPENAI_API_KEY"] = api_key
177
+
178
+ self.model_name = model_name
179
+ self.client = get_openai_client()
180
+
181
+ def generate_reply(
182
+ self,
183
+ conversation: str,
184
+ trigger: str,
185
+ move: str,
186
+ ) -> str:
187
+ """
188
+ Generate reply using OpenAI API.
189
+
190
+ Args:
191
+ conversation: Conversation text in format "Male: ... ||| Female: ..."
192
+ trigger: Trigger label (e.g., "rapport_bid", "flirt_charm")
193
+ move: Move label (e.g., "charm", "invite", "validate")
194
+
195
+ Returns:
196
+ Generated reply text (1 sentence, ≤25 words)
197
+ """
198
+ user_content = f"""
199
+ HỘI THOẠI: "{conversation}"
200
+ TRIGGER: "{trigger}"
201
+ MOVE: "{move}"
202
+ """.strip()
203
+
204
+ # Combine system prompt and user content
205
+ full_prompt = f"{SYSTEM_PROMPT}\n\n{user_content}"
206
+
207
+ try:
208
+ # Use Responses API
209
+ response = create_chat_response(
210
+ model=self.model_name,
211
+ user_message=full_prompt,
212
+ )
213
+
214
+ raw = response.strip() if response else ""
215
+
216
+ # Post-processing: keep the first sentence, cap at 25 words
217
+ import re
218
+ # Split on sentence punctuation and keep the first sentence
219
+ sentences = re.split(r'[.!?]', raw)
220
+ one_sentence = sentences[0].strip() if sentences else raw.strip()
221
+
222
+ # Cap at 25 words
223
+ words = one_sentence.split()
224
+ limited = " ".join(words[:25])
225
+
226
+ # Make sure the reply ends with sentence punctuation
227
+ if limited and limited[-1] not in ".!?":
228
+ limited = limited.rstrip(",;:") + "."
229
+
230
+ return limited
231
+
232
+ except Exception as e:
233
+ error_msg = str(e)
234
+ raise Exception(f"OpenAI API error: {error_msg}")
235
+
236
+
237
+ # Global singleton instance
238
+ _openai_service = None
239
+
240
+
241
+ def get_openai_service(
242
+ api_key: Optional[str] = None,
243
+ model_name: str = "gpt-4o-mini",
244
+ ) -> OpenAIReplyService:
245
+ """Get or create the global OpenAI service instance."""
246
+ global _openai_service
247
+ if _openai_service is None or _openai_service.model_name != model_name:
248
+ _openai_service = OpenAIReplyService(api_key=api_key, model_name=model_name)
249
+ return _openai_service
250
+
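
openai_service.py follows the same shape as the existing Gemini service: get_openai_client reads OPENAI_API_KEY, fetch_and_cache_models_if_needed only calls models.list() outside production and otherwise reads config/ai_models.json, and OpenAIReplyService.generate_reply sends the Vietnamese wingman system prompt through the Responses API, then trims the output to one sentence of at most 25 words. A minimal usage sketch, assuming OPENAI_API_KEY is exported and the cache file added in this commit is in place; the chosen model id is just an example.

    # Sketch: generate one reply with the new service.
    from openai_service import get_available_models, get_openai_service

    candidates = get_available_models(prefix_filters=["gpt-4o"])
    service = get_openai_service(model_name=candidates[0] if candidates else "gpt-4o-mini")
    reply = service.generate_reply(
        conversation="Male: Tối nay anh có lịch đột xuất. ||| Female: Thế mai được không?",
        trigger="neutral",
        move="escalate",
    )
    print(reply)  # post-processed to a single sentence of at most 25 words
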
setup_and_finetune.py CHANGED
@@ -166,17 +166,17 @@ def run_training_plan(tasks):
166
  continue
167
  print(f"[START] Training {task['name']} ...")
168
  try:
169
- result = subprocess.run(
170
  task["command"],
171
- capture_output=True,
172
- text=True,
173
- check=False
174
- )
175
- if result.returncode == 0:
176
  print(f"[DONE] {task['name']} completed successfully.")
177
- else:
178
  print(f"[FAIL] {task['name']} exited with error code {result.returncode}")
179
- print(result.stderr)
180
  except Exception as exc:
181
  print(f"[ERROR] {task['name']} -> {exc}")
182
  print("All auto-training tasks finished.")
 
166
  continue
167
  print(f"[START] Training {task['name']} ...")
168
  try:
169
+ result = subprocess.run(
170
  task["command"],
171
+ capture_output=True,
172
+ text=True,
173
+ check=False
174
+ )
175
+ if result.returncode == 0:
176
  print(f"[DONE] {task['name']} completed successfully.")
177
+ else:
178
  print(f"[FAIL] {task['name']} exited with error code {result.returncode}")
179
+ print(result.stderr)
180
  except Exception as exc:
181
  print(f"[ERROR] {task['name']} -> {exc}")
182
  print("All auto-training tasks finished.")
test_all_openai_models.py ADDED
@@ -0,0 +1,139 @@
1
+ """
2
+ Test all OpenAI models and create whitelist of working models.
3
+ """
4
+ import os
5
+ import json
6
+ from pathlib import Path
7
+ from dotenv import load_dotenv
8
+ from openai_service import get_available_models, get_openai_service
9
+
10
+ # Load .env file
11
+ env_path = Path(__file__).parent / '.env'
12
+ if env_path.exists():
13
+ load_dotenv(env_path)
14
+
15
+ # Test data
16
+ TEST_CONVERSATION = "Male: Tối nay anh có lịch đột xuất. ||| Female: Thế mai được không?"
17
+ TEST_TRIGGER = "neutral"
18
+ TEST_MOVE = "escalate"
19
+
20
+
21
+ def test_model(model_name: str, max_retries: int = 2) -> tuple[bool, str, str]:
22
+ """
23
+ Test a single OpenAI model.
24
+
25
+ Returns:
26
+ (success: bool, reply: str, error: str)
27
+ """
28
+ for attempt in range(max_retries):
29
+ try:
30
+ service = get_openai_service(model_name=model_name)
31
+ formatted_conversation = TEST_CONVERSATION  # already in "Male: ... ||| Female: ..." form; re-prefixing would duplicate the labels
32
+
33
+ reply = service.generate_reply(
34
+ conversation=formatted_conversation,
35
+ trigger=TEST_TRIGGER,
36
+ move=TEST_MOVE,
37
+ )
38
+
39
+ if reply and len(reply.strip()) > 0:
40
+ return True, reply, ""
41
+ else:
42
+ return False, "", "Empty reply"
43
+
44
+ except Exception as e:
45
+ error_msg = str(e)
46
+ if attempt < max_retries - 1:
47
+ continue # Retry
48
+ return False, "", error_msg
49
+
50
+ return False, "", "Max retries exceeded"
51
+
52
+
53
+ def main():
54
+ """Test all OpenAI models and create whitelist."""
55
+ print("=" * 60)
56
+ print("Testing All OpenAI Models for Whitelist")
57
+ print("=" * 60)
58
+
59
+ # Fetch all available models
60
+ try:
61
+ print("\nFetching available OpenAI models...")
62
+ # Filter for common chat models
63
+ all_model_ids = get_available_models(prefix_filters=["gpt-", "o1-", "o3-"])
64
+ print(f"✓ Found {len(all_model_ids)} models with prefixes: gpt-, o1-, o3-")
65
+ except Exception as e:
66
+ print(f"✗ Error fetching models: {str(e)}")
67
+ return 1
68
+
69
+ # Test each model
70
+ whitelist = []
71
+ failed_models = []
72
+
73
+ print(f"\nTesting {len(all_model_ids)} models...")
74
+ print("=" * 60)
75
+
76
+ for idx, model_id in enumerate(all_model_ids, 1):
77
+ print(f"\n[{idx}/{len(all_model_ids)}] Testing: {model_id}")
78
+
79
+ success, reply, error = test_model(model_id)
80
+
81
+ if success:
82
+ print(f" ✓ PASSED - Reply: {reply[:60]}...")
83
+ whitelist.append({
84
+ "id": model_id,
85
+ "name": model_id,
86
+ "displayName": model_id,
87
+ "test_reply": reply[:100], # Store sample reply
88
+ })
89
+ else:
90
+ print(f" ✗ FAILED - {error[:100]}")
91
+ failed_models.append({
92
+ "id": model_id,
93
+ "name": model_id,
94
+ "error": error[:200],
95
+ })
96
+
97
+ # Save whitelist
98
+ whitelist_file = Path(__file__).parent / "openai_models_whitelist.json"
99
+ with open(whitelist_file, "w", encoding="utf-8") as f:
100
+ json.dump({
101
+ "whitelist": whitelist,
102
+ "failed": failed_models,
103
+ "test_data": {
104
+ "conversation": TEST_CONVERSATION,
105
+ "trigger": TEST_TRIGGER,
106
+ "move": TEST_MOVE,
107
+ },
108
+ "total_tested": len(all_model_ids),
109
+ "passed": len(whitelist),
110
+ "failed": len(failed_models),
111
+ }, f, indent=2, ensure_ascii=False)
112
+
113
+ # Print summary
114
+ print("\n" + "=" * 60)
115
+ print("Test Summary")
116
+ print("=" * 60)
117
+ print(f"Total models tested: {len(all_model_ids)}")
118
+ print(f"✓ Passed (whitelist): {len(whitelist)}")
119
+ print(f"✗ Failed: {len(failed_models)}")
120
+ print(f"\nWhitelist saved to: {whitelist_file}")
121
+
122
+ if whitelist:
123
+ print("\n✓ Working models (whitelist):")
124
+ for model in whitelist:
125
+ print(f" - {model['displayName']} ({model['id']})")
126
+
127
+ if failed_models:
128
+ print("\n✗ Failed models:")
129
+ for model in failed_models[:10]: # Show first 10
130
+ print(f" - {model['name']}: {model['error'][:50]}...")
131
+ if len(failed_models) > 10:
132
+ print(f" ... and {len(failed_models) - 10} more")
133
+
134
+ return 0 if whitelist else 1
135
+
136
+
137
+ if __name__ == "__main__":
138
+ exit(main())
139
+
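A sketch (not part of this commit) of how the generated openai_models_whitelist.json could be consumed elsewhere in the app; the loader function name is hypothetical:

import json
from pathlib import Path

def load_openai_whitelist(path: str = "openai_models_whitelist.json") -> list:
    """Return the IDs of models that passed the smoke test, or an empty list if the file is missing."""
    whitelist_file = Path(path)
    if not whitelist_file.exists():
        return []
    data = json.loads(whitelist_file.read_text(encoding="utf-8"))
    return [entry["id"] for entry in data.get("whitelist", [])]

print(load_openai_whitelist())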
test_openai_list_models.py ADDED
@@ -0,0 +1,103 @@
1
+ """
2
+ Test script to fetch OpenAI models list.
3
+ """
4
+ import os
5
+ from pathlib import Path
6
+ from dotenv import load_dotenv
7
+ from openai import OpenAI
8
+
9
+ # Load .env file
10
+ env_path = Path(__file__).parent / '.env'
11
+ if env_path.exists():
12
+ load_dotenv(env_path)
13
+ print(f"✓ Loaded .env file from {env_path}")
14
+ else:
15
+ print(f"⚠ .env file not found at {env_path}, using environment variables only")
16
+
17
+ # Get API key
18
+ api_key = os.getenv("OPENAI_API_KEY")
19
+ if not api_key:
20
+ print("❌ OPENAI_API_KEY not found in environment variables")
21
+ print("\nPlease set it:")
22
+ print(" export OPENAI_API_KEY=sk-...")
23
+ exit(1)
24
+
25
+ print(f"✓ OpenAI API Key found: {api_key[:10]}...{api_key[-4:]}")
26
+
27
+ # Create client
28
+ try:
29
+ client = OpenAI(api_key=api_key)
30
+ print("✓ OpenAI client created successfully")
31
+ except Exception as e:
32
+ print(f"✗ Error creating OpenAI client: {e}")
33
+ exit(1)
34
+
35
+ # Fetch models
36
+ print("\n" + "=" * 60)
37
+ print("Fetching OpenAI models list...")
38
+ print("=" * 60)
39
+
40
+ try:
41
+ models = client.models.list()
42
+ print(f"✓ Successfully fetched models list")
43
+ print(f" Total models: {len(models.data)}")
44
+
45
+ # Filter for common chat models
46
+ chat_models = []
47
+ prefixes = ["gpt-", "o1-", "o3-"]
48
+
49
+ for model in models.data:
50
+ model_id = model.id
51
+ if any(model_id.startswith(prefix) for prefix in prefixes):
52
+ chat_models.append({
53
+ "id": model_id,
54
+ "created": getattr(model, "created", None),
55
+ "owned_by": getattr(model, "owned_by", None),
56
+ })
57
+
58
+ print(f"\n✓ Found {len(chat_models)} chat models (filtered by prefixes: {prefixes})")
59
+
60
+ print("\n" + "=" * 60)
61
+ print("Chat Models List:")
62
+ print("=" * 60)
63
+ for idx, model in enumerate(chat_models[:20], 1): # Show first 20
64
+ created = model.get("created")
65
+ owned_by = model.get("owned_by", "unknown")
66
+ print(f"{idx}. {model['id']} (owned_by: {owned_by}, created: {created})")
67
+
68
+ if len(chat_models) > 20:
69
+ print(f"\n... and {len(chat_models) - 20} more models")
70
+
71
+ # Save to cache
72
+ cache_path = Path(__file__).parent / "config" / "ai_models.json"
73
+ cache_path.parent.mkdir(parents=True, exist_ok=True)
74
+
75
+ import json
76
+ all_models_data = []
77
+ for model in models.data:
78
+ all_models_data.append({
79
+ "id": model.id,
80
+ "created": getattr(model, "created", None),
81
+ "owned_by": getattr(model, "owned_by", None),
82
+ })
83
+
84
+ with open(cache_path, "w", encoding="utf-8") as f:
85
+ json.dump(all_models_data, f, ensure_ascii=False, indent=2)
86
+
87
+ print(f"\n✓ Saved all models to cache: {cache_path}")
88
+ print(f" Total models cached: {len(all_models_data)}")
89
+
90
+ print("\n" + "=" * 60)
91
+ print("Test Summary")
92
+ print("=" * 60)
93
+ print(f"✓ API connection: SUCCESS")
94
+ print(f"✓ Models fetched: {len(models.data)}")
95
+ print(f"✓ Chat models (filtered): {len(chat_models)}")
96
+ print(f"✓ Cache saved: {cache_path}")
97
+
98
+ except Exception as e:
99
+ print(f"\n✗ Error fetching models: {str(e)}")
100
+ import traceback
101
+ traceback.print_exc()
102
+ exit(1)
103
+
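Once config/ai_models.json has been written by the script above, it can be re-filtered offline without another API call; a small sketch, assuming the cache file exists:

import json
from pathlib import Path

cache_path = Path("config") / "ai_models.json"
models = json.loads(cache_path.read_text(encoding="utf-8"))
prefixes = ("gpt-", "o1-", "o3-")
chat_models = [m["id"] for m in models if m["id"].startswith(prefixes)]
print(f"{len(chat_models)} chat models out of {len(models)} cached")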
test_openai_response.py ADDED
@@ -0,0 +1,94 @@
1
+ """
2
+ Test the OpenAI Responses API with a single model.
3
+ """
4
+ import os
5
+ from pathlib import Path
6
+ from dotenv import load_dotenv
7
+ from openai import OpenAI
8
+
9
+ # Load .env file
10
+ env_path = Path(__file__).parent / '.env'
11
+ if env_path.exists():
12
+ load_dotenv(env_path)
13
+
14
+ api_key = os.getenv("OPENAI_API_KEY")
15
+ if not api_key:
16
+ print("❌ OPENAI_API_KEY not found")
17
+ exit(1)
18
+
19
+ client = OpenAI(api_key=api_key)
20
+
21
+ # Test data
22
+ SYSTEM_PROMPT = "You are a helpful assistant. Respond briefly in Vietnamese."
23
+ USER_MESSAGE = "Xin chào, bạn khỏe không?"
24
+
25
+ print("=" * 60)
26
+ print("Testing OpenAI Responses API")
27
+ print("=" * 60)
28
+
29
+ # Test with gpt-4o-mini
30
+ model = "gpt-4o-mini"
31
+ print(f"\nTesting model: {model}")
32
+ print(f"System prompt: {SYSTEM_PROMPT}")
33
+ print(f"User message: {USER_MESSAGE}")
34
+
35
+ try:
36
+ full_prompt = f"{SYSTEM_PROMPT}\n\n{USER_MESSAGE}"
37
+
38
+ print("\nCalling client.responses.create()...")
39
+ resp = client.responses.create(
40
+ model=model,
41
+ input=full_prompt,
42
+ )
43
+
44
+ print(f"✓ Response received")
45
+ print(f" Response type: {type(resp)}")
46
+ print(f" Response attributes: {dir(resp)}")
47
+
48
+ # Extract text
49
+ if hasattr(resp, 'output') and resp.output:
50
+ print(f" Output type: {type(resp.output)}")
51
+ print(f" Output length: {len(resp.output) if hasattr(resp.output, '__len__') else 'N/A'}")
52
+
53
+ if len(resp.output) > 0:
54
+ first_output = resp.output[0]
55
+ print(f" First output type: {type(first_output)}")
56
+ print(f" First output attributes: {dir(first_output)}")
57
+
58
+ if hasattr(first_output, 'content') and first_output.content:
59
+ print(f" Content type: {type(first_output.content)}")
60
+ print(f" Content length: {len(first_output.content) if hasattr(first_output.content, '__len__') else 'N/A'}")
61
+
62
+ if len(first_output.content) > 0:
63
+ first_content = first_output.content[0]
64
+ print(f" First content type: {type(first_content)}")
65
+ print(f" First content attributes: {dir(first_content)}")
66
+
67
+ if hasattr(first_content, 'text'):
68
+ text_obj = first_content.text
69
+ print(f" Text object type: {type(text_obj)}")
70
+ print(f" Text object attributes: {dir(text_obj)}")
71
+
72
+ if hasattr(text_obj, 'value'):
73
+ text_value = text_obj.value
74
+ print(f"\n✓ SUCCESS!")
75
+ print(f" Response text: {text_value}")
76
+ else:
77
+ print(f" Text object: {text_obj}")
78
+ else:
79
+ print(f" First content: {first_content}")
80
+ else:
81
+ print(" No content found")
82
+ else:
83
+ print(" No content attribute found")
84
+ else:
85
+ print(" Output is empty")
86
+ else:
87
+ print(f" Response object: {resp}")
88
+ print(f" Full response: {resp}")
89
+
90
+ except Exception as e:
91
+ print(f"\n✗ Error: {str(e)}")
92
+ import traceback
93
+ traceback.print_exc()
94
+
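Newer releases of the openai Python SDK expose a convenience accessor on Responses API results that avoids the attribute walking above; a hedged sketch (whether output_text is available depends on the installed SDK version):

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment
resp = client.responses.create(model="gpt-4o-mini", input="Xin chào, bạn khỏe không?")
# output_text, when present, concatenates all text parts of the response.
print(getattr(resp, "output_text", None) or resp.output)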
trigger_move_identifier.py CHANGED
@@ -64,7 +64,7 @@ class TriggerMoveIdentifier:
64
  "pivot_disreengage",
65
  "neutral",
66
  ]
67
-
68
  DEFAULT_MOVE_LABELS = [
69
  "spark",
70
  "intrigue",
@@ -92,7 +92,7 @@ class TriggerMoveIdentifier:
92
  "relate",
93
  "neutral",
94
  ]
95
-
96
  def __init__(
97
  self,
98
  model_dir: str = "./models/trigger_detector",
@@ -212,13 +212,13 @@ class TriggerMoveIdentifier:
212
  "Respond using format 'Trigger: <trigger> | Move: <move>'"
213
  )
214
 
215
- try:
216
- response = self.client.text_generation(
217
- prompt,
218
  max_new_tokens=64,
219
- temperature=0.3,
220
  return_full_text=False,
221
- )
222
  return self._parse_response(response)
223
  except Exception as exc:
224
  print(f"Inference API error: {exc}, falling back to heuristics.")
 
64
  "pivot_disreengage",
65
  "neutral",
66
  ]
67
+
68
  DEFAULT_MOVE_LABELS = [
69
  "spark",
70
  "intrigue",
 
92
  "relate",
93
  "neutral",
94
  ]
95
+
96
  def __init__(
97
  self,
98
  model_dir: str = "./models/trigger_detector",
 
212
  "Respond using format 'Trigger: <trigger> | Move: <move>'"
213
  )
214
 
215
+ try:
216
+ response = self.client.text_generation(
217
+ prompt,
218
  max_new_tokens=64,
219
+ temperature=0.3,
220
  return_full_text=False,
221
+ )
222
  return self._parse_response(response)
223
  except Exception as exc:
224
  print(f"Inference API error: {exc}, falling back to heuristics.")