Spaces:

noppodev
/

NoppoIntelligence

Sleeping

App Files Community

noppodev committed on Apr 12

Commit

9beded6

verified ·

1 Parent(s): 3ca5b86

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -21

app.py CHANGED Viewed

@@ -2,37 +2,34 @@ import os
 import subprocess
 import shutil
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import torch
 from huggingface_hub import HfApi
 # Build the merged model under ./ni_v1_model with mergekit-yaml when that
 # directory does not exist yet, then load the tokenizer and model from it
 # and return a "text-generation" pipeline.
 # Raises RuntimeError when the merge step fails.
 # NOTE(review): this is a diff view; lines between the two hunks (inside
 # the mergekit-yaml argument list) are not shown here.
 def prepare_model():
     model_path = "./ni_v1_model"
-    # Hugging Face cache directory (this is what fills up the disk)
-    cache_path = os.path.expanduser("~/.cache/huggingface/hub")
     token = os.getenv("HF_TOKEN")
     if not os.path.exists(model_path):
         print("🧹 ストレージ確保のため、古い残骸を掃除するぜ...")
-        # Delete any leftover output from a previously failed merge
         # NOTE(review): this check is nested under "if not os.path.exists(model_path)",
         # so it looks unreachable — confirm the intended cleanup condition.
         if os.path.exists(model_path):
             shutil.rmtree(model_path)
-        # If space is really tight, delete the cache too (forces a re-download, but there is no other choice)
-        # shutil.rmtree(cache_path, ignore_errors=True)
         print("🚀 NI-v1 マージ開始...")
         env = os.environ.copy()
         if token:
             env["HF_TOKEN"] = token
             try:
                 subprocess.run(["hf", "auth", "login", "--token", token], check=True)
             except:
-                print("⚠️ ログインスキップ（環境変数で続行）")
         try:
-            # --lazy-unpickle: magic argument that reduces memory and disk usage
-            # --low-cpu-mem: reduces the load even further
             subprocess.run(
                 ["mergekit-yaml", "config.yaml", model_path,
                  "--allow-crimes",
@@ -41,42 +38,67 @@ def prepare_model():
                 check=True,
                 env=env
             )
-            print("✨ マージ成功。のっぽ、耐えたぜ！")
         except:
-            raise RuntimeError("マージ失敗。容量か設定を見直してくれ。")
-    print("🧠 NI-v1 ロード中...")
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(
         model_path,
-        torch_dtype=torch.bfloat16,
         device_map="auto",
         trust_remote_code=True
     )
     return pipeline("text-generation", model=model, tokenizer=tokenizer)
 # Build the pipeline once at import time; on any failure print the error
 # and leave pipe as None so predict() can report it instead of crashing.
 try:
     pipe = prepare_model()
 except Exception as e:
     print(f"起動失敗: {e}")
     pipe = None
 # Chat callback passed to gr.ChatInterface: wraps message in
 # <|im_start|>/<|im_end|> markers, generates up to 512 new tokens with
 # sampling at temperature 0.7, and returns the text after the last
 # "assistant\n" with "<|im_end|>" removed. history is not used.
 # NOTE(review): splitting on "assistant\n" would also cut a reply that
 # itself contains that text — confirm this is acceptable.
 def predict(message, history):
-    if pipe is None: return "知能ユニット未起動。"
     prompt = f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
     outputs = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)
-    return outputs[0]['generated_text'].split("assistant\n")[-1].replace("<|im_end|>", "")
 # Gradio UI: a chat tab backed by predict() and a publish tab that uploads
 # the ./ni_v1_model folder to the repo entered in the "Repo ID" textbox.
 with gr.Blocks(title="NI-v1.0") as demo:
     gr.Markdown("# 🤖 Noppo-Intelligence v1.0")
     with gr.Tab("チャット"):
         gr.ChatInterface(fn=predict)
     with gr.Tab("公開"):
         repo_id = gr.Textbox(label="Repo ID", value="noppodev/NoppoIntelligence")
-        user_token = gr.Textbox(label="Write Token", type="password")
-        pub_btn = gr.Button("アップロード")
         status = gr.Textbox(label="Status")
         # upload(r, t): r is the repo id, t the token from the password box;
         # the returned string is shown in the Status textbox.
         # NOTE(review): the line between the def and its body (presumably
         # "try:") falls on the hunk boundary and is not shown in this view.
         def upload(r, t):
@@ -84,8 +106,9 @@ with gr.Blocks(title="NI-v1.0") as demo:
                 api = HfApi()
                 # NOTE(review): create_repo is not given token=t, unlike
                 # upload_folder below — presumably it relies on ambient
                 # credentials; confirm.
                 api.create_repo(repo_id=r, repo_type="model", exist_ok=True)
                 api.upload_folder(folder_path="./ni_v1_model", repo_id=r, token=t)
-                return "✅ 完了だぜ！"
-            except Exception as e: return f"❌ エラー: {e}"
         pub_btn.click(upload, [repo_id, user_token], status)

 import subprocess
 import shutil
 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
 import torch
 from huggingface_hub import HfApi
+# ---------------------------------------------------------
+# 1. Model preparation section (storage cleanup + authentication + merge)
+# ---------------------------------------------------------
 # Build the merged model under ./ni_v1_model with mergekit-yaml when that
 # directory does not exist yet, then load it with a 4-bit BitsAndBytesConfig
 # and return a "text-generation" pipeline.
 # Raises RuntimeError when the merge step fails.
 # NOTE(review): this is a diff view; the end of the mergekit-yaml argument
 # list (after "--allow-crimes") is not shown here.
 def prepare_model():
     model_path = "./ni_v1_model"
     token = os.getenv("HF_TOKEN")
     if not os.path.exists(model_path):
         print("🧹 ストレージ確保のため、古い残骸を掃除するぜ...")
         # NOTE(review): this check is nested under "if not os.path.exists(model_path)",
         # so it looks unreachable — confirm the intended cleanup condition.
         if os.path.exists(model_path):
             shutil.rmtree(model_path)
         print("🚀 NI-v1 マージ開始...")
         env = os.environ.copy()
         if token:
             env["HF_TOKEN"] = token
+            # Authenticate with the hf command (latest as of 2026)
             try:
                 subprocess.run(["hf", "auth", "login", "--token", token], check=True)
             except:
+                print("⚠️ 認証コマンド失敗。環境変数のみで続行するぜ。")
         try:
+            # Run the merge with options that save storage and memory
             subprocess.run(
                 ["mergekit-yaml", "config.yaml", model_path,
                  "--allow-crimes",
                 check=True,
                 env=env
             )
+            print("✨ マージ成功。のっぽ、やったぜ！")
         except:
+            raise RuntimeError("マージ失敗。config.yaml のレイヤー数を減らしてくれ。")
+    print("🧠 NI-v1 ロード中 (4-bit 量子化でメモリ節約モード)...")
+    # Quantization settings to cope with insufficient memory
+    bnb_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_compute_dtype=torch.bfloat16,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_use_double_quant=True,
+    )
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(
         model_path,
+        quantization_config=bnb_config, # large memory saving happens here
         device_map="auto",
         trust_remote_code=True
     )
     return pipeline("text-generation", model=model, tokenizer=tokenizer)
+# Start the unit
 # Runs once at import time; on any failure the error is printed and pipe
 # is left as None so predict() can report it instead of crashing.
 try:
     pipe = prepare_model()
 except Exception as e:
     print(f"起動失敗: {e}")
     pipe = None
+# ---------------------------------------------------------
+# 2. Inference logic
+# ---------------------------------------------------------
 # Chat callback passed to gr.ChatInterface. Returns a fixed notice when
 # pipe is None; otherwise generates up to 512 new tokens with sampling at
 # temperature 0.7 and returns the reply text. history is not used.
 def predict(message, history):
+    if pipe is None:
+        return "知能ユニットが起動してないぜ。容量不足かロードエラーだ。"
+    # Prompt format specified by the user
     prompt = f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
     outputs = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)
+    # Extract the response part
     # NOTE(review): keeps only the text after the last "assistant\n", so a
     # reply that itself contains that text would be cut — confirm.
+    response = outputs[0]['generated_text'].split("assistant\n")[-1].replace("<|im_end|>", "")
+    return response
+# ---------------------------------------------------------
+# 3. UI section (use ChatInterface to rule out send errors)
+# ---------------------------------------------------------
 # Gradio UI: a chat tab backed by predict() and a publish tab that uploads
 # the ./ni_v1_model folder to the repo entered in the "Repo ID" textbox.
 with gr.Blocks(title="NI-v1.0") as demo:
     gr.Markdown("# 🤖 Noppo-Intelligence v1.0")
     with gr.Tab("チャット"):
+        # Letting Gradio manage the history is the quickest way to avoid send errors
         gr.ChatInterface(fn=predict)
     with gr.Tab("公開"):
+        gr.Markdown("### 完成した NI-v1 を Hugging Face にアップロード")
+        # The repository name is the one specified by Noppo
         repo_id = gr.Textbox(label="Repo ID", value="noppodev/NoppoIntelligence")
+        user_token = gr.Textbox(label="Write Token (HF_TOKEN)", type="password")
+        pub_btn = gr.Button("アップロード開始")
         status = gr.Textbox(label="Status")
         # upload(r, t): r is the repo id, t the token from the password box;
         # the returned string is shown in the Status textbox.
         # NOTE(review): the line between the def and its body (presumably
         # "try:") is not shown in this diff view.
         def upload(r, t):
                 api = HfApi()
                 # NOTE(review): create_repo is not given token=t, unlike
                 # upload_folder below — presumably it relies on ambient
                 # credentials; confirm.
                 api.create_repo(repo_id=r, repo_type="model", exist_ok=True)
                 api.upload_folder(folder_path="./ni_v1_model", repo_id=r, token=t)
+                return "✅ アップロード完了！BeyondIntelligenceへの第一歩だ。"
+            except Exception as e:
+                return f"❌ エラー発生: {e}"
         pub_btn.click(upload, [repo_id, user_token], status)