PromptEnhancer_32B-FlashPack

Sleeping

App Files Files Community

rahul7star committed on Oct 27, 2025

Commit

9b6142b

verified ·

1 Parent(s): d191426

Update app_flash.py

Browse files

Files changed (1) hide show

app_flash.py +27 -25

app_flash.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# prompt_enhancer_flashpack_cpu.py
 import gc
 import torch
 import torch.nn as nn
@@ -13,14 +13,14 @@ from typing import Tuple
 # 🖥 Force CPU mode
 # ============================================================
 device = torch.device("cpu")
-torch.set_num_threads(4)  # reduce CPU contention
 print(f"🔧 Forcing device: {device}  (CPU-only mode)")
 # ============================================================
 # 1️⃣ Define FlashPack model
 # ============================================================
 class GemmaTrainer(nn.Module, FlashPackMixin):
-    def __init__(self, input_dim: int = 768, hidden_dim: int = 1024, output_dim: int = 768):
         super().__init__()
         self.fc1 = nn.Linear(input_dim, hidden_dim)
         self.relu = nn.ReLU()
@@ -58,30 +58,23 @@ def build_encoder(model_name="gpt2", max_length: int = 32):
     return tokenizer, embed_model, encode
 # ============================================================
-# 3️⃣ Train FlashPack mapping (CPU-optimized)
 # ============================================================
-def train_flashpack_model(
     dataset_name: str = "gokaygokay/prompt-enhancer-dataset",
-    model_name: str = "gpt2",
-    max_length: int = 32,
-    max_encode: int = 1000,  # use smaller number for CPU
-    push_to_hub: bool = False,
     hf_repo: str = "rahul7star/FlashPack",
 ) -> Tuple[GemmaTrainer, object, object, object, torch.Tensor]:
-    # 1️⃣ Load dataset
     print("📦 Loading dataset...")
     dataset = load_dataset(dataset_name, split="train")
-    # Limit dataset to max_encode prompts
     limit = min(max_encode, len(dataset))
     dataset = dataset.select(range(limit))
     print(f"⚡ Encoding only {len(dataset)} prompts (max limit {max_encode})")
-    # 2️⃣ Setup encoder
-    tokenizer, embed_model, encode_fn = build_encoder(model_name, max_length)
-    # 3️⃣ Encode dataset
     print("🔢 Encoding dataset into embeddings (CPU-friendly)...")
     short_list, long_list = [], []
     for i, item in enumerate(dataset):
@@ -96,7 +89,6 @@ def train_flashpack_model(
     long_embeddings = torch.vstack(long_list)
     print(f"✅ Finished encoding {short_embeddings.shape[0]} prompts")
-    # 4️⃣ Initialize & train model
     model = GemmaTrainer(
         input_dim=short_embeddings.shape[1],
         hidden_dim=min(512, short_embeddings.shape[1]),
@@ -132,25 +124,35 @@ def train_flashpack_model(
     print("✅ Training finished!")
-    # 5️⃣ Push to HF repo if requested
     if push_to_hub:
         model.save_flashpack(hf_repo, target_dtype=torch.float32, push_to_hub=True)
         print(f"✅ Model pushed to HF repo: {hf_repo}")
     return model, dataset, embed_model, tokenizer, long_embeddings
 # ============================================================
-# 4️⃣ Run training & load model
 # ============================================================
-model, dataset, embed_model, tokenizer, long_embeddings = train_flashpack_model(
-    max_encode=1000,  # safe CPU-friendly subset
-    push_to_hub=False
 )
-model.eval()
 # ============================================================
-# 5️⃣ Inference helpers
 # ============================================================
 @torch.no_grad()
 def encode_for_inference(prompt: str) -> torch.Tensor:
@@ -182,7 +184,7 @@ def enhance_prompt(user_prompt: str, temperature: float, max_tokens: int, chat_h
     return chat_history
 # ============================================================
-# 6️⃣ Gradio UI
 # ============================================================
 with gr.Blocks(title="Prompt Enhancer – FlashPack (CPU)", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
@@ -207,7 +209,7 @@ with gr.Blocks(title="Prompt Enhancer – FlashPack (CPU)", theme=gr.themes.Soft
     clear_btn.click(lambda: [], None, chatbot)
 # ============================================================
-# 7️⃣ Launch
 # ============================================================
 if __name__ == "__main__":
     demo.launch(show_error=True)

+# prompt_enhancer_flashpack_cpu_publish.py
 import gc
 import torch
 import torch.nn as nn
 # 🖥 Force CPU mode
 # ============================================================
 device = torch.device("cpu")
+torch.set_num_threads(4)
 print(f"🔧 Forcing device: {device}  (CPU-only mode)")
 # ============================================================
 # 1️⃣ Define FlashPack model
 # ============================================================
 class GemmaTrainer(nn.Module, FlashPackMixin):
+    def __init__(self, input_dim: int = 768, hidden_dim: int = 512, output_dim: int = 768):
         super().__init__()
         self.fc1 = nn.Linear(input_dim, hidden_dim)
         self.relu = nn.ReLU()
     return tokenizer, embed_model, encode
 # ============================================================
+# 3️⃣ Train and push FlashPack model
 # ============================================================
+def train_and_push_flashpack(
     dataset_name: str = "gokaygokay/prompt-enhancer-dataset",
     hf_repo: str = "rahul7star/FlashPack",
+    max_encode: int = 1000,
+    push_to_hub: bool = True,
 ) -> Tuple[GemmaTrainer, object, object, object, torch.Tensor]:
     print("📦 Loading dataset...")
     dataset = load_dataset(dataset_name, split="train")
     limit = min(max_encode, len(dataset))
     dataset = dataset.select(range(limit))
     print(f"⚡ Encoding only {len(dataset)} prompts (max limit {max_encode})")
+    tokenizer, embed_model, encode_fn = build_encoder("gpt2", max_length=32)
     print("🔢 Encoding dataset into embeddings (CPU-friendly)...")
     short_list, long_list = [], []
     for i, item in enumerate(dataset):
     long_embeddings = torch.vstack(long_list)
     print(f"✅ Finished encoding {short_embeddings.shape[0]} prompts")
     model = GemmaTrainer(
         input_dim=short_embeddings.shape[1],
         hidden_dim=min(512, short_embeddings.shape[1]),
     print("✅ Training finished!")
     if push_to_hub:
+        print(f"📤 Pushing model to Hugging Face repo: {hf_repo} ...")
         model.save_flashpack(hf_repo, target_dtype=torch.float32, push_to_hub=True)
         print(f"✅ Model pushed to HF repo: {hf_repo}")
     return model, dataset, embed_model, tokenizer, long_embeddings
 # ============================================================
+# 4️⃣ Load trained model from HF repo
+# ============================================================
+def load_flashpack_model(hf_repo="rahul7star/FlashPack"):
+    model = GemmaTrainer.load_flashpack(hf_repo)
+    model.eval()
+    tokenizer, embed_model, encode_fn = build_encoder("gpt2", max_length=32)
+    return model, tokenizer, embed_model
+# ============================================================
+# 5️⃣ Run training + push, then reload
 # ============================================================
+model, dataset, embed_model, tokenizer, long_embeddings = train_and_push_flashpack(
+    max_encode=1000,  # CPU-safe
+    push_to_hub=True
 )
+# reload to ensure FlashPack workflow works
+model, tokenizer, embed_model = load_flashpack_model("rahul7star/FlashPack")
 # ============================================================
+# 6️⃣ Inference helpers
 # ============================================================
 @torch.no_grad()
 def encode_for_inference(prompt: str) -> torch.Tensor:
     return chat_history
 # ============================================================
+# 7️⃣ Gradio UI
 # ============================================================
 with gr.Blocks(title="Prompt Enhancer – FlashPack (CPU)", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
     clear_btn.click(lambda: [], None, chatbot)
 # ============================================================
+# 8️⃣ Launch
 # ============================================================
 if __name__ == "__main__":
     demo.launch(show_error=True)