ShiroOnigami23
/

Ghost-Codex-Offline

import os
import pickle

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# Build script: fits a TF-IDF vectorizer on the dataset's 'prompt' column and
# pickles (vectorizer, tfidf_matrix, code_array) into a single file.

# 1. Load the dataset. The 'prompt' and 'code' columns are both used below.
dataset_file = "rosetta_code_dataset.csv"
print("👻 Loading Rosetta Stone Dataset...")
try:
    df = pd.read_csv(dataset_file)
except FileNotFoundError:
    print("Error: Could not find rosetta_code_dataset.csv")
    # Non-zero status so callers can detect the failure; SystemExit is used
    # directly because the exit() helper is only present when `site` is loaded.
    raise SystemExit(1) from None
except (OSError, ValueError) as err:
    # pandas' parser / empty-file errors and decode errors derive from
    # ValueError; permission and other I/O problems derive from OSError.
    print(f"Error: Could not read {dataset_file}: {err}")
    raise SystemExit(1) from None
print(f"   -> Loaded {len(df)} examples.")

# Fail early with a readable message instead of a KeyError or an opaque
# vectorizer error further down.
missing_columns = [col for col in ("prompt", "code") if col not in df.columns]
if missing_columns:
    print(f"Error: {dataset_file} is missing column(s): {', '.join(missing_columns)}")
    raise SystemExit(1)
if df.empty:
    print(f"Error: {dataset_file} contains no examples.")
    raise SystemExit(1)

# 2. Train the Brain (TF-IDF Vectorizer)
# This converts English text ("fibonacci in java") into a numeric matrix.
print("🧠 Training the Ghost Engine...")
vectorizer = TfidfVectorizer()
# astype('U') coerces every prompt to a unicode string, so non-string cells
# (e.g. NaN from blank CSV fields) do not break tokenization.
tfidf_matrix = vectorizer.fit_transform(df['prompt'].values.astype('U'))

# 3. Save the Brain file
# We save the Vectorizer (translator), Matrix (memory), and Code (answers)
# as one tuple, in that order.
output_file = "ghost_brain.pkl"
with open(output_file, "wb") as f:
    pickle.dump((vectorizer, tfidf_matrix, df['code'].values), f)

print(f"✅ SUCCESS! Brain saved as '{output_file}'")
print(f"   Size: {os.path.getsize(output_file) / 1024:.2f} KB (Tiny!)")
print("   Copy this file + ghost_coder.py to your USB stick.")