luguog committed on
Commit
bd58cf2
·
verified ·
1 Parent(s): ee46374

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -51
app.py CHANGED
@@ -1,34 +1,35 @@
1
  #!/usr/bin/env python3
2
- # app.py — Full Restoration-to-ChatGPT-Clone for Hugging Face
3
- # ⚙️ Zero mocks · Zero OAuth · Handles 500 MB+ OpenAI export ZIP
4
 
5
- import os, zipfile, json, tempfile, gc, time
6
  from pathlib import Path
7
  import gradio as gr
8
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 
9
 
10
- # === CONFIGURATION ===
11
  MODEL_ID = os.environ.get("HF_MODEL", "mistralai/Mistral-7B-Instruct-v0.2")
12
- DEVICE = "cuda" if os.environ.get("USE_CUDA", "0") == "1" else "cpu"
 
13
 
14
- # === LOAD MODEL ===
15
- print(f"⏳ Loading model {MODEL_ID} on {DEVICE} ...")
16
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
17
  model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto", device_map="auto")
18
  llm = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
19
- print("✅ Model loaded")
20
 
21
- # === DATA PIPELINE ===
22
  def unpack_zip(zip_file):
23
- """Unpack the user-uploaded OpenAI ZIP to a temp dir and return file list"""
24
- tmp_dir = tempfile.mkdtemp(prefix="chat_restore_")
25
  with zipfile.ZipFile(zip_file.name, "r") as zf:
26
  zf.extractall(tmp_dir)
27
- paths = list(Path(tmp_dir).rglob("*.json"))
28
- return tmp_dir, paths
29
 
30
  def restore_conversations(paths):
31
- """Stream large OpenAI export JSONs into a chronological chat list"""
32
  chats, count = [], 0
33
  for p in paths:
34
  try:
@@ -39,7 +40,7 @@ def restore_conversations(paths):
39
  msg = v.get("message")
40
  if not msg or not msg.get("author"):
41
  continue
42
- role = msg["author"].get("role")
43
  parts = msg.get("content", {}).get("parts")
44
  text = parts[0] if parts else ""
45
  if text.strip():
@@ -47,56 +48,75 @@ def restore_conversations(paths):
47
  count += 1
48
  except Exception:
49
  continue
50
- chats.sort(key=lambda x: x["role"] != "system") # system first
51
  gc.collect()
52
  return chats, count
53
 
54
- def summarize_conversations(chats, limit=50):
55
- """Create a condensed snapshot for quick inspection"""
56
- return [{"role": c["role"], "text": c["text"][:180]} for c in chats[:limit]]
57
-
58
- # === CHAT ENGINE ===
59
- def reply(history, user_msg):
60
- prompt = ""
61
- for speaker, msg in history[-8:]:
62
- prompt += f"{speaker}: {msg}\n"
63
- prompt += f"user: {user_msg}\nassistant:"
64
- out = llm(prompt, max_new_tokens=180, do_sample=True, temperature=0.7, top_p=0.9)[0]["generated_text"]
 
 
 
 
 
65
  answer = out.split("assistant:")[-1].strip()
66
- history.append(("user", user_msg))
67
  history.append(("assistant", answer))
 
68
  return history, ""
69
 
70
- # === UI BUILD ===
71
- theme = gr.themes.Soft(primary_hue="orange", neutral_hue="black")
 
 
 
 
 
 
 
72
 
73
- with gr.Blocks(title="ChatGPT Restoration Engine", theme=theme) as app:
74
- gr.Markdown("# 🧱 ChatGPT Restoration Engine")
75
- gr.Markdown("Upload your exported OpenAI JSON ZIP to rebuild a full offline chat environment.")
 
 
 
76
 
77
  with gr.Tab("📦 Restore Export"):
78
- zip_in = gr.File(label="Upload OpenAI ZIP (up to 500 MB)", file_types=[".zip"])
79
- summary_out = gr.JSON(label="Summary / Preview (Top 50 messages)")
80
- restore_btn = gr.Button("Restore Conversations", variant="primary")
81
 
82
- with gr.Tab("💬 Recreated Chat"):
83
- chatbot = gr.Chatbot(label="Your Restored ChatGPT Clone")
84
- user_input = gr.Textbox(label="Message")
85
- send_btn = gr.Button("Send")
86
-
87
- state = gr.State([])
88
 
 
89
  def handle_restore(zip_in):
90
- t0 = time.time()
91
- tmp_dir, paths = unpack_zip(zip_in)
92
  chats, count = restore_conversations(paths)
93
- snapshot = summarize_conversations(chats)
94
- print(f"🧾 Restored {count} messages from {len(paths)} files in {time.time()-t0:.1f}s")
95
- return snapshot, chats
 
 
96
 
97
- restore_btn.click(fn=handle_restore, inputs=[zip_in], outputs=[summary_out, state])
98
- send_btn.click(fn=reply, inputs=[state, user_input], outputs=[state, user_input])
 
99
 
100
- # === DEPLOY ===
101
  if __name__ == "__main__":
102
  app.launch(server_name="0.0.0.0", server_port=7860, share=True)
 
1
  #!/usr/bin/env python3
2
+ # app.py — Full OpenAI JSON Restoration + Persistent Chat Replica
3
+ # 🧠 Real memory · Real UI · No mocks · No placeholders
4
 
5
+ import os, zipfile, json, tempfile, time, gc, shelve
6
  from pathlib import Path
7
  import gradio as gr
8
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
9
+ import torch
10
 
11
# ================= CONFIG =================
# Model checkpoint is overridable via the HF_MODEL environment variable.
MODEL_ID = os.environ.get("HF_MODEL", "mistralai/Mistral-7B-Instruct-v0.2")
# Prefer GPU when one is visible to torch; otherwise fall back to CPU.
# NOTE(review): DEVICE is printed below but placement is actually handled by
# device_map="auto" at load time — confirm DEVICE is still needed.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# On-disk shelve file used to persist chat history across process restarts.
MEMORY_PATH = "chat_memory.db"
15
 
16
# ================= LOAD MODEL =================
# Heavyweight side effect at import time: downloads/loads the checkpoint.
print(f"⏳ Loading {MODEL_ID} on {DEVICE}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# torch_dtype="auto" keeps the checkpoint's native precision;
# device_map="auto" lets accelerate shard/place weights across devices.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto", device_map="auto")
# Single shared text-generation pipeline used by chat_reply().
llm = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
print("✅ Model ready")
22
 
23
# ================= DATA PIPELINE =================
def unpack_zip(zip_file):
    """Extract the uploaded OpenAI export ZIP into a scratch directory.

    zip_file: an uploaded-file object exposing a ``.name`` filesystem path.
    Returns (list of extracted ``*.json`` Paths, temp directory path).
    """
    work_dir = tempfile.mkdtemp(prefix="restore_")
    archive = zipfile.ZipFile(zip_file.name, "r")
    try:
        archive.extractall(work_dir)
    finally:
        archive.close()
    json_files = list(Path(work_dir).rglob("*.json"))
    return json_files, work_dir
 
30
 
31
  def restore_conversations(paths):
32
+ """Parse and chronologically order OpenAI JSON messages."""
33
  chats, count = [], 0
34
  for p in paths:
35
  try:
 
40
  msg = v.get("message")
41
  if not msg or not msg.get("author"):
42
  continue
43
+ role = msg["author"].get("role", "")
44
  parts = msg.get("content", {}).get("parts")
45
  text = parts[0] if parts else ""
46
  if text.strip():
 
48
  count += 1
49
  except Exception:
50
  continue
 
51
  gc.collect()
52
  return chats, count
53
 
54
def summarize(chats, limit=50):
    """Return a condensed preview: the first *limit* messages, text capped at 200 chars."""
    preview = []
    for entry in chats[:limit]:
        preview.append({"role": entry["role"], "text": entry["text"][:200]})
    return preview
57
+
58
# ================= MEMORY =================
def load_memory():
    """Fetch the persisted chat history from the shelve DB; empty list if absent."""
    db = shelve.open(MEMORY_PATH)
    try:
        return db.get("history", [])
    finally:
        db.close()
62
def save_memory(history):
    """Persist the full chat history (list of (role, text) tuples) to the shelve DB."""
    db = shelve.open(MEMORY_PATH)
    try:
        db["history"] = history
    finally:
        db.close()
65
+
66
# ================= CHAT ENGINE =================
def chat_reply(history, user_input):
    """Generate an assistant reply from recent context and persist the exchange.

    history: list of (role, text) tuples; mutated in place.
    user_input: the new user message.
    Returns (updated history, "") — the empty string clears the input textbox.
    """
    # Flatten the last 12 turns into a plain-text transcript prompt.
    context = "\n".join(f"{role}: {text}" for role, text in history[-12:])
    prompt = context + f"\nuser: {user_input}\nassistant:"
    # BUG FIX: do_sample=True is required for temperature/top_p to take effect;
    # without it the pipeline decodes greedily and ignores both (the previous
    # version of this file set it). See transformers GenerationConfig docs.
    out = llm(prompt, max_new_tokens=200, do_sample=True, temperature=0.7, top_p=0.9)[0]["generated_text"]
    # Keep only the text after the final "assistant:" marker.
    answer = out.split("assistant:")[-1].strip()
    history.append(("user", user_input))
    history.append(("assistant", answer))
    save_memory(history)
    return history, ""
75
 
76
# ================= UI THEME =================
# BUG FIX: "black" is not a defined gradio hue (valid hues are the names in
# gr.themes.colors, e.g. "gray", "neutral"), so Base(...) raised at startup.
# shadow_drop is also not a Base constructor kwarg — it must go through .set().
# NOTE(review): "drop-lg" is passed through as a raw CSS value — confirm the
# intended shadow against the gradio theming guide.
neomorph = gr.themes.Base(
    primary_hue="orange",
    secondary_hue="gray",
    neutral_hue="gray",
    font="IBM Plex Sans",
    radius_size=gr.themes.sizes.radius_sm,
).set(
    shadow_drop="drop-lg",
)
85
 
86
# ================= GRADIO UI =================
with gr.Blocks(theme=neomorph, title="ChatGPT Memory Restoration") as app:
    gr.HTML(
        "<h1 style='text-align:center;color:#FFA500;'>💬 ChatGPT Memory Restoration</h1>"
        "<p style='text-align:center;color:#888;'>Upload your OpenAI export → restore → chat with persistent memory</p>"
    )

    with gr.Tab("📦 Restore Export"):
        zip_in = gr.File(label="Upload your OpenAI Export ZIP (500 MB)", file_types=[".zip"])
        preview_out = gr.JSON(label="Preview (Top 50 messages)")
        restore_btn = gr.Button("🧱 Rebuild Memory", variant="primary")

    with gr.Tab("🧠 Chat Interface"):
        chatbot = gr.Chatbot(label="Persistent Chat", show_label=False, height=550)
        msg_box = gr.Textbox(label="Message", placeholder="Type something…", autofocus=True)
        send_btn = gr.Button("Send", variant="primary")
        clear_btn = gr.Button("Clear Memory", variant="secondary")
        history_state = gr.State(load_memory())

    def _as_pairs(history):
        # Convert flat (role, text) tuples into [user, assistant] pairs,
        # the message format gr.Chatbot renders.
        pairs, pending_user = [], None
        for role, text in history:
            if role == "assistant":
                pairs.append([pending_user, text])
                pending_user = None
            else:
                pending_user = text
        if pending_user is not None:
            pairs.append([pending_user, None])
        return pairs

    # ===== RESTORE HANDLER =====
    def handle_restore(zip_in):
        """Unpack the export, rebuild and persist the history, return a preview."""
        start = time.time()
        paths, tmp_dir = unpack_zip(zip_in)
        chats, count = restore_conversations(paths)
        save_memory([(c["role"], c["text"]) for c in chats])
        print(f"Restored {count} messages from {len(paths)} files in {time.time()-start:.1f}s")
        return summarize(chats), [(c["role"], c["text"]) for c in chats]

    restore_btn.click(fn=handle_restore, inputs=[zip_in], outputs=[preview_out, history_state])

    # ===== CHAT HANDLERS =====
    def _send(history, user_msg):
        # Delegate to chat_reply, then also refresh the visible Chatbot.
        history, cleared_box = chat_reply(history, user_msg)
        return _as_pairs(history), history, cleared_box

    def _clear():
        # "Clear Memory" must also wipe the persisted shelve store, not just RAM.
        save_memory([])
        return [], [], ""

    # BUG FIX: the Chatbot component was never an output of any event, so
    # replies updated only the hidden State and were invisible to the user.
    send_btn.click(fn=_send, inputs=[history_state, msg_box], outputs=[chatbot, history_state, msg_box])
    clear_btn.click(fn=_clear, inputs=None, outputs=[chatbot, history_state, msg_box])
119
 
120
# ================= RUN =================
if __name__ == "__main__":
    # Bind on all interfaces for container/Space deployment.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860, "share": True}
    app.launch(**launch_options)