st192011 committed on
Commit
6f7a25d
·
verified ·
1 Parent(s): 0aa64c9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import subprocess
3
+ import os
4
+
5
# System prompt placed ahead of every user query. It is assembled from
# one sentence per entry so individual instructions are easy to edit.
SYSTEM_INSTRUCTION = " ".join(
    [
        "You are a Socratic assistant.",
        "Do not answer questions directly.",
        "Instead, respond exclusively with 3 deep, reflective questions.",
        "Then generate stop token",
    ]
)

# Location of the GGUF model file; a relative path, so it is resolved
# against the process's current working directory.
MODEL_PATH = "models/BitNet-b1.58-2B-4T/ggml-model-i2_s.gguf"
11
+
12
def clean_socratic_output(raw_stdout, full_prompt):
    """Reduce raw inference output to just the generated questions.

    The text is cleaned in three steps:

    1. Every occurrence of the echoed prompt is removed.
    2. Everything from the earliest stop marker onward is discarded.
    3. A dangling final line consisting solely of ``4`` or ``4.`` (the
       start of an unwanted fourth list item) is dropped.

    Args:
        raw_stdout: Text captured from the inference process's stdout.
        full_prompt: The exact prompt that was sent, removed if echoed back.

    Returns:
        The cleaned, whitespace-stripped text. May be empty when the output
        contained nothing besides the prompt and/or stop markers.
    """
    generated_text = raw_stdout.replace(full_prompt, "").strip()

    # Markers are matched case-sensitively, exactly as listed.
    stop_markers = (
        "Stop token", "stop token", "Stop.", "stop.", "Response:", "Response",
    )
    marker_positions = [
        pos for pos in map(generated_text.find, stop_markers) if pos != -1
    ]
    if marker_positions:
        # Cut at the earliest marker so nothing after it leaks through.
        generated_text = generated_text[:min(marker_positions)].strip()

    # Only drop a trailing line that is *exactly* "4" / "4.". Checking the
    # whole last line (instead of a suffix test) keeps legitimate text that
    # merely ends in the digit 4, such as "... in 2024", fully intact.
    head, newline, last_line = generated_text.rpartition("\n")
    if newline and last_line.strip() in ("4", "4."):
        generated_text = head.strip()

    return generated_text
29
+
30
# Upper bound on a single inference run so a hung child process cannot
# block the request indefinitely.
_INFERENCE_TIMEOUT_SECONDS = 300


def socratic_chat(user_query):
    """Run the model on *user_query* and return its cleaned response.

    The query is wrapped in a System/User/Assistant prompt and handed to
    ``run_inference.py`` in a child process. Arguments are passed as a list
    (no shell), so the user's text is never interpreted by a shell.

    Args:
        user_query: The text typed by the user. ``None``, empty and
            whitespace-only values are rejected.

    Returns:
        The cleaned model output on success, otherwise a human-readable
        message (validation notice or ``"Error running model: ..."``).
        This function reports failures via its return value rather than
        raising, so the UI always has something to display.
    """
    if not user_query or not user_query.strip():
        return "Please enter a valid question."

    formatted_chat_prompt = (
        f"System: {SYSTEM_INSTRUCTION}\nUser: {user_query}\nAssistant:"
    )

    # NOTE(review): flag meanings follow run_inference.py's CLI; -n and
    # -temp are presumably the token budget and sampling temperature.
    cmd = [
        "python3", "run_inference.py",
        "-m", MODEL_PATH,
        "-p", formatted_chat_prompt,
        "-n", "100",
        "-temp", "0.4",
        "-t", "2",  # Matches HF Space 2-vCPU hardware allocation
    ]

    try:
        execution_result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=_INFERENCE_TIMEOUT_SECONDS,
        )
    except subprocess.TimeoutExpired:
        return (
            "Error running model: inference timed out after "
            f"{_INFERENCE_TIMEOUT_SECONDS} seconds."
        )
    except OSError as exc:
        # Raised when the interpreter/script cannot be launched at all.
        return f"Error running model: {exc}"

    # Whitespace-only stdout carries no answer; treat it as a failure
    # instead of showing the user a blank response.
    if execution_result.stdout.strip():
        return clean_socratic_output(
            execution_result.stdout, formatted_chat_prompt
        )

    error_detail = execution_result.stderr.strip() or (
        f"process exited with code {execution_result.returncode} "
        "and produced no output"
    )
    return f"Error running model: {error_detail}"
51
+
52
# Gradio front end: a two-column page with the question box and trigger
# button on the left and the model's reply on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧠 1-Bit Local Socratic Assistant")
    gr.Markdown(
        "This interface is running an official Microsoft BitNet 1.58-bit model "
        "on an optimized CPU backend."
    )

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="What concept are you exploring?",
                placeholder="e.g., What is justice?",
            )
            submit_btn = gr.Button(
                "Generate Socratic Reflection",
                variant="primary",
            )
        with gr.Column():
            output_text = gr.Textbox(label="Socratic Feedback", lines=6)

    # Clicking the button and pressing Enter in the textbox both invoke
    # the same handler with the same input/output components.
    _handler_wiring = dict(
        fn=socratic_chat,
        inputs=input_text,
        outputs=output_text,
    )
    submit_btn.click(**_handler_wiring)
    input_text.submit(**_handler_wiring)

# Start the server only when executed as a script; binds to all interfaces
# on port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)