Phase-Technologies committed on
Commit
dccfcad
·
verified ·
1 Parent(s): 2d6edc7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+
5
# 1. Repository identifier handed to both from_pretrained() calls below
repo_name = "Phase-Technologies/qwen2.5-math-1.5b-generalized-merged"

print("Loading model into memory... This takes a minute on a CPU.")

# 2. Load the tokenizer first, then the model weights
tokenizer = AutoTokenizer.from_pretrained(repo_name)

# Standard (float32) precision on the CPU: the free tier does not have a
# GPU for 4-bit loading.
_model_load_options = {
    "device_map": "cpu",
    "torch_dtype": torch.float32,
}
model = AutoModelForCausalLM.from_pretrained(repo_name, **_model_load_options)
18
+
19
+ # 3. Define the inference function
20
def generate_response(prompt):
    """Generate the model's answer to a single user prompt.

    The prompt is inserted into the "### Instruction / ### Response"
    template, tokenized, and passed to ``model.generate``. Only the tokens
    produced after the prompt are decoded and returned.

    Args:
        prompt: Text entered by the user.

    Returns:
        The generated text with special tokens removed, or an empty string
        when ``prompt`` is empty or contains only whitespace.
    """
    # Nothing to answer: skip generation entirely for a blank prompt.
    if not prompt or not prompt.strip():
        return ""

    # Apply your training template
    universal_prompt = "### Instruction:\n{}\n\n### Response:\n{}"
    formatted_prompt = universal_prompt.format(prompt, "")

    # Tokenize input and move the tensors to the model's device
    inputs = tokenizer(
        formatted_prompt,
        return_tensors="pt",
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    # Generate output without tracking gradients
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1024,
            # NOTE(review): presumably set so that max_new_tokens is the
            # only length limit in effect -- confirm.
            max_length=None,
            use_cache=True,
            repetition_penalty=1.15,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. Splitting the full decoded
    # text on "### Response:\n" and keeping the last piece would drop part
    # of the answer whenever the generated text (or the user's prompt)
    # contains that marker itself.
    generated_tokens = outputs[0][prompt_length:]
    final_answer = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    return final_answer
47
+
48
# 4. Build the Gradio Web UI
_TITLE_MD = "# 🧠 Phase-Technologies: Generalized Qwen-Math (1.5B)"
_SUBTITLE_MD = "An ultra-lightweight reasoning model fine-tuned for graduate-level proofs and conversational instruction-following."
_PROMPT_PLACEHOLDER = "E.g., What is 2+2? OR Provide a step-by-step proof for the eigenvalues of [[2,1],[1,2]]..."

demo = gr.Blocks(theme=gr.themes.Soft())
with demo:
    # Page header
    gr.Markdown(_TITLE_MD)
    gr.Markdown(_SUBTITLE_MD)

    with gr.Row():
        # First column: prompt entry and the submit button
        with gr.Column():
            user_input = gr.Textbox(
                label="Your Prompt",
                placeholder=_PROMPT_PLACEHOLDER,
                lines=5,
            )
            submit_btn = gr.Button("Generate Response", variant="primary")

        # Second column: the generated text
        with gr.Column():
            output_box = gr.Textbox(label="Model Output", lines=15)

    # Clicking the button sends the prompt text through generate_response
    # and writes the returned string into the output box.
    submit_btn.click(generate_response, user_input, output_box)

# 5. Launch the app
demo.launch()