Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
# 1. Hugging Face Hub repository that hosts the checkpoint served by this app.
repo_name = "Phase-Technologies/qwen2.5-math-1.5b-generalized-merged"

print("Loading model into memory... This takes a minute on a CPU.")

# 2. Load the tokenizer and the model once, at import time.
# Weights are kept in full float32 on the CPU: the free tier has no GPU,
# so 4-bit loading is not an option here.
tokenizer = AutoTokenizer.from_pretrained(repo_name)
model = AutoModelForCausalLM.from_pretrained(
    repo_name, torch_dtype=torch.float32, device_map="cpu"
)
# 3. Define the inference function
def generate_response(prompt):
    """Generate the model's answer to a single user instruction.

    The instruction is wrapped in the "### Instruction / ### Response"
    training template, tokenized, and passed to ``model.generate``. Only the
    tokens produced after the prompt are decoded, so the answer is returned
    intact even when the completion (or the user's own text) contains the
    "### Response:" marker again.

    Args:
        prompt: Instruction text entered by the user.

    Returns:
        The decoded completion as a string, or an empty string when
        ``prompt`` is ``None``, empty, or whitespace-only.
    """
    # A blank instruction would still trigger a generation of up to 1024 new
    # tokens on CPU; there is nothing to answer, so return immediately.
    if not prompt or not prompt.strip():
        return ""

    # Apply the training template; the response slot is left empty for the
    # model to fill in.
    universal_prompt = "### Instruction:\n{}\n\n### Response:\n{}"
    formatted_prompt = universal_prompt.format(prompt, "")

    # Tokenize and move the tensors to wherever the model lives.
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1024,
            # NOTE(review): presumably clears any max_length coming from the
            # generation config so only max_new_tokens applies -- confirm.
            max_length=None,
            use_cache=True,
            repetition_penalty=1.15,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The generated sequence starts with the prompt tokens; slice them off
    # instead of searching the decoded text for the template marker.
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)
# 4. Build the Gradio Web UI
# Layout: a title and a tagline, then one row with the prompt and its submit
# button in the left column and the model output in the right column.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧠 Phase-Technologies: Generalized Qwen-Math (1.5B)")
    gr.Markdown(
        "An ultra-lightweight reasoning model fine-tuned for graduate-level "
        "proofs and conversational instruction-following."
    )

    with gr.Row():
        with gr.Column():
            user_input = gr.Textbox(
                label="Your Prompt",
                placeholder=(
                    "E.g., What is 2+2? OR Provide a step-by-step proof for "
                    "the eigenvalues of [[2,1],[1,2]]..."
                ),
                lines=5,
            )
            submit_btn = gr.Button(value="Generate Response", variant="primary")

        with gr.Column():
            output_box = gr.Textbox(label="Model Output", lines=15)

    # Clicking the button sends the prompt text through generate_response
    # and shows the returned string in the output box.
    submit_btn.click(generate_response, inputs=user_input, outputs=output_box)

# 5. Launch the app
demo.launch()