import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from transformers import pipeline
import csv

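# Quantized, abliterated Phi-4-mini GGUF checkpoints on the Hugging Face Hub.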
MODEL_INSTRUCT_REPO = "tensorblock/Phi-4-mini-instruct-abliterated-GGUF"
MODEL_INSTRUCT_FILE = "Phi-4-mini-instruct-abliterated-Q5_K_M.gguf"

MODEL_REASONING_REPO = "Triangle104/Phi-4-mini-reasoning-abliterated-Q5_K_M-GGUF"
MODEL_REASONING_FILE = "phi-4-mini-reasoning-abliterated-q5_k_m.gguf"

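# Download both GGUF files (hf_hub_download caches them locally after the first run).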
instruct_path = hf_hub_download(repo_id=MODEL_INSTRUCT_REPO, filename=MODEL_INSTRUCT_FILE)
reasoning_path = hf_hub_download(repo_id=MODEL_REASONING_REPO, filename=MODEL_REASONING_FILE)

print("Loading models...")
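# CPU-only inference: n_gpu_layers=0 keeps every layer on the CPU; tune n_threads to the host.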
llm_instruct = Llama(model_path=instruct_path, n_ctx=4096, n_threads=8, n_gpu_layers=0)
llm_reasoning = Llama(model_path=reasoning_path, n_ctx=4096, n_threads=8, n_gpu_layers=0)

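# BLIP captioner used to describe uploaded images (device=-1 runs it on the CPU).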
captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", device=-1)

print("Models loaded!")

def generate_response(prompt, model_choice, max_new_tokens=256, temperature=0.7, image=None):
    if model_choice == "General (Instruct)":
        llm = llm_instruct
    else:
        llm = llm_reasoning

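    # If an image was uploaded, caption it and prepend the description to the prompt.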
    image_desc = ""
    if image is not None:
        captions = captioner(image)
        image_desc = captions[0]['generated_text'] + "\n"

    full_prompt = image_desc + prompt

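    # Phi-4-mini chat template: user turn, <|end|> separator, then the assistant header.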
    formatted_prompt = f"<|user|>\n{full_prompt}<|end|>\n<|assistant|>"

    response = llm.create_completion(
        formatted_prompt,
        max_tokens=int(max_new_tokens),  # Gradio sliders can deliver floats
        temperature=temperature,
        top_p=0.9,
        stop=["<|end|>"]
    )

    reply = response['choices'][0]['text'].strip()

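    # Append each exchange to a local CSV log (records prompts and replies in plain text).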
    with open('user_data.csv', 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow([prompt, reply, model_choice, max_new_tokens, temperature])

    return reply

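# Gradio UI: model variant dropdown, token slider, chat window, and optional image input.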
with gr.Blocks(title="Phi-4-mini Abliterated Chat (Switchable)") as demo:
    gr.Markdown("# Phi-4-mini Abliterated Chat\nSwitch between general instruct and reasoning-focused versions.")

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=["General (Instruct)", "Reasoning Optimized"],
            value="General (Instruct)",
            label="Model Variant"
        )
        max_tokens_slider = gr.Slider(128, 256, value=256, step=1, label="Max New Tokens")

    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(label="Your message", placeholder="Ask anything...")
    image_upload = gr.Image(label="Upload Image", type="pil")
    clear = gr.Button("Clear")

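    # Generate a reply, append the turn to the chat history, and clear both inputs.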
    def respond(message, chat_history, model_choice, max_new_tokens, image):
        response = generate_response(message, model_choice, max_new_tokens=max_new_tokens, image=image)
        chat_history.append((message, response))
        return "", chat_history, None

    msg.submit(respond, [msg, chatbot, model_dropdown, max_tokens_slider, image_upload], [msg, chatbot, image_upload])
    clear.click(lambda: (None, None), None, [chatbot, image_upload], queue=False)

demo.launch()