# SteinAI / app.py
# (Hugging Face Space page header preserved as a comment so the file is valid
#  Python: author "wizhut", commit 931be52 verified, "Update app.py".)
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from transformers import pipeline
import torch
import os
import csv
# --- Model acquisition and loading --------------------------------------
MODEL_INSTRUCT_REPO = "tensorblock/Phi-4-mini-instruct-abliterated-GGUF"
MODEL_INSTRUCT_FILE = "Phi-4-mini-instruct-abliterated-Q5_K_M.gguf"
MODEL_REASONING_REPO = "Triangle104/Phi-4-mini-reasoning-abliterated-Q5_K_M-GGUF"
MODEL_REASONING_FILE = "phi-4-mini-reasoning-abliterated-q5_k_m.gguf"  # Assuming file name based on convention

# hf_hub_download caches under the HF cache dir, so re-runs skip the download.
instruct_path = hf_hub_download(repo_id=MODEL_INSTRUCT_REPO, filename=MODEL_INSTRUCT_FILE)
reasoning_path = hf_hub_download(repo_id=MODEL_REASONING_REPO, filename=MODEL_REASONING_FILE)

print("Loading models...")


def _load_llama(gguf_path):
    """Load a GGUF model for CPU-only inference (raise n_gpu_layers on GPU hosts)."""
    return Llama(gguf_path, n_ctx=4096, n_threads=8, n_gpu_layers=0)


llm_instruct = _load_llama(instruct_path)
llm_reasoning = _load_llama(reasoning_path)

# BLIP captioner: converts an uploaded image into a short text description.
# device=-1 pins the transformers pipeline to CPU.
captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", device=-1)
print("Models loaded!")
def generate_response(prompt, model_choice, max_new_tokens=256, temperature=0.7, image=None):
    """Generate one chat reply with the selected Phi-4-mini variant.

    Args:
        prompt: The user's message text.
        model_choice: "General (Instruct)" selects the instruct model;
            any other value selects the reasoning model.
        max_new_tokens: Upper bound on generated tokens. Gradio sliders can
            deliver floats, so the value is coerced to int before use.
        temperature: Sampling temperature passed through to llama.cpp.
        image: Optional PIL image; if given, its BLIP caption is prepended
            to the prompt so the text-only LLM can use it as context.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    llm = llm_instruct if model_choice == "General (Instruct)" else llm_reasoning

    # Describe the uploaded image (if any) and prepend the caption.
    image_desc = ""
    if image is not None:
        captions = captioner(image)
        image_desc = captions[0]['generated_text'] + "\n"
    full_prompt = image_desc + prompt

    # llama.cpp consumes raw text, so apply the Phi-3/Phi-4-style chat
    # template by hand: <|user|>\n ... <|end|>\n<|assistant|>
    formatted_prompt = f"<|user|>\n{full_prompt}<|end|>\n<|assistant|>"
    response = llm.create_completion(
        formatted_prompt,
        max_tokens=int(max_new_tokens),  # slider values may arrive as floats
        temperature=temperature,
        top_p=0.9,
        stop=["<|end|>"]
    )
    reply = response['choices'][0]['text'].strip()

    # Best-effort logging: a read-only or full filesystem (common on hosted
    # Spaces) must not take down the chat itself.
    # NOTE(review): this records raw user prompts to disk — confirm intended.
    try:
        with open('user_data.csv', 'a', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow([prompt, reply, model_choice, max_new_tokens, temperature])
    except OSError:
        pass
    return reply
# --- Gradio interface ----------------------------------------------------
with gr.Blocks(title="Phi-4-mini Abliterated Chat (Switchable)") as demo:
    gr.Markdown("# Phi-4-mini Abliterated Chat\nSwitch between general instruct and reasoning-focused versions.")

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=["General (Instruct)", "Reasoning Optimized"],
            value="General (Instruct)",
            label="Model Variant"
        )
        max_tokens_slider = gr.Slider(128, 256, value=256, step=1, label="Max New Tokens")

    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(label="Your message", placeholder="Ask anything...")
    image_upload = gr.Image(label="Upload Image", type="pil")
    clear = gr.Button("Clear")

    def respond(user_message, history, variant, token_limit, uploaded_image):
        """Run one chat turn and append the (user, bot) pair to the history."""
        reply = generate_response(
            user_message, variant, max_new_tokens=token_limit, image=uploaded_image
        )
        history.append((user_message, reply))
        # Empty string clears the textbox; None clears the image widget.
        return "", history, None

    msg.submit(respond, [msg, chatbot, model_dropdown, max_tokens_slider, image_upload], [msg, chatbot, image_upload])
    clear.click(lambda: (None, None), None, [chatbot, image_upload], queue=False)

demo.launch()