# SteinAI / app.py
# (Hugging Face Space page header preserved as a comment so the file is valid
#  Python: author "wizhut", commit 931be52 verified, "Update app.py".)
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from transformers import pipeline
import torch
import os
import csv
# --- Model acquisition and loading --------------------------------------
MODEL_INSTRUCT_REPO = "tensorblock/Phi-4-mini-instruct-abliterated-GGUF"
MODEL_INSTRUCT_FILE = "Phi-4-mini-instruct-abliterated-Q5_K_M.gguf"
MODEL_REASONING_REPO = "Triangle104/Phi-4-mini-reasoning-abliterated-Q5_K_M-GGUF"
MODEL_REASONING_FILE = "phi-4-mini-reasoning-abliterated-q5_k_m.gguf"  # Assuming file name based on convention

# hf_hub_download caches under the HF cache dir, so re-runs skip the download.
instruct_path = hf_hub_download(repo_id=MODEL_INSTRUCT_REPO, filename=MODEL_INSTRUCT_FILE)
reasoning_path = hf_hub_download(repo_id=MODEL_REASONING_REPO, filename=MODEL_REASONING_FILE)

print("Loading models...")


def _load_llama(gguf_path):
    """Load a GGUF model for CPU-only inference (raise n_gpu_layers on GPU hosts)."""
    return Llama(gguf_path, n_ctx=4096, n_threads=8, n_gpu_layers=0)


llm_instruct = _load_llama(instruct_path)
llm_reasoning = _load_llama(reasoning_path)

# BLIP captioner: converts an uploaded image into a short text description.
# device=-1 pins the transformers pipeline to CPU.
captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", device=-1)
print("Models loaded!")
def generate_response(prompt, model_choice, max_new_tokens=256, temperature=0.7, image=None):
    """Generate one chat reply with the selected Phi-4-mini variant.

    Args:
        prompt: The user's message text.
        model_choice: "General (Instruct)" selects the instruct model;
            any other value selects the reasoning model.
        max_new_tokens: Upper bound on generated tokens. Gradio sliders can
            deliver floats, so the value is coerced to int before use.
        temperature: Sampling temperature passed through to llama.cpp.
        image: Optional PIL image; if given, its BLIP caption is prepended
            to the prompt so the text-only LLM can use it as context.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    llm = llm_instruct if model_choice == "General (Instruct)" else llm_reasoning

    # Describe the uploaded image (if any) and prepend the caption.
    image_desc = ""
    if image is not None:
        captions = captioner(image)
        image_desc = captions[0]['generated_text'] + "\n"
    full_prompt = image_desc + prompt

    # llama.cpp consumes raw text, so apply the Phi-3/Phi-4-style chat
    # template by hand: <|user|>\n ... <|end|>\n<|assistant|>
    formatted_prompt = f"<|user|>\n{full_prompt}<|end|>\n<|assistant|>"
    response = llm.create_completion(
        formatted_prompt,
        max_tokens=int(max_new_tokens),  # slider values may arrive as floats
        temperature=temperature,
        top_p=0.9,
        stop=["<|end|>"]
    )
    reply = response['choices'][0]['text'].strip()

    # Best-effort logging: a read-only or full filesystem (common on hosted
    # Spaces) must not take down the chat itself.
    # NOTE(review): this records raw user prompts to disk — confirm intended.
    try:
        with open('user_data.csv', 'a', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow([prompt, reply, model_choice, max_new_tokens, temperature])
    except OSError:
        pass
    return reply
# --- Gradio interface ----------------------------------------------------
with gr.Blocks(title="Phi-4-mini Abliterated Chat (Switchable)") as demo:
    gr.Markdown("# Phi-4-mini Abliterated Chat\nSwitch between general instruct and reasoning-focused versions.")

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=["General (Instruct)", "Reasoning Optimized"],
            value="General (Instruct)",
            label="Model Variant"
        )
        max_tokens_slider = gr.Slider(128, 256, value=256, step=1, label="Max New Tokens")

    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(label="Your message", placeholder="Ask anything...")
    image_upload = gr.Image(label="Upload Image", type="pil")
    clear = gr.Button("Clear")

    def respond(user_message, history, variant, token_limit, uploaded_image):
        """Run one chat turn and append the (user, bot) pair to the history."""
        reply = generate_response(
            user_message, variant, max_new_tokens=token_limit, image=uploaded_image
        )
        history.append((user_message, reply))
        # Empty string clears the textbox; None clears the image widget.
        return "", history, None

    msg.submit(respond, [msg, chatbot, model_dropdown, max_tokens_slider, image_upload], [msg, chatbot, image_upload])
    clear.click(lambda: (None, None), None, [chatbot, image_upload], queue=False)

demo.launch()