File size: 8,883 Bytes
f39d8b6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 |
# app.py
import gradio as gr
from huggingface_hub import InferenceClient
import os
# Hub model id used as the initial value of the UI's model-name state, in the
# page header, and for every example prompt.
DEFAULT_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
# Note the name mismatch: the Python constant is HF_TOKEN but the environment
# variable that is read is HF_API_TOKEN. os.getenv returns None when it is unset.
HF_TOKEN = os.getenv("HF_API_TOKEN") # Loads token if set as a secret
# --- Initialize Inference Client ---
# Module-level cache for the most recently created InferenceClient; it stays
# None until get_inference_client() successfully builds one.
client = None
def get_inference_client(model_name):
    """
    Return an InferenceClient bound to ``model_name``, reusing the cached one
    when it already targets that model.

    The module-level ``client`` is replaced whenever a different model is
    requested. Returns None (after printing the error) when the client cannot
    be constructed; in that case any previously cached client is left in place.
    """
    global client

    # Fast path: the cached client already points at the requested model.
    if client is not None and client.model == model_name:
        return client

    try:
        # An empty/unset token is passed as None so that InferenceClient falls
        # back to whatever credentials it can discover on its own (e.g. a CLI
        # login when running locally). If the model needs a token and none is
        # found, the failure surfaces later on the API call.
        client = InferenceClient(model=model_name, token=HF_TOKEN or None)
        token_note = 'provided' if HF_TOKEN else 'not explicitly provided'
        print(f"InferenceClient initialized for {model_name}. Token {token_note}.")
    except Exception as e:
        print(f"Failed to initialize InferenceClient for {model_name}: {e}")
        return None

    return client
# --- Evaluation Logic ---
def evaluate_understanding(prompt, response):
    """
    Give a rough, heuristic label for whether ``response`` shows the model
    understood ``prompt``. This is not a real NLU assessment.

    Checks run in this order and the first hit wins:
      1. empty / whitespace-only response,
      2. response contains a known refusal or confusion phrase,
      3. prompt longer than 7 words but response shorter than 10 words,
      4. response contains the prompt and is less than 1.5x its length
         (only for prompts longer than 5 words),
      5. otherwise "likely understood".

    Args:
        prompt: The user's prompt. Surrounding whitespace is ignored and
            None is treated as an empty prompt.
        response: The model's reply text (may be empty or None).

    Returns:
        A human-readable evaluation string.
    """
    # NOTE(review): the leading symbols in the returned labels look
    # mis-encoded (probably emoji); they are left untouched here - confirm
    # against the deployed file.
    if not response or response.strip() == "":
        return "β Not Understood (Empty or whitespace response)"

    # The model is sent the *stripped* prompt, so compare against the stripped
    # form too; otherwise stray whitespace from the textbox would make the
    # echo check below impossible to satisfy.
    prompt_text = (prompt or "").strip()

    # Case-insensitive matching. Typographic apostrophes are folded to ASCII
    # so that "I’m sorry" is recognised by the "i'm sorry" keyword (the
    # replacement is one-for-one, so lengths are unaffected).
    prompt_lower = prompt_text.lower().replace("\u2019", "'")
    response_lower = response.lower().replace("\u2019", "'")

    misunderstanding_keywords = (
        "i'm sorry", "i apologize", "i cannot", "i am unable", "unable to",
        "i don't understand", "could you please rephrase", "i'm not sure i follow",
        "that's not clear", "i do not have enough information", "as an ai language model, i don't",
        "i'm not programmed to", "i lack the ability to",
    )
    # Report the first keyword (in list order) that appears in the response.
    for keyword in misunderstanding_keywords:
        if keyword in response_lower:
            return f"β οΈ Potentially Not Understood (Contains: '{keyword}')"

    prompt_word_count = len(prompt_text.split())

    if prompt_word_count > 7 and len(response.split()) < 10:
        return "β οΈ Potentially Not Understood (Response seems too short for the prompt)"

    # Echo detection: the response is little more than the prompt repeated.
    is_echo = (
        prompt_lower in response_lower
        and len(response_lower) < len(prompt_lower) * 1.5
    )
    if is_echo and prompt_word_count > 5:
        return "β οΈ Potentially Not Understood (Response might be echoing the prompt)"

    return "βοΈ Likely Understood"
# --- Core Logic: Query Model and Evaluate ---
def query_model_and_evaluate(user_prompt, model_name_to_use):
    """
    Run one prompt through the chosen model and score the reply.

    Returns a 3-tuple ``(text, evaluation, model_name_to_use)``. ``text`` is
    the model's response on success, or a user-facing message when the prompt
    is blank, the client cannot be created, or the API call raises; in those
    cases ``evaluation`` is "Evaluation N/A".
    """
    not_evaluated = "Evaluation N/A"

    # Guard: nothing to send.
    if not user_prompt or not user_prompt.strip():
        return "Please enter a prompt.", not_evaluated, model_name_to_use

    # No up-front block for gated models: the call is simply attempted and any
    # authentication / terms-of-use failure is reported from the API error.
    token_status = 'is set' if HF_TOKEN else 'is NOT set/empty'
    print(f"Querying model: {model_name_to_use}. HF_TOKEN {token_status}.")

    current_client = get_inference_client(model_name_to_use)
    if current_client is None:
        return (
            f"Error: Could not initialize the model API client for {model_name_to_use}. "
            "Check logs. This might be due to the model requiring authentication "
            "(like a token or accepting terms on Hugging Face) which was not available or successful.",
            not_evaluated,
            model_name_to_use,
        )

    try:
        model_id = model_name_to_use.lower()
        cleaned_prompt = user_prompt.strip()

        # Wrap the prompt in the chat template the model family expects.
        if "mistral" in model_id and "instruct" in model_id:
            formatted_prompt = f"<s>[INST] {cleaned_prompt} [/INST]"
        elif "llama-2" in model_id and "chat" in model_id:
            formatted_prompt = (
                "[INST] <<SYS>>\nYou are a helpful assistant. Your goal is to understand "
                "the user's prompt and respond accurately and relevantly.\n"
                f"<</SYS>>\n\n{cleaned_prompt} [/INST]"
            )
        else:
            formatted_prompt = cleaned_prompt

        generated = current_client.text_generation(
            formatted_prompt,
            max_new_tokens=300,
            temperature=0.6,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
            return_full_text=False,
        )
        # Normalise any falsy result (e.g. None) to an empty string.
        model_response_text = generated or ""
    except Exception as e:
        error_message = (
            f"Error calling model API for {model_name_to_use}: {str(e)}. "
            "This can happen if the model is gated, requires a Hugging Face token, "
            "or if you need to accept its terms of use on the Hugging Face website."
        )
        print(error_message)
        return error_message, not_evaluated, model_name_to_use

    # The evaluator receives the prompt exactly as the user typed it.
    understanding_evaluation = evaluate_understanding(user_prompt, model_response_text)
    return model_response_text, understanding_evaluation, model_name_to_use
# --- Gradio Interface Definition ---
# Builds the Gradio UI and binds it to the module-level name `demo`.
# NOTE(review): this copy of the file has lost its leading whitespace, so which
# components sit inside each gr.Row()/gr.Column() below cannot be read off
# here - confirm the nesting against the deployed layout before editing.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange")) as demo:
# Page header. This is an f-string, so DEFAULT_MODEL_NAME is interpolated once
# when the UI is built; the "Selected Model" text does not follow later changes
# to current_model_name_state.
gr.Markdown(
f"""
# π― Model Prompt Understanding Test
Enter a prompt for the selected language model. The application will send this to the model via Hugging Face's Inference API.
The model's response will be analyzed to provide a **basic heuristic assessment** of its understanding.
**Selected Model:** <span id='current-model-display'>{DEFAULT_MODEL_NAME}</span>
"""
)
# Holds the model name passed as the second argument to query_model_and_evaluate.
current_model_name_state = gr.State(DEFAULT_MODEL_NAME)
# Prompt entry.
with gr.Row():
user_input_prompt = gr.Textbox(
label="βοΈ Enter your Prompt:",
placeholder="e.g., Explain the concept of zero-shot learning in 3 sentences.",
lines=4,
scale=3
)
submit_button = gr.Button("π Submit Prompt and Evaluate", variant="primary")
gr.Markdown("---")
gr.Markdown("### π€ Model Response & Evaluation")
# Read-only output widgets (interactive=False).
with gr.Row():
with gr.Column(scale=2):
model_output_response = gr.Textbox(
label="π Model's Response:",
lines=10,
interactive=False,
show_copy_button=True
)
with gr.Column(scale=1):
evaluation_output = gr.Textbox(
label="π§ Understanding Evaluation:",
lines=2,
interactive=False,
show_copy_button=True
)
displayed_model = gr.Textbox(
label="βοΈ Model Used for this Response:",
interactive=False,
lines=1
)
# The three values returned by query_model_and_evaluate fill the response,
# evaluation and model-used boxes, in that order.
submit_button.click(
fn=query_model_and_evaluate,
inputs=[user_input_prompt, current_model_name_state],
outputs=[model_output_response, evaluation_output, displayed_model]
)
gr.Markdown(
"""
---
**Disclaimer:**
* The 'Understanding Evaluation' is a very basic automated heuristic.
* **Using Models:** This app will attempt to connect to the selected model. Some models (especially gated ones like Llama-2) may require you to have a Hugging Face account, accept their terms of use on the Hugging Face website, and might implicitly require a valid `HF_TOKEN` associated with your account (even if not explicitly set as a secret in this Space). If a model call fails, it could be due to these reasons.
* Response quality depends heavily on the chosen model and the clarity of your prompt.
"""
)
# Each example row is [prompt, model name], matching `inputs` below. The last
# example is deliberately nonsensical text.
# NOTE(review): with cache_examples=False and no run_on_click, clicking an
# example presumably only fills the inputs and does not call `fn` - confirm
# against the Gradio Examples documentation.
gr.Examples(
examples=[
["Explain the difference between supervised and unsupervised machine learning.", DEFAULT_MODEL_NAME],
["Write a short poem about a curious robot.", DEFAULT_MODEL_NAME],
["What are the main challenges in developing AGI?", DEFAULT_MODEL_NAME],
["Summarize the plot of 'War and Peace' in one paragraph.", DEFAULT_MODEL_NAME],
["asdfjkl; qwerpoiu", DEFAULT_MODEL_NAME]
],
inputs=[user_input_prompt, current_model_name_state],
outputs=[model_output_response, evaluation_output, displayed_model],
fn=query_model_and_evaluate,
cache_examples=False,
label="π‘ Example Prompts (click to try)"
)
if __name__ == "__main__":
    # Startup diagnostics, then hand control to Gradio.
    print("Attempting to launch Gradio demo...")
    print(f"Default model: {DEFAULT_MODEL_NAME}")
    if not HF_TOKEN:
        # Warn up front: gated models are likely to fail without a token.
        print(
            "HF_TOKEN is NOT set. Some models (especially gated ones like Llama) "
            "might require a token or prior agreement to terms on the Hugging Face "
            "website to function correctly. The app will attempt to run, but API calls may fail."
        )
    else:
        print("HF_TOKEN is set.")
    demo.launch()
|