Spaces:
Build error
Build error
File size: 5,684 Bytes
e335e03 3a3e728 e335e03 40a53b0 e335e03 3a3e728 e335e03 3a3e728 e335e03 40a53b0 e335e03 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
import gradio as gr
import torch
from transformers import AutoTokenizer, TextIteratorStreamer, AutoModelForCausalLM, AutoConfig
import requests
import json
from peft import PeftModel
from threading import Thread
import os
# --- Configuration ---
# Hub repository that the tokenizer, config, base weights and PEFT adapter
# are all requested from.
BASE_MODEL_PATH = "algorythmtechnologies/zenith_coder_v1.1"
# Name of the environment variable that holds the Hugging Face token.
HF_TOKEN_ENV_VAR = "HUGGING_FACE_HUB_TOKEN"

# --- Model Loading ---
# Read the token once and fail fast at import time when it is missing, so a
# misconfigured Space stops here instead of failing later inside a Hub call.
hf_token = os.environ.get(HF_TOKEN_ENV_VAR)
if not hf_token:
    raise ValueError(f"Environment variable {HF_TOKEN_ENV_VAR} not set. Please set it in your Space secrets.")

# `token=` is the current spelling of the authentication keyword; the older
# `use_auth_token=` is deprecated in Transformers and scheduled for removal.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH, token=hf_token)

# Load the repository's config, then overwrite the model name it records.
# NOTE(review): only the recorded name changes here; the weights below are
# still requested from BASE_MODEL_PATH - confirm that this is intended.
config = AutoConfig.from_pretrained(BASE_MODEL_PATH, token=hf_token)
config._name_or_path = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"

# Load the base model in bfloat16 using the adjusted config.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_PATH,
    config=config,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    torch_dtype=torch.bfloat16,
    token=hf_token,
)

# Move the model to the GPU when one is available, otherwise stay on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
base_model.to(device)

# Wrap the base model with the PEFT adapter requested from the same
# repository, then switch to evaluation mode for inference.
model = PeftModel.from_pretrained(base_model, BASE_MODEL_PATH, token=hf_token)
model.eval()
# --- Web Search Function ---
def search(query, timeout=10):
    """Perform a web search through the Serper API.

    Args:
        query: Search string sent as the ``q`` field of the JSON payload.
        timeout: Seconds to wait for the Serper endpoint before giving up.
            Without a timeout a stalled connection would block the caller
            indefinitely.

    Returns:
        The value stored under the ``organic`` key of the JSON response (an
        empty list when that key is absent), or an error string when the API
        key is missing, the request fails, or the body is not valid JSON.
        Callers distinguish the two cases by checking for ``str``.
    """
    serper_api_key = os.environ.get("SERPER_API_KEY")
    if not serper_api_key:
        return "SERPER_API_KEY not found. Please set it as an environment variable in your Hugging Face Space secrets."

    url = "https://google.serper.dev/search"
    payload = json.dumps({"q": query})
    headers = {
        'X-API-KEY': serper_api_key,
        'Content-Type': 'application/json',
    }
    try:
        response = requests.post(url, headers=headers, data=payload, timeout=timeout)
        response.raise_for_status()
        results = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # decode error does not derive from RequestException.
        return f"Error during web search: {e}"
    return results.get('organic', [])
# --- Response Generation ---
def generate_response(message, history):
    """Stream a model reply, optionally grounded in web search results.

    When ``message`` starts with ``search for `` (case-insensitive), the rest
    of the message is sent to ``search`` and the snippets of the first five
    results are prepended to the prompt as context.

    Args:
        message: The latest user message.
        history: Earlier turns as ``(user_msg, assistant_msg)`` pairs; a
            falsy ``assistant_msg`` is skipped.

    Yields:
        The reply accumulated so far, growing as new text is streamed. On
        the search path a single error or status string may be yielded
        instead, after which the generator ends.
    """
    search_prefix = "search for "
    # Handle the web search command.
    if message.lower().startswith(search_prefix):
        search_query = message[len(search_prefix):].strip()
        if not search_query:
            # Do not spend an API call on an empty query.
            yield "Please provide a query after 'search for'."
            return
        search_results = search(search_query)
        if isinstance(search_results, str):  # search() reports errors as strings
            yield search_results
            return
        if not search_results:
            yield "No search results found."
            return
        context = " ".join(res.get('snippet', '') for res in search_results[:5])
        # Prepend the search context to the user's message.
        message = f"Based on the following search results, answer the user's query.\nContext: {context}\n\nUser Query: {message}"

    # Rebuild the conversation in the role/content form the chat template expects.
    chat_messages = []
    for user_msg, assistant_msg in history:
        chat_messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            chat_messages.append({"role": "assistant", "content": assistant_msg})
    chat_messages.append({"role": "user", "content": message})

    prompt = tokenizer.apply_chat_template(chat_messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)

    # Run generation in a worker thread so this generator can consume the
    # streamer while tokens are still being produced.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Yield the accumulated text each time a new piece arrives.
    generated_text = ""
    for new_text in streamer:
        generated_text += new_text
        yield generated_text

    # The stream has ended, so the worker is finishing; wait for it so no
    # stray thread outlives the request.
    thread.join()
# --- Gradio UI ---
with gr.Blocks(theme=gr.themes.Soft(primary_hue="sky", secondary_hue="blue")) as demo:
    # Page title and a short description of what the Space offers.
    gr.HTML("<h1 align='center'>Zenith V1.1 Coder</h1>")
    gr.Markdown("This Space is running [zenith_coder_v1.1](https://huggingface.co/algorythmtechnologies/zenith_coder_v1.1).<br>You can ask coding questions or use the 'search for <query>' command to browse the web.")

    # Conversation pane: no user avatar, a custom avatar for the assistant.
    chat_window = gr.Chatbot(
        height=600,
        avatar_images=(None, "https://i.imgur.com/9kAC4pG.png"),
        bubble_full_width=False,
    )

    # Input box for the user's message.
    prompt_box = gr.Textbox(
        placeholder="Ask me a question or type 'search for <your query>'...",
        container=False,
        scale=7,
    )

    # Chat interface wired to the streaming handler; the retry and undo
    # buttons are disabled.
    gr.ChatInterface(
        generate_response,
        chatbot=chat_window,
        textbox=prompt_box,
        submit_btn="Send",
        retry_btn=None,
        undo_btn=None,
        clear_btn="Clear History",
    )
if __name__ == "__main__":
    # Before launching, remind the operator to set the token if it is missing.
    # NOTE: the model-loading step earlier in this module already raises
    # ValueError when the variable is unset, so this reminder only fires if
    # that check is relaxed.
    if not os.environ.get(HF_TOKEN_ENV_VAR):
        print(f"CRITICAL: Environment variable {HF_TOKEN_ENV_VAR} not found.")
        print("Please set this as a secret in your Hugging Face Space settings.")
    # Start the Gradio server and request a public share link.
    demo.launch(share=True)