import gradio as gr import torch from transformers import AutoTokenizer, TextIteratorStreamer, AutoModelForCausalLM, AutoConfig import requests import json from peft import PeftModel from threading import Thread import os # --- Configuration --- # The model is loaded from the Hugging Face Hub BASE_MODEL_PATH = "algorythmtechnologies/zenith_coder_v1.1" # Name of the environment variable for the Hugging Face token HF_TOKEN_ENV_VAR = "HUGGING_FACE_HUB_TOKEN" # --- Model Loading --- # Get the Hugging Face token from environment variables hf_token = os.environ.get(HF_TOKEN_ENV_VAR) if not hf_token: raise ValueError(f"Environment variable {HF_TOKEN_ENV_VAR} not set. Please set it in your Space secrets.") # Load the tokenizer from the Hub, using the token for private models tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH, use_auth_token=hf_token) from transformers import AutoConfig # Load the config from the user's repo config = AutoConfig.from_pretrained(BASE_MODEL_PATH, use_auth_token=hf_token) # Correct the base model path in the config config._name_or_path = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct" # Load the base model from the Hub using the corrected config base_model = AutoModelForCausalLM.from_pretrained( BASE_MODEL_PATH, config=config, trust_remote_code=True, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16, use_auth_token=hf_token ) # Move model to the appropriate device (GPU if available, otherwise CPU) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") base_model.to(device) # The PEFT model is loaded from the same repository. # PeftModel will automatically find the adapter configuration. model = PeftModel.from_pretrained(base_model, BASE_MODEL_PATH, use_auth_token=hf_token) model.eval() # --- Web Search Function --- def search(query): """Performs a web search using the Serper API.""" serper_api_key = os.environ.get("SERPER_API_KEY") if not serper_api_key: return "SERPER_API_KEY not found. Please set it as an environment variable in your Hugging Face Space secrets." url = "https://google.serper.dev/search" payload = json.dumps({"q": query}) headers = { 'X-API-KEY': serper_api_key, 'Content-Type': 'application/json' } try: response = requests.request("POST", url, headers=headers, data=payload) response.raise_for_status() results = response.json() return results.get('organic', []) except requests.exceptions.RequestException as e: return f"Error during web search: {e}" # --- Response Generation --- def generate_response(message, history): """Generates a response from the model, with optional web search.""" # Handle web search command if message.lower().startswith("search for "): search_query = message[len("search for "):] search_results = search(search_query) if isinstance(search_results, str): # Error case yield search_results return if not search_results: yield "No search results found." return context = " ".join([res.get('snippet', '') for res in search_results[:5]]) # Prepend context to the user's message message = f"Based on the following search results, answer the user's query.\nContext: {context}\n\nUser Query: {message}" # Format chat history and new message using the tokenizer's chat template chat_messages = [] for user_msg, assistant_msg in history: chat_messages.append({"role": "user", "content": user_msg}) if assistant_msg: chat_messages.append({"role": "assistant", "content": assistant_msg}) chat_messages.append({"role": "user", "content": message}) # Apply the chat template prompt = tokenizer.apply_chat_template(chat_messages, tokenize=False, add_generation_prompt=True) inputs = tokenizer(prompt, return_tensors="pt").to(device) streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024) # Run generation in a separate thread thread = Thread(target=model.generate, kwargs=generation_kwargs) thread.start() # Yield generated text as it becomes available generated_text = "" for new_text in streamer: generated_text += new_text yield generated_text # --- Gradio UI --- with gr.Blocks(theme=gr.themes.Soft(primary_hue="sky", secondary_hue="blue")) as demo: gr.HTML("