Spaces:
Build error
Build error
| import gradio as gr | |
| import torch | |
| from transformers import AutoTokenizer, TextIteratorStreamer, AutoModelForCausalLM, AutoConfig | |
| import requests | |
| import json | |
| from peft import PeftModel | |
| from threading import Thread | |
| import os | |
# --- Configuration ---
# Hub repository that every `from_pretrained` call below reads from.
BASE_MODEL_PATH = "algorythmtechnologies/zenith_coder_v1.1"
# Name of the environment variable that carries the Hugging Face access token.
HF_TOKEN_ENV_VAR = "HUGGING_FACE_HUB_TOKEN"

# --- Model Loading ---
# Fail fast at import time: every download below is authenticated with this token.
hf_token = os.environ.get(HF_TOKEN_ENV_VAR)
if not hf_token:
    raise ValueError(f"Environment variable {HF_TOKEN_ENV_VAR} not set. Please set it in your Space secrets.")

# `token=` is the current keyword for Hub authentication; the older
# `use_auth_token=` spelling is deprecated.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH, token=hf_token)

# Load the config from the repository, then point its recorded origin at the
# upstream base checkpoint before the weights are instantiated.
# NOTE(review): `_name_or_path` is a private attribute; confirm this override is
# still needed with the installed transformers version.
config = AutoConfig.from_pretrained(BASE_MODEL_PATH, token=hf_token)
config._name_or_path = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"

# Instantiate the base model with the adjusted config.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_PATH,
    config=config,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    torch_dtype=torch.bfloat16,
    token=hf_token,
)

# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
base_model.to(device)

# Wrap the base model with the PEFT adapter loaded from the same repository,
# and switch to inference mode.
model = PeftModel.from_pretrained(base_model, BASE_MODEL_PATH, token=hf_token)
model.eval()
# --- Web Search Function ---
def search(query, timeout=10):
    """Run a web search through the Serper API.

    Args:
        query: Search string sent as the ``q`` field of the request body.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block the caller
            indefinitely.

    Returns:
        The value of the ``organic`` field of the JSON response (an empty list
        when the field is absent), or a human-readable error string when the
        API key is missing or the request fails.
    """
    serper_api_key = os.environ.get("SERPER_API_KEY")
    if not serper_api_key:
        return "SERPER_API_KEY not found. Please set it as an environment variable in your Hugging Face Space secrets."

    url = "https://google.serper.dev/search"
    payload = json.dumps({"q": query})
    headers = {
        'X-API-KEY': serper_api_key,
        'Content-Type': 'application/json',
    }
    try:
        response = requests.post(url, headers=headers, data=payload, timeout=timeout)
        response.raise_for_status()
        results = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        return f"Error during web search: {e}"
    return results.get('organic', [])
# --- Response Generation ---
def _build_chat_messages(history, message):
    """Turn (user, assistant) history pairs plus the new message into role/content dicts."""
    chat_messages = []
    for user_msg, assistant_msg in history or []:
        chat_messages.append({"role": "user", "content": user_msg})
        # A turn without an assistant reply contributes only its user message.
        if assistant_msg:
            chat_messages.append({"role": "assistant", "content": assistant_msg})
    chat_messages.append({"role": "user", "content": message})
    return chat_messages


def generate_response(message, history):
    """Stream a model reply for ``message``, optionally grounded in a web search.

    A message starting with "search for " (case-insensitive) triggers a web
    search; the snippets of the first five results are prepended to the prompt.

    Args:
        message: The latest user message.
        history: Iterable of ``(user_msg, assistant_msg)`` pairs from earlier
            turns; ``None`` is treated as an empty history.

    Yields:
        The accumulated reply text after each streamed chunk, or a single
        status string when the search fails or returns nothing.

    Raises:
        Exception: Re-raises whatever ``model.generate`` raised in the worker
            thread, after the chunks produced so far have been yielded.
    """
    search_prefix = "search for "
    if message.lower().startswith(search_prefix):
        search_query = message[len(search_prefix):]
        search_results = search(search_query)
        if isinstance(search_results, str):  # search() reports failures as a string
            yield search_results
            return
        if not search_results:
            yield "No search results found."
            return
        context = " ".join(res.get('snippet', '') for res in search_results[:5])
        # Prepend the search context to the user's message.
        message = f"Based on the following search results, answer the user's query.\nContext: {context}\n\nUser Query: {message}"

    # Render the conversation with the tokenizer's chat template.
    chat_messages = _build_chat_messages(history, message)
    prompt = tokenizer.apply_chat_template(chat_messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)

    errors = []

    def _run_generation():
        """Run generation; on failure, record the error and unblock the consumer."""
        try:
            model.generate(**generation_kwargs)
        except Exception as exc:  # thread boundary: re-raised by the consumer below
            errors.append(exc)
            # Without this the loop over `streamer` would wait forever for a
            # stop signal that a crashed generate() never sends.
            streamer.end()

    # Generation runs in a worker thread so chunks can be yielded as they arrive.
    thread = Thread(target=_run_generation)
    thread.start()

    generated_text = ""
    for new_text in streamer:
        generated_text += new_text
        yield generated_text

    thread.join()
    if errors:
        raise errors[0]
# --- Gradio UI ---
# Page layout: a title, a short description, and a chat interface that streams
# replies from `generate_response`.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="sky", secondary_hue="blue")) as demo:
    gr.HTML("<h1 align='center'>Zenith V1.1 Coder</h1>")
    # The angle brackets are written as HTML entities: a literal "<query>" in
    # Markdown is treated as an HTML tag and vanishes from the rendered text.
    gr.Markdown(
        "This Space is running [zenith_coder_v1.1](https://huggingface.co/algorythmtechnologies/zenith_coder_v1.1)."
        "<br>You can ask coding questions or use the 'search for &lt;query&gt;' command to browse the web."
    )
    # NOTE(review): `retry_btn`, `undo_btn`, `clear_btn` and `bubble_full_width`
    # depend on the installed Gradio version; confirm against the pinned version.
    gr.ChatInterface(
        generate_response,
        chatbot=gr.Chatbot(
            height=600,
            avatar_images=(None, "https://i.imgur.com/9kAC4pG.png"),
            bubble_full_width=False,
        ),
        textbox=gr.Textbox(
            # Placeholder text is a plain attribute, so the brackets are safe here.
            placeholder="Ask me a question or type 'search for <your query>'...",
            container=False,
            scale=7,
        ),
        submit_btn="Send",
        retry_btn=None,
        undo_btn=None,
        clear_btn="Clear History",
    )
if __name__ == "__main__":
    # Print a reminder on stdout when the token variable is unset or empty,
    # then start the app with a share link requested.
    token_value = os.environ.get(HF_TOKEN_ENV_VAR)
    if not token_value:
        reminder_lines = (
            f"CRITICAL: Environment variable {HF_TOKEN_ENV_VAR} not found.",
            "Please set this as a secret in your Hugging Face Space settings.",
        )
        for line in reminder_lines:
            print(line)
    demo.launch(share=True)