import gradio as gr
import torch
from transformers import AutoTokenizer, TextIteratorStreamer, AutoModelForCausalLM, AutoConfig
import requests
import json
from peft import PeftModel
from threading import Thread
import os

# --- Configuration ---
# The model is loaded from the Hugging Face Hub.
BASE_MODEL_PATH = "algorythmtechnologies/zenith_coder_v1.1"
# Name of the environment variable that holds the Hugging Face access token.
HF_TOKEN_ENV_VAR = "HUGGING_FACE_HUB_TOKEN"

# --- Model Loading (runs once at import time) ---

# Fail fast with a clear message if the token is missing: the repo is
# private, so every from_pretrained call below would otherwise fail with a
# much less helpful error.
hf_token = os.environ.get(HF_TOKEN_ENV_VAR)

if not hf_token:
    raise ValueError(f"Environment variable {HF_TOKEN_ENV_VAR} not set. Please set it in your Space secrets.")

# Load the tokenizer from the Hub, using the token for private-repo access.
# NOTE(review): `use_auth_token` is deprecated in recent transformers
# releases in favour of `token=` — confirm the pinned transformers version
# before switching.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH, use_auth_token=hf_token)

# Load the config from the user's repo, then point its base-model path at
# the upstream DeepSeek checkpoint so remote code/weights resolve correctly.
# (AutoConfig is already imported at the top of the file.)
config = AutoConfig.from_pretrained(BASE_MODEL_PATH, use_auth_token=hf_token)
config._name_or_path = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"

# Load the base model with the corrected config. bfloat16 plus
# low_cpu_mem_usage keeps the loading-time memory footprint down.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_PATH,
    config=config,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    torch_dtype=torch.bfloat16,
    use_auth_token=hf_token,
)

# Move the model to GPU if one is available, otherwise stay on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
base_model.to(device)

# The PEFT adapter lives in the same repository; PeftModel discovers the
# adapter configuration automatically.
model = PeftModel.from_pretrained(base_model, BASE_MODEL_PATH, use_auth_token=hf_token)
model.eval()

# --- Web Search Function ---
def search(query):
    """Perform a web search via the Serper API.

    Args:
        query: The search query string.

    Returns:
        A list of organic-result dicts on success, or an error-message
        string when the API key is missing or the HTTP request fails.
    """
    serper_api_key = os.environ.get("SERPER_API_KEY")
    if not serper_api_key:
        return "SERPER_API_KEY not found. Please set it as an environment variable in your Hugging Face Space secrets."
    
    url = "https://google.serper.dev/search"
    payload = json.dumps({"q": query})
    headers = {
        'X-API-KEY': serper_api_key,
        'Content-Type': 'application/json'
    }
    try:
        # A timeout prevents the Gradio worker from hanging indefinitely if
        # the Serper endpoint is slow or unreachable; Timeout is a subclass
        # of RequestException, so it is caught below.
        response = requests.post(url, headers=headers, data=payload, timeout=10)
        response.raise_for_status()
        results = response.json()
        # A missing 'organic' key is treated the same as an empty result set.
        return results.get('organic', [])
    except requests.exceptions.RequestException as e:
        return f"Error during web search: {e}"

# --- Response Generation ---
def generate_response(message, history):
    """Generates a response from the model, with optional web search."""
    
    # Handle web search command
    if message.lower().startswith("search for "):
        search_query = message[len("search for "):]
        search_results = search(search_query)
        
        if isinstance(search_results, str): # Error case
            yield search_results
            return

        if not search_results:
            yield "No search results found."
            return

        context = " ".join([res.get('snippet', '') for res in search_results[:5]])
        # Prepend context to the user's message
        message = f"Based on the following search results, answer the user's query.\nContext: {context}\n\nUser Query: {message}"

    # Format chat history and new message using the tokenizer's chat template
    chat_messages = []
    for user_msg, assistant_msg in history:
        chat_messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            chat_messages.append({"role": "assistant", "content": assistant_msg})
    chat_messages.append({"role": "user", "content": message})

    # Apply the chat template
    prompt = tokenizer.apply_chat_template(chat_messages, tokenize=False, add_generation_prompt=True)

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
    
    # Run generation in a separate thread
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Yield generated text as it becomes available
    generated_text = ""
    for new_text in streamer:
        generated_text += new_text
        yield generated_text

# --- Gradio UI ---
with gr.Blocks(theme=gr.themes.Soft(primary_hue="sky", secondary_hue="blue")) as demo:
    gr.HTML("<h1 align='center'>Zenith V1.1 Coder</h1>")
    gr.Markdown("This Space is running [zenith_coder_v1.1](https://huggingface.co/algorythmtechnologies/zenith_coder_v1.1).<br>You can ask coding questions or use the 'search for <query>' command to browse the web.")
    
    gr.ChatInterface(
        generate_response,
        chatbot=gr.Chatbot(
            height=600,
            avatar_images=(None, "https://i.imgur.com/9kAC4pG.png"),
            bubble_full_width=False,
        ),
        textbox=gr.Textbox(
            placeholder="Ask me a question or type 'search for <your query>'...",
            container=False,
            scale=7,
        ),
        submit_btn="Send",
        retry_btn=None,
        undo_btn=None,
        clear_btn="Clear History",
    )

if __name__ == "__main__":
    # No token check is needed here: module import already raised ValueError
    # if HF_TOKEN_ENV_VAR was unset, so execution can never reach this point
    # without a token. (The previous check here was unreachable dead code.)
    # NOTE(review): share=True creates a public tunnel link when run locally;
    # on HF Spaces it is presumably ignored — confirm before removing.
    demo.launch(share=True)