Spaces:

algorythmtechnologies
/

Zenith_Coder-V1.1

Runtime error

App Files Files Community

algorythmtechnologies commited on Nov 19, 2025

Commit

b9dffba

verified ·

1 Parent(s): 84ffb49

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -123

app.py CHANGED Viewed

@@ -1,130 +1,70 @@
 import gradio as gr
-import torch
-from transformers import AutoTokenizer, TextIteratorStreamer, AutoModelForCausalLM
-import requests
-import json
-from peft import PeftModel
-from threading import Thread
-# --- Configuration ---
-BASE_MODEL_PATH = "algorythmtechnologies/zenith_coder_v1.1"
-ADAPTER_SUBFOLDER = "checkpoint-300"
-SERPER_API_KEY = "e43f937b155ec4feafb0458e4a7693b0d4889db4"
-# --- Model Loading ---
-# Load the tokenizer
-tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH)
-# Load the model
-base_model = AutoModelForCausalLM.from_pretrained(
-    BASE_MODEL_PATH,
-    trust_remote_code=True,
-    low_cpu_mem_usage=True,
-    torch_dtype=torch.bfloat16,
 )
-# Move model to appropriate device
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-base_model.to(device)
-# Load the PEFT adapter from the subfolder in the Hub repository
-model = PeftModel.from_pretrained(base_model, BASE_MODEL_PATH, subfolder=ADAPTER_SUBFOLDER)
-model.eval()
-# --- Web Search Function ---
-def search(query):
-    """Performs a web search using the Serper API."""
-    url = "https://google.serper.dev/search"
-    payload = json.dumps({"q": query})
-    headers = {
-        'X-API-KEY': SERPER_API_KEY,
-        'Content-Type': 'application/json'
-    }
-    try:
-        response = requests.request("POST", url, headers=headers, data=payload)
-        response.raise_for_status()
-        results = response.json()
-        return results.get('organic', [])
-    except requests.exceptions.RequestException as e:
-        print(f"Error during web search: {e}")
-        return []
-# --- Response Generation ---
-def generate_response(message, history):
-    """Generates a response from the model, with optional web search."""
-    full_prompt = ""
-    for user_msg, assistant_msg in history:
-        full_prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
-    full_prompt += f"User: {message}\nAssistant:"
-    search_results = None
-    if message.lower().startswith("search for "):
-        search_query = message[len("search for "):]
-        search_results = search(search_query)
-    if search_results:
-        context = " ".join([res.get('snippet', '') for res in search_results[:5]])
-        full_prompt = f"Based on the following search results: {context}\n\nUser: {message}\nAssistant:"
-    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=512)
-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()
-    generated_text = ""
-    for new_text in streamer:
-        generated_text += new_text
-        yield generated_text
-# --- Gradio UI ---
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="sky")) as demo:
-    gr.Markdown("# Zenith")
-    gr.ChatInterface(
-        generate_response,
-        chatbot=gr.Chatbot(
-            height=600,
-            avatar_images=(None, "https://i.imgur.com/9kAC4pG.png"),
-            bubble_full_width=False,
-        ),
-        textbox=gr.Textbox(
-            placeholder="Ask me anything or type 'search for <your query>'...",
-            container=False,
-            scale=7,
-        ),
-        theme="soft",
-        title=None,
-        submit_btn="Send",
-    )
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
+from huggingface_hub import InferenceClient
+def respond(
+    message,
+    history: list[dict[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+    hf_token: gr.OAuthToken,
+):
+    """
+    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+    """
+    client = InferenceClient(token=hf_token.token, model="algorythmtechnologies/zenith_coder_v1.1")
+    messages = [{"role": "system", "content": system_message}]
+    messages.extend(history)
+    messages.append({"role": "user", "content": message})
+    response = ""
+    for message in client.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+    ):
+        choices = message.choices
+        token = ""
+        if len(choices) and choices[0].delta.content:
+            token = choices[0].delta.content
+        response += token
+        yield response
+"""
+For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+"""
+chatbot = gr.ChatInterface(
+    respond,
+    type="messages",
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-p (nucleus sampling)",
+        ),
+    ],
 )
+with gr.Blocks() as demo:
+    with gr.Sidebar():
+        gr.LoginButton()
+    chatbot.render()
 if __name__ == "__main__":
     demo.launch()