Spaces:

Proximile
/

MultitoolLlamaChat

Sleeping

App Files Community

ProximileAdmin committed on Sep 16, 2025

Commit

6fa1688

verified ·

1 Parent(s): 3b9ca6d

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -6

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ import textwrap
 import datetime
 import json
 import gradio as gr
-from openai import OpenAI
 import urllib.request
 import feedparser
 import time
@@ -369,12 +368,18 @@ class LLM:
     def __init__(self, max_model_len: int = 4096):
         self.api_key = OAI_API_KEY
         self.max_model_len = max_model_len
-        self.client = OpenAI(base_url=ENDPOINT_URL, api_key=self.api_key)
-        #models_list = self.client.models.list()
-        #self.model_name = models_list.data[0].id
         self.model_name = MODEL_NAME
     def generate(self, prompt: str, sampling_params: dict) -> dict:
         completion_params = {
             "model": self.model_name,
             "prompt": prompt,
@@ -385,14 +390,47 @@ class LLM:
             "stream": False,
         }
         if "stop" in sampling_params:
             completion_params["stop"] = sampling_params["stop"]
         if "presence_penalty" in sampling_params:
             completion_params["presence_penalty"] = sampling_params["presence_penalty"]
         if "frequency_penalty" in sampling_params:
             completion_params["frequency_penalty"] = sampling_params["frequency_penalty"]
-        return self.client.completions.create(**completion_params)
 def form_chat_prompt(message_history, functions=functions_dict.keys()):
     """Builds the chat prompt for the LLM."""

 import datetime
 import json
 import gradio as gr
 import urllib.request
 import feedparser
 import time
     def __init__(self, max_model_len: int = 4096):
         self.api_key = OAI_API_KEY
         self.max_model_len = max_model_len
+        self.endpoint_url = ENDPOINT_URL
         self.model_name = MODEL_NAME
     def generate(self, prompt: str, sampling_params: dict) -> dict:
+        """
+        Generate completion using direct HTTP request instead of OpenAI SDK.
+        """
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.api_key}"
+        }
         completion_params = {
             "model": self.model_name,
             "prompt": prompt,
             "stream": False,
         }
+        # Add optional parameters if present
         if "stop" in sampling_params:
             completion_params["stop"] = sampling_params["stop"]
         if "presence_penalty" in sampling_params:
             completion_params["presence_penalty"] = sampling_params["presence_penalty"]
         if "frequency_penalty" in sampling_params:
             completion_params["frequency_penalty"] = sampling_params["frequency_penalty"]
+        # Add stop_token_ids if supported by Hyperbolic
+        if "stop_token_ids" in sampling_params:
+            completion_params["stop_token_ids"] = sampling_params["stop_token_ids"]
+        url = f"{self.endpoint_url}/completions"
+        try:
+            response = requests.post(url, headers=headers, json=completion_params)
+            response.raise_for_status()
+            # Format response to match expected structure
+            response_data = response.json()
+            # Create a response object that matches the OpenAI completion format
+            class CompletionResponse:
+                def __init__(self, data):
+                    self.choices = []
+                    if "choices" in data:
+                        for choice in data["choices"]:
+                            self.choices.append(type('Choice', (), {
+                                'text': choice.get('text', ''),
+                                'index': choice.get('index', 0),
+                                'finish_reason': choice.get('finish_reason', None)
+                            })())
+            return CompletionResponse(response_data)
+        except requests.exceptions.RequestException as e:
+            lgs(f"Request failed: {e}")
+            if hasattr(e, 'response') and e.response is not None:
+                lgs(f"Response status: {e.response.status_code}")
+                lgs(f"Response body: {e.response.text}")
+            raise Exception(f"API request failed: {str(e)}")
 def form_chat_prompt(message_history, functions=functions_dict.keys()):
     """Builds the chat prompt for the LLM."""