Spaces:

nosadaniel
/

fined-model

Sleeping

App Files Files Community

nosadaniel committed on Nov 17, 2025

Commit

31be601

verified ·

1 Parent(s): f127a01

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -16

app.py CHANGED Viewed

@@ -1,7 +1,9 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 def respond(
     message,
     history: list[dict[str, str]],
@@ -14,19 +16,28 @@ def respond(
     """
     For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
     """
-    client = InferenceClient(token=hf_token.token, model="meta-llama/Meta-Llama-3.1-8B-Instruct-LoRa:phishing-email-adJu",  base_url="https://api.tokenfactory.nebius.com/v1/", api_key="v1.CmQKHHN0YXRpY2tleS1lMDBkMXh2ZDdheDAwNXhxMGgSIXNlcnZpY2VhY2NvdW50LWUwMGp0eHNrM3pubjdyYXQ0azIMCPHv7MgGEJ_k6PEBOgwI8PKElAcQwO2YywNAAloDZTAw.AAAAAAAAAAH-boLssQhDYJht_li9Ql7MN1rSmj_8DXmYlZ13NhdavV0NYylvY_HkVQrALXt2z9Pm5_aQn-tt--Mbc1W8G78E")
     messages = [{"role": "system", "content": system_message}]
     messages.extend(history)
     messages.append({"role": "user", "content": message})
     response = ""
     for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
@@ -35,11 +46,11 @@ def respond(
         token = ""
         if len(choices) and choices[0].delta.content:
             token = choices[0].delta.content
         response += token
         yield response
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
@@ -59,12 +70,12 @@ chatbot = gr.ChatInterface(
         ),
     ],
 )
 # Assemble the page: a sidebar holding the login button, with the chat
 # interface rendered in the Blocks body outside the sidebar.
 with gr.Blocks() as demo:
     with gr.Sidebar():
         # NOTE(review): presumably this login supplies the `hf_token` that
         # `respond` reads — confirm against the full function signature.
         gr.LoginButton()
     # `chatbot` is the gr.ChatInterface built earlier in this file.
     chatbot.render()
 # Launch the app only when this file is run as a script.
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
 from huggingface_hub import InferenceClient
+#from transformers import pipeline
+from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T
+from transformers import pipeline
 def respond(
     message,
     history: list[dict[str, str]],
     """
     For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
     """
+    #client = pipeline("text-generation", model="nosadaniel/llama3-1-8b-tuned")
+    #client = InferenceClient(token=hf_token.token, model="nosadaniel/llama3-1-8b-tuned")
+    model="meta-llama/Meta-Llama-3.1-8B-Instruct-LoRa:phishing-email-adJu"
+    base_url="https://api.tokenfactory.nebius.com/v1/"
+    api_key="v1.CmQKHHN0YXRpY2tleS1lMDBkMXh2ZDdheDAwNXhxMGgSIXNlcnZpY2VhY2NvdW50LWUwMGp0eHNrM3pubjdyYXQ0azIMCPHv7MgGEJ_k6PEBOgwI8PKElAcQwO2YywNAAloDZTAw.AAAAAAAAAAH-boLssQhDYJht_li9Ql7MN1rSmj_8DXmYlZ13NhdavV0NYylvY_HkVQrALXt2z9Pm5_aQn-tt--Mbc1W8G78E"
+    client = InferenceClient( base_url=base_url, api_key=api_key, provider=PROVIDER_OR_POLICY_T)
     messages = [{"role": "system", "content": system_message}]
     messages.extend(history)
     messages.append({"role": "user", "content": message})
     response = ""
     for message in client.chat_completion(
+       model = model,
+       messages = messages,
+        max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
         token = ""
         if len(choices) and choices[0].delta.content:
             token = choices[0].delta.content
         response += token
         yield response
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
         ),
     ],
 )
 # Assemble the page: a sidebar holding the login button, with the chat
 # interface rendered in the Blocks body outside the sidebar.
 with gr.Blocks() as demo:
     with gr.Sidebar():
         # NOTE(review): presumably this login supplies the `hf_token` that
         # `respond` reads — confirm against the full function signature.
         gr.LoginButton()
     # `chatbot` is the gr.ChatInterface built earlier in this file.
     chatbot.render()
 # Launch the app only when this file is run as a script.
 if __name__ == "__main__":
     demo.launch()