Spaces:

contenteaseAI
/

Llama_3.1_API

Build error

contenteaseAI committed on Jul 31, 2024

Commit

add7ded

verified ·

1 Parent(s): 807af7a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,11 @@
 import gradio as gr
 from openai import OpenAI
 import os
 css = '''
 .gradio-container{max-width: 1000px !important}
@@ -12,11 +17,19 @@ footer {
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 client = OpenAI(
     base_url="https://api-inference.huggingface.co/v1/",
     api_key=ACCESS_TOKEN,
 )
 def respond(
     message,
     history,
@@ -69,6 +82,7 @@ def respond(
             Please ensure that the output JSON is well-structured and includes only relevant details about the work to be done.
                   """
     messages = [{"role": "system", "content": SYS_PROMPT}]
     if len(history) == 0:
       pass
     else:
@@ -84,6 +98,9 @@ def respond(
     response = ""
     for message in  client.chat.completions.create(
         model="meta-llama/Meta-Llama-3.1-8B-Instruct",
         max_tokens=max_tokens,
@@ -96,6 +113,9 @@ def respond(
         response += token
         yield response
 DESCRIPTION = '''
 <div>

 import gradio as gr
 from openai import OpenAI
 import os
+import logging
+import time
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 css = '''
 .gradio-container{max-width: 1000px !important}
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
+start_time = time.time()
+logger.info("Loading Client....")
 client = OpenAI(
     base_url="https://api-inference.huggingface.co/v1/",
     api_key=ACCESS_TOKEN,
 )
+end_time = time.time()
+logger.info(f"Client Loaded. Time taken : {end_time - start_time} seconds.")
+#interact with API
 def respond(
     message,
     history,
             Please ensure that the output JSON is well-structured and includes only relevant details about the work to be done.
                   """
     messages = [{"role": "system", "content": SYS_PROMPT}]
     if len(history) == 0:
       pass
     else:
     response = ""
+    start_time = time.time()
+    logger.info("Generating Response....")
     for message in  client.chat.completions.create(
         model="meta-llama/Meta-Llama-3.1-8B-Instruct",
         max_tokens=max_tokens,
         response += token
         yield response
+     end_time = time.time()
+    logger.info(f"Response Generated. Time taken : {end_time - start_time} seconds.")
 DESCRIPTION = '''
 <div>