Spaces:

Shriharsh
/

Mistral_7B_Chat

Sleeping

App Files Community

Shriharsh committed on May 7, 2025

Commit

67cfd82

verified ·

1 Parent(s): b484597

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -34

app.py CHANGED Viewed

@@ -1,9 +1,23 @@
 from huggingface_hub import InferenceClient
 import gradio as gr
-client = InferenceClient(
-    "mistralai/Mistral-7B-Instruct-v0.3"
-)
 def format_prompt(message, history):
     prompt = "<s>"
@@ -16,32 +30,47 @@ def format_prompt(message, history):
 def generate(
     prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
 ):
-    temperature = float(temperature)
-    if temperature < 1e-2:
-        temperature = 1e-2
-    top_p = float(top_p)
-    generate_kwargs = dict(
-        temperature=temperature,
-        max_new_tokens=max_new_tokens,
-        top_p=top_p,
-        repetition_penalty=repetition_penalty,
-        do_sample=True,
-        seed=42,
-    )
-    formatted_prompt = format_prompt(prompt, history)
-    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-    output = ""
-    for response in stream:
-        output += response.token.text
-        yield output
-    return output
-additional_inputs=[
     gr.Slider(
         label="Temperature",
         value=0.9,
@@ -58,7 +87,7 @@ additional_inputs=[
         maximum=1048,
         step=64,
         interactive=True,
-        info="The maximum numbers of new tokens",
     ),
     gr.Slider(
         label="Top-p (nucleus sampling)",
@@ -77,21 +106,26 @@ additional_inputs=[
         step=0.05,
         interactive=True,
         info="Penalize repeated tokens",
-    )
 ]
-# Create a Chatbot object with the desired height
-chatbot = gr.Chatbot(height=450,
-                     layout="bubble")
 with gr.Blocks() as demo:
-    gr.HTML("<h1><center>🤖 Mistral-7B-Chat 💬<h1><center>")
     gr.ChatInterface(
-        generate,
-        chatbot=chatbot,  # Use the created Chatbot object
         additional_inputs=additional_inputs,
-        examples=[["Give me the code for Binary Search in C++"], ["Explain the chapter of The Grand Inquistor from The Brothers Karmazov."], ["Explain Newton's second law."]],
     )
-demo.queue().launch(debug=True)

+import os
+import logging
 from huggingface_hub import InferenceClient
 import gradio as gr
+from requests.exceptions import ConnectionError
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Initialize the Hugging Face Inference Client
+try:
+    client = InferenceClient(
+        model="mistralai/Mistral-7B-Instruct-v0.3",
+        token=os.getenv("HF_TOKEN"),  # Ensure HF_TOKEN is set in your environment
+        timeout=30,
+    )
+except Exception as e:
+    logger.error(f"Failed to initialize InferenceClient: {e}")
+    raise
 def format_prompt(message, history):
     prompt = "<s>"
 def generate(
     prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
 ):
+    try:
+        temperature = float(temperature)
+        if temperature < 1e-2:
+            temperature = 1e-2
+        top_p = float(top_p)
+        generate_kwargs = dict(
+            temperature=temperature,
+            max_new_tokens=max_new_tokens,
+            top_p=top_p,
+            repetition_penalty=repetition_penalty,
+            do_sample=True,
+            seed=42,
+        )
+        formatted_prompt = format_prompt(prompt, history)
+        logger.info("Sending request to Hugging Face API")
+        stream = client.text_generation(
+            formatted_prompt,
+            **generate_kwargs,
+            stream=True,
+            details=True,
+            return_full_text=False,
+        )
+        output = ""
+        for response in stream:
+            output += response.token.text
+            yield output
+        return output
+    except ConnectionError as e:
+        logger.error(f"Network error: {e}")
+        yield "Error: Unable to connect to the Hugging Face API. Please check your internet connection and try again."
+    except Exception as e:
+        logger.error(f"Error during text generation: {e}")
+        yield f"Error: {str(e)}"
+# Define additional inputs for Gradio interface
+additional_inputs = [
     gr.Slider(
         label="Temperature",
         value=0.9,
         maximum=1048,
         step=64,
         interactive=True,
+        info="The maximum number of new tokens",
     ),
     gr.Slider(
         label="Top-p (nucleus sampling)",
         step=0.05,
         interactive=True,
         info="Penalize repeated tokens",
+    ),
 ]
+# Create a Chatbot object
+chatbot = gr.Chatbot(height=450, layout="bubble")
+# Build the Gradio interface
 with gr.Blocks() as demo:
+    gr.HTML("<h1><center>🤖 Mistral-7B-Chat 💬</center></h1>")
     gr.ChatInterface(
+        fn=generate,
+        chatbot=chatbot,
         additional_inputs=additional_inputs,
+        examples=[
+            ["Give me the code for Binary Search in C++"],
+            ["Explain the chapter of The Grand Inquisitor from The Brothers Karamazov."],
+            ["Explain Newton's second law."],
+        ],
     )
+if __name__ == "__main__":
+    logger.info("Starting Gradio application")
+    demo.launch()