Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -1,23 +1,27 @@
+import os
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 
-#
+# Get the HF token from environment
+hf_token = os.getenv("HUGGINGFACE_TOKEN")
+
+# Your fine-tuned model
 model_id = "alphaoumardev/Llama3-8B-noryu-instruct"
 
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id)
+# Authenticate with token when loading tokenizer/model
+tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_token)
+model = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=hf_token)
 model.eval()
 
-#
+# Device setup
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 
 def chat(user_input, history=[]):
-    # Add user input to the history
     history.append({"role": "user", "content": user_input})
 
-    # Format prompt
+    # Format the prompt
     prompt = ""
     for turn in history:
         role = turn["role"]
@@ -38,24 +42,19 @@ def chat(user_input, history=[]):
     )
 
     output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    # Extract just the assistant's reply
     assistant_reply = output_text.split("assistant:")[-1].strip()
     history.append({"role": "assistant", "content": assistant_reply})
 
-    #
+    # Gradio expects tuple list format for Chatbot display
    chat_history = [(h["content"], history[i + 1]["content"]) for i, h in enumerate(history[:-1]) if h["role"] == "user"]
     return chat_history, history
 
-#
+# Gradio Blocks UI
 with gr.Blocks() as demo:
     chatbot = gr.Chatbot()
-    state = gr.State([]) #
+    state = gr.State([]) # memory of the conversation
     txt = gr.Textbox(show_label=False, placeholder="Type your message...")
 
-    def user_submit(user_message, history):
-        return chat(user_message, history)
+    txt.submit(chat, [txt, state], [chatbot, state])
 
-    txt.submit(user_submit, [txt, state], [chatbot, state])
-
 demo.launch()
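
A side note on the loading calls, separate from this commit: recent transformers releases deprecate use_auth_token in favor of token. A minimal sketch of the same loading step under that assumption; the env var name and model id are taken from the diff above, and the fp16 cast is an added suggestion, not something the commit does:

import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

hf_token = os.getenv("HUGGINGFACE_TOKEN")  # same env var the commit reads
model_id = "alphaoumardev/Llama3-8B-noryu-instruct"

# token= is the non-deprecated spelling of use_auth_token=
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,
    torch_dtype=torch.float16,  # assumption: halves memory vs fp32
)
model.eval()

Separately, an 8B-parameter model needs roughly 16 GB of memory even in fp16 (about 32 GB in fp32), so the Space's "Runtime error" status may persist on small CPU hardware even once authentication succeeds; a GPU instance or a quantized checkpoint is the usual workaround.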
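
The tuple-pair conversion in chat() exists because the classic gr.Chatbot renders (user, assistant) pairs. Recent Gradio versions also accept OpenAI-style role/content dicts via gr.Chatbot(type="messages"), which would let the handler return the history list directly. A sketch under that assumption, with a stub in place of the model call:

import gradio as gr

def chat(user_input, history):
    # Stand-in for the app's chat(); with type="messages" the Chatbot consumes
    # the same {"role": ..., "content": ...} dicts the history already uses.
    history = history + [{"role": "user", "content": user_input}]
    reply = "(model output would go here)"  # hypothetical stub, no generation
    history = history + [{"role": "assistant", "content": reply}]
    return history, history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")  # accepts role/content dicts directly
    state = gr.State([])
    txt = gr.Textbox(show_label=False, placeholder="Type your message...")
    txt.submit(chat, [txt, state], [chatbot, state])

demo.launch()

One design note: the mutable default history=[] in chat() is shared across calls in Python; it is harmless here only because gr.State always supplies an explicit list, but history=None with an in-function default is the safer idiom.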