Spaces:

Euryeth
/

LLM_Ariphes

Runtime error

App Files Files Community

Euryeth committed on Jun 9, 2025

Commit

65e7b56

verified ·

1 Parent(s): d4203b5

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -19

app.py CHANGED Viewed

@@ -7,17 +7,16 @@ from huggingface_hub import login
 from flask import Flask, request, jsonify, Response
 import gradio as gr
-# Authenticate with Hugging Face
 login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))
 API_TOKEN = os.getenv("HF_API_TOKEN")
-# Load model and tokenizer
 model_name = "cerebras/btlm-3b-8k-chat"
 revision = "main"
 torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
 os.environ['HF_HOME'] = '/tmp/cache'
-print("Loading model and tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, revision=revision)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
@@ -37,7 +36,7 @@ generator = pipeline(
     trust_remote_code=True
 )
-# Initialize Flask app
 app = Flask(__name__)
 @app.route("/")
@@ -116,36 +115,39 @@ def chat():
         }]
     })
-# Gradio Chat Interface
-chat_history = []
-def gradio_chat(message, history):
-    global chat_history
-    messages = [{"role": "user", "content": message}]
-    prompt = "User: {}\nAssistant:".format(message)
     output = generator(
-        prompt,
         max_new_tokens=256,
         temperature=0.7,
         top_p=0.9,
         repetition_penalty=1.1,
         do_sample=True
     )
-    reply = output[0]['generated_text'].replace(prompt, "").strip()
-    history.append((message, reply))
     return history, history
 with gr.Blocks() as demo:
-    gr.Markdown("### 🧠 Skyrim NPC LLM Interface")
     chatbot = gr.Chatbot()
-    msg = gr.Textbox()
     clear = gr.Button("Clear")
-    msg.submit(gradio_chat, [msg, chatbot], [chatbot, chatbot])
-    clear.click(lambda: [], None, chatbot)
-demo.launch(share=False, inline=True)
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=8080)

 from flask import Flask, request, jsonify, Response
 import gradio as gr
+# Hugging Face Auth
 login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))
 API_TOKEN = os.getenv("HF_API_TOKEN")
+# Model config
 model_name = "cerebras/btlm-3b-8k-chat"
 revision = "main"
 torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
 os.environ['HF_HOME'] = '/tmp/cache'
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, revision=revision)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     trust_remote_code=True
 )
+# Flask backend
 app = Flask(__name__)
 @app.route("/")
         }]
     })
+# ✅ Gradio Chat UI
+def gradio_chat(user_input, history=None):
+    """Answer one chat message and return the updated conversation.
+
+    Args:
+        user_input: The message just submitted by the user.
+        history: Earlier turns as (user message, assistant reply) pairs.
+            A fresh list is created when omitted; a list that is passed
+            in is extended in place.
+
+    Returns:
+        The updated history twice: once for the chatbot display and once
+        for the stored conversation state.
+    """
+    # A literal [] default would be shared by every call that omits history.
+    if history is None:
+        history = []
+    # Replay earlier turns so the model sees the whole conversation.
+    past_turns = "".join(
+        f"User: {turn[0]}\nAssistant: {turn[1]}\n" for turn in history
+    )
+    full_prompt = f"{past_turns}User: {user_input}\nAssistant:"
     output = generator(
+        full_prompt,
         max_new_tokens=256,
         temperature=0.7,
         top_p=0.9,
         repetition_penalty=1.1,
         do_sample=True
     )
+    generated = output[0]["generated_text"]
+    # Drop only the leading echo of the prompt; str.replace would also
+    # delete any later occurrence of the prompt text inside the reply.
+    if generated.startswith(full_prompt):
+        generated = generated[len(full_prompt):]
+    reply = generated.strip()
+    history.append((user_input, reply))
     return history, history
 with gr.Blocks() as demo:
+    gr.Markdown("## 💬 Chat with Ariphes (LLM-powered)")
     chatbot = gr.Chatbot()
+    msg = gr.Textbox(placeholder="Ask me anything...", label="Message")
     clear = gr.Button("Clear")
+    state = gr.State([])
+    msg.submit(gradio_chat, [msg, state], [chatbot, state])
+    clear.click(lambda: ([], []), None, [chatbot, state])
+# ✅ Enable share=True so Hugging Face can access it
+demo.launch(share=True)
+# ✅ Still serve API endpoint for OpenAI-compatible connector
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=8080)