codespace

Sleeping

App Files Files Community

ghosthets committed on Jun 24, 2025

Commit

2a3136f

verified ·

1 Parent(s): 63d5a2c

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -32

app.py CHANGED Viewed

@@ -1,41 +1,53 @@
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-# ✅ Load your own model from Hugging Face
-model_id = "dexcommunity/dex"  # 👈 put your own model path here
-# Load tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id)
-# Pick the device: GPU (CUDA) when available, otherwise CPU
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
-# Chat function: wraps the prompt as "User: ...\nDex:", samples a completion,
-# and returns the text after the last "Dex:" marker
-def ask_dex(prompt, max_length=256):
-    input_text = f"User: {prompt}\nDex:"
-    inputs = tokenizer(input_text, return_tensors="pt").to(device)
-    output = model.generate(
-        **inputs,
-        max_length=max_length,
-        do_sample=True,
-        top_k=50,
-        top_p=0.9,
-        temperature=0.7,
-        pad_token_id=tokenizer.eos_token_id
-    )
-    # Decode the first returned sequence and keep only what follows the last "Dex:"
-    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
-    reply = decoded.split("Dex:")[-1].strip()
-    return reply
-# Example use: interactive console chat; type "exit" or "quit" to stop
 if __name__ == "__main__":
-    while True:
-        user_input = input("You: ")
-        if user_input.lower() in ["exit", "quit"]:
-            break
-        response = ask_dex(user_input)
-        print("Dex:", response)

+from flask import Flask, request, jsonify
 from transformers import AutoTokenizer, AutoModelForCausalLM
+from huggingface_hub import login
 import torch
+import os
+# Flask application that serves the /chat endpoint
+app = Flask(__name__)
+# ✅ Read the Hugging Face token from the environment so it never appears in the code
+hf_token = os.getenv("HF_TOKEN")
+if not hf_token:
+    # Fail at startup: the token is needed by login() and both from_pretrained() calls below
+    raise ValueError("HF_TOKEN is not set in environment variables!")
+# 🔐 Authenticate with the Hugging Face Hub
+login(token=hf_token)
+# 🔄 Load tokenizer and model from Hugging Face
+model_id = "dexcommunity/dex"
+print("🔄 Loading model...")
+tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
+model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)
+# Run on the GPU when CUDA is available, otherwise on the CPU
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
+print("✅ Model loaded!")
+@app.route('/chat', methods=['POST'])
+def chat():
+    """Handle POST /chat: generate a Dex reply for the JSON field "message".
+
+    Returns:
+        ({"reply": <text>}, 200) on success;
+        ({"error": ...}, 400) when the body is not a JSON object or carries
+        no message;
+        ({"error": ...}, 500) when tokenization, generation or decoding fails.
+    """
+    # silent=True returns None for a missing or unparsable JSON body instead
+    # of raising, so a malformed request is answered with 400 rather than 500.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        return jsonify({"error": "Request body must be a JSON object"}), 400
+    msg = data.get("message", "")
+    if not msg:
+        return jsonify({"error": "No message sent"}), 400
+    prompt = f"User: {msg}\nDex:"
+    # Only the model calls are guarded; validation errors are handled above.
+    try:
+        inputs = tokenizer(prompt, return_tensors="pt").to(device)
+        outputs = model.generate(
+            inputs.input_ids,
+            # Pass the tokenizer's attention mask along with the input ids,
+            # as the previous revision did via **inputs.
+            attention_mask=inputs.get("attention_mask"),
+            max_length=256,
+            do_sample=True,
+            top_k=50,
+            top_p=0.95,
+            temperature=0.7,
+            pad_token_id=tokenizer.eos_token_id
+        )
+        text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    except Exception:
+        # Keep the traceback in the server log; do not echo internal error
+        # details back to an untrusted HTTP client.
+        app.logger.exception("Generation failed")
+        return jsonify({"error": "Internal server error"}), 500
+    # Keep only what follows the last "Dex:" marker in the decoded text.
+    reply = text.split("Dex:")[-1].strip()
+    return jsonify({"reply": reply})
 if __name__ == "__main__":
+    # Start the Flask server on all network interfaces, port 7860
+    app.run(host='0.0.0.0', port=7860)