Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import base64 | |
| from flask import Flask, request, jsonify, render_template_string | |
| from huggingface_hub import InferenceClient | |
# Flask application object that the request handlers in this module belong to.
app = Flask(__name__)

# Hugging Face access token taken from the environment (None when unset).
HF_TOKEN = os.environ.get("HF_TOKEN")

# Switched the model to Llama Vision (it is more lenient with the API).
MODEL_ID = "meta-llama/Llama-3.2-11B-Vision-Instruct"
client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)

# Process-wide mutable state shared by every request handler.
state = dict(
    task=None,  # pending command text; None when nothing is queued
    task_id=0,  # identifier supplied together with the command
    last_reply="Джарвис на связи.",  # text of the most recent "speak" action
    status="READY",  # handlers switch this to PENDING / DONE / ERROR
    raw_ai_output="",  # last raw model reply, kept for debugging
)
@app.route("/set_task", methods=["POST"])
def set_task():
    """Queue a new command for the agent and mark it as pending.

    Registered at ``POST /set_task``, the URL the control page posts to.
    Expects a JSON object with ``task`` (the command) and, optionally,
    ``id`` (stored as the task id, 0 when absent).

    Returns:
        ``{"status": "ok"}`` once the command is stored, or a 400 response
        with an ``error`` message when the body is not a JSON object or
        carries no command.
    """
    # silent=True yields None instead of raising on a missing/invalid body.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"status": "error", "error": "JSON object body required"}), 400

    task = payload.get("task")
    if task is None or (isinstance(task, str) and not task.strip()):
        # A blank command gives the agent nothing to execute, so do not
        # flip the status to PENDING for it.
        return jsonify({"status": "error", "error": "task is required"}), 400

    state["task"] = task
    state["task_id"] = payload.get("id", 0)
    state["status"] = "PENDING"
    return jsonify({"status": "ok"})
# NOTE(review): the client that polls this endpoint is not part of this file;
# the "/get_task" path is assumed to mirror the function name - confirm.
@app.route("/get_task")
def get_task():
    """Return the currently queued command and its id as JSON.

    Returns:
        ``{"task": ..., "id": ...}`` built from the shared ``state``;
        ``task`` is ``None`` when nothing is queued.
    """
    pending = {"task": state["task"], "id": state["task_id"]}
    return jsonify(pending)
def _parse_actions(raw):
    """Extract the action list from the model's reply text.

    Takes the span from the first ``[`` to the last ``]`` and decodes it,
    trying strict JSON first, then a Python literal (the prompt's example
    uses single quotes), then the legacy quote-replacement fallback.

    Raises:
        ValueError: if no bracketed span exists or it cannot be decoded.
    """
    import ast  # local import: only this helper needs it

    start = raw.find("[")
    end = raw.rfind("]") + 1
    if start == -1 or end <= start:
        raise ValueError("JSON not found in response")
    snippet = raw[start:end]

    try:
        # Valid JSON must be tried untouched: blindly swapping quotes would
        # corrupt strings that contain apostrophes.
        return json.loads(snippet)
    except ValueError:
        pass
    try:
        return ast.literal_eval(snippet)
    except (ValueError, SyntaxError, TypeError):
        # Last resort: the original behaviour (covers single-quoted text
        # mixed with JSON-only tokens such as true/false/null).
        return json.loads(snippet.replace("'", '"'))


def _build_messages(task, img_b64):
    """Build the chat messages for *task*, attaching the screenshot if given."""
    instructions = (
        f"Task: {task}. You are a PC robot. "
        "Output ONLY a JSON array of actions like click(cell 1-100), "
        "type(text), press(key), wait(sec), speak(text). "
        "Example: [{'type':'press','key':'win'},{'type':'type','text':'notepad'}]"
    )
    if not isinstance(img_b64, str) or not img_b64:
        # No screenshot supplied: send the original text-only prompt unchanged.
        return [{"role": "user", "content": "<|image|>\n" + instructions}]

    if img_b64.startswith("data:"):
        image_url = img_b64
    else:
        # Base64 of the PNG signature starts with "iVBOR"; anything else is
        # presumed to be JPEG - NOTE(review): confirm what the agent sends.
        mime = "image/png" if img_b64.startswith("iVBOR") else "image/jpeg"
        image_url = f"data:{mime};base64,{img_b64}"

    # The screenshot travels as a structured image part, so the inline
    # "<|image|>" placeholder is not repeated in the text part.
    return [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_url}},
                {"type": "text", "text": instructions},
            ],
        }
    ]


# NOTE(review): the agent that calls this endpoint is not part of this file;
# the "/process" path is assumed to mirror the function name - confirm.
@app.route("/process", methods=["POST"])
def process():
    """Ask the vision model to turn the pending task into a list of actions.

    Reads the optional base64 screenshot from the JSON field ``img``, sends it
    with the task prompt to the model, stores the raw reply in
    ``state["raw_ai_output"]`` and parses the action list out of it. The text
    of a ``speak`` action becomes ``state["last_reply"]``. On success the task
    is cleared and the status set to ``DONE``.

    Returns:
        ``{"actions": [...]}`` on success (an empty list when no task is
        pending), or ``{"error": ...}`` with HTTP 500 and status ``ERROR``
        when the model call or the parsing fails.
    """
    payload = request.get_json(silent=True)
    img_b64 = payload.get("img") if isinstance(payload, dict) else None

    task = state["task"]
    if not task:
        # Nothing queued: do not spend a model call on "Task: None".
        return jsonify({"actions": []})

    try:
        response = client.chat_completion(
            messages=_build_messages(task, img_b64),
            max_tokens=300,
        )
        # content may be None when the model returns no text.
        ai_res = (response.choices[0].message.content or "").strip()
        state["raw_ai_output"] = ai_res

        actions = _parse_actions(ai_res)
        for action in actions:
            if action["type"] == "speak":
                state["last_reply"] = action["text"]

        state["task"] = None
        state["status"] = "DONE"
        return jsonify({"actions": actions})
    except Exception as e:
        # Top-level boundary: record the traceback instead of losing it, and
        # keep the task queued so the caller may retry.
        app.logger.exception("process() failed")
        state["status"] = "ERROR"
        return jsonify({"error": str(e)}), 500
@app.route("/status")
def get_status():
    """Return the whole shared ``state`` dict as JSON.

    Registered at ``GET /status``, the URL the control page polls to refresh
    the status line, the raw model output and the spoken reply.
    """
    return jsonify(state)
@app.route("/")
def index():
    """Serve the single-page control UI.

    The page posts the typed command to ``/set_task`` (using ``Date.now()``
    as the id), polls ``/status`` every 2000 ms, shows ``status`` and
    ``raw_ai_output``, and reads a changed ``last_reply`` aloud through the
    browser's speech synthesis with the ``ru-RU`` voice.
    """
    return render_template_string('''
<!DOCTYPE html>
<html>
<head>
    <title>Jarvis Command</title>
    <style>
        body { background: #000; color: #fff; font-family: -apple-system, sans-serif; display: flex; justify-content: center; align-items: center; min-height: 100vh; margin: 0; }
        .glass { background: rgba(255,255,255,0.05); backdrop-filter: blur(20px); border: 1px solid rgba(255,255,255,0.1); border-radius: 30px; padding: 40px; width: 400px; text-align: center; }
        input { background: rgba(255,255,255,0.1); border: none; border-radius: 12px; color: #fff; padding: 15px; width: 100%; box-sizing: border-box; margin: 20px 0; outline: none; }
        button { background: #0A84FF; border: none; border-radius: 12px; color: #fff; padding: 15px; width: 100%; cursor: pointer; font-weight: bold; }
        .status { margin-top: 20px; color: #30D158; font-size: 14px; }
    </style>
</head>
<body>
    <div class="glass">
        <h1>Jarvis OS</h1>
        <div id="st" style="opacity:0.3; font-size:10px;">IDLE</div>
        <input type="text" id="in" placeholder="Ваша команда...">
        <button onclick="s()">ОТПРАВИТЬ</button>
        <div id="re" class="status">Ожидание...</div>
        <div style="font-size:8px; color:#222; margin-top:10px;" id="raw"></div>
    </div>
    <script>
        let l = "";
        async function s() {
            const t = document.getElementById('in').value;
            await fetch('/set_task', {method:'POST', headers:{'Content-Type':'application/json'}, body:JSON.stringify({task: t, id: Date.now()})});
            document.getElementById('in').value = "";
        }
        setInterval(async () => {
            const r = await fetch('/status'); const d = await r.json();
            document.getElementById('st').innerText = d.status;
            document.getElementById('raw').innerText = d.raw_ai_output;
            if(d.last_reply !== l) {
                l = d.last_reply; document.getElementById('re').innerText = l;
                const u = new SpeechSynthesisUtterance(l); u.lang='ru-RU'; window.speechSynthesis.speak(u);
            }
        }, 2000);
        document.body.onclick = () => window.speechSynthesis.speak(new SpeechSynthesisUtterance(""));
    </script>
</body>
</html>
''')
if __name__ == "__main__":
    # Start Flask's built-in server only when the file is run as a script.
    # Listens on every network interface.
    # NOTE(review): 7860 is presumably the port the hosting platform expects - confirm.
    app.run(port=7860, host="0.0.0.0")