Update app.py
Browse files
app.py
CHANGED
|
@@ -210,22 +210,25 @@ HTML_CONTENT = '''
|
|
| 210 |
</body>
|
| 211 |
</html>
|
| 212 |
'''
|
| 213 |
-
|
| 214 |
def download_model():
|
| 215 |
model_name = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
|
| 216 |
-
model_file = "deepseek-coder-v2-lite-instruct.Q6_K.gguf" #
|
| 217 |
-
return hf_hub_download(model_name, filename=model_file)
|
| 218 |
|
| 219 |
-
def initialize_model(
|
| 220 |
-
|
| 221 |
-
model_path=
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
|
| 227 |
-
|
| 228 |
-
llm = initialize_model(model_path)
|
| 229 |
|
| 230 |
system_prompt = (
|
| 231 |
"You are a helpful AI coding assistant. Your mission is to help people with programming "
|
|
@@ -236,24 +239,35 @@ chat_history = [{"role": "system", "content": system_prompt}]
|
|
| 236 |
|
| 237 |
@app.route('/')
|
| 238 |
def index():
|
| 239 |
-
return
|
| 240 |
|
| 241 |
@app.route('/chat')
|
| 242 |
def chat():
|
| 243 |
global chat_history
|
| 244 |
user_message = request.args.get('message', '')
|
|
|
|
|
|
|
|
|
|
| 245 |
chat_history.append({"role": "user", "content": user_message})
|
| 246 |
|
| 247 |
-
full_prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in chat_history])
|
| 248 |
-
full_prompt += "\nAssistant:"
|
| 249 |
-
|
| 250 |
def generate():
|
| 251 |
ai_response = ""
|
| 252 |
-
|
| 253 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
if chunk:
|
| 255 |
ai_response += chunk
|
| 256 |
yield f"data: {chunk}\n\n"
|
|
|
|
| 257 |
chat_history.append({"role": "assistant", "content": ai_response.strip()})
|
| 258 |
if len(chat_history) > 10: # Limit history to last 10 messages
|
| 259 |
chat_history = chat_history[-10:]
|
|
|
|
| 210 |
</body>
|
| 211 |
</html>
|
| 212 |
'''
|
|
|
|
| 213 |
def download_model():
    """Fetch the quantized DeepSeek-Coder GGUF weights from the Hugging Face Hub.

    Returns:
        str: local filesystem path of the downloaded weights file
        (huggingface_hub caches it, so repeat calls are cheap).
    """
    repo = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
    # Q6_K quantization; another quantized variant from the same repo works too.
    weights_file = "deepseek-coder-v2-lite-instruct.Q6_K.gguf"
    return hf_hub_download(repo_id=repo, filename=weights_file)
|
| 217 |
|
| 218 |
+
def initialize_model():
    """Download the weights and construct a llama.cpp model instance.

    Returns:
        Llama | None: a ready-to-use model, or None when downloading or
        loading fails (callers must check for None before use).
    """
    try:
        # download_model() yields the local GGUF path; feed it straight in.
        return Llama(
            model_path=download_model(),
            n_ctx=4096,
            n_threads=4,
            n_gpu_layers=-1,  # offload all layers to GPU when one is available
        )
    except Exception as exc:
        # Best-effort startup: report the failure and signal it with None.
        print(f"Error initializing model: {exc}")
        return None
|
| 230 |
|
| 231 |
+
# Load the model once at import time. May be None when initialization
# failed; the /chat route checks for that before serving requests.
llm = initialize_model()
|
|
|
|
| 232 |
|
| 233 |
system_prompt = (
|
| 234 |
"You are a helpful AI coding assistant. Your mission is to help people with programming "
|
|
|
|
| 239 |
|
| 240 |
@app.route('/')
def index():
    """Serve the single-page chat UI.

    The markup is embedded in the module-level HTML_CONTENT string, so no
    templates/ directory is needed. (The previous render_template('index.html')
    call would raise TemplateNotFound because that template does not exist.)
    """
    return HTML_CONTENT
|
| 243 |
|
| 244 |
@app.route('/chat')
def chat():
    """SSE endpoint: stream a model completion for the user's message.

    Reads the prompt from the ``message`` query parameter, appends it to the
    shared chat_history, and streams the assistant reply to the client as
    ``text/event-stream`` chunks, terminated by a ``[DONE]`` event.
    """
    global chat_history
    user_message = request.args.get('message', '')
    if not llm:
        # Model failed to load at startup; report it over the same SSE channel.
        return Response("data: Model not loaded\n\ndata: [DONE]\n\n", content_type='text/event-stream')

    chat_history.append({"role": "user", "content": user_message})

    def generate():
        # The history trim below rebinds chat_history; without this declaration
        # the name would be compiled as local to generate() and every earlier
        # read (the list comprehension) would raise UnboundLocalError.
        global chat_history
        ai_response = ""
        # Format messages for the model
        messages = [{"role": msg["role"], "content": msg["content"]} for msg in chat_history]

        stream = llm.create_chat_completion(
            messages=messages,
            max_tokens=1000,
            stop=["User:"],
            stream=True
        )

        for output in stream:
            chunk = output['choices'][0]['delta'].get('content', '')
            if chunk:
                ai_response += chunk
                yield f"data: {chunk}\n\n"

        # Persist history before the final yield so a client disconnect on
        # [DONE] cannot abort the bookkeeping.
        chat_history.append({"role": "assistant", "content": ai_response.strip()})
        if len(chat_history) > 10:  # Limit history to last 10 messages
            chat_history = chat_history[-10:]
        # Error path already emits [DONE]; the success path must match so the
        # client knows the stream is finished.
        yield "data: [DONE]\n\n"

    return Response(generate(), content_type='text/event-stream')
|