Update app.py
app.py CHANGED
@@ -1,15 +1,10 @@
 import os
 import struct
 import gradio as gr
+from llama_cpp import Llama
 from cryptography.hazmat.primitives.ciphers.aead import AESGCM
 from huggingface_hub import hf_hub_download, login
-
-# CRITICAL IMPORT: We do this inside a try block to catch the error early
-try:
-    from llama_cpp import Llama
-    print("✅ Llama-CPP Loaded Successfully.")
-except Exception as e:
-    print(f"❌ Llama-CPP Load Failed: {e}")
+from fastapi import FastAPI, Request
 
 # --- CONFIG ---
 SOURCE_REPO = "metanthropic/metanthropic-phi3-encrypted"
@@ -20,22 +15,15 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
 
 def unlock():
     if os.path.exists(TEMP_DECRYPTED): return
-    print(f"⬇️ Fetching {SOURCE_FILENAME}...")
-
     if HF_TOKEN: login(token=HF_TOKEN)
-
     path = hf_hub_download(repo_id=SOURCE_REPO, filename=SOURCE_FILENAME)
-
-    print("🔓 Decrypting...")
     key = bytes.fromhex(SECRET_KEY_HEX)
     aes = AESGCM(key)
-
     with open(path, "rb") as f_in, open(TEMP_DECRYPTED, "wb") as f_out:
         nonce = f_in.read(12)
         h_len = struct.unpack("<I", f_in.read(4))[0]
         f_out.write(aes.decrypt(nonce, f_in.read(h_len), None))
         while chunk := f_in.read(64*1024*1024): f_out.write(chunk)
-    print("✅ Ready.")
 
 llm = None
 try:
@@ -44,9 +32,28 @@ try:
 except Exception as e:
     print(f"❌ Boot Error: {e}")
 
-
-
-
-
+# --- API LOGIC ---
+def generate(prompt):
+    if not llm: return "Error: Model not loaded"
+    output = llm(f"<|user|>\n{prompt}<|end|>\n<|assistant|>", max_tokens=512, stop=["<|end|>"])
+    return output['choices'][0]['text'].strip()
+
+# Create the Gradio App
+demo = gr.ChatInterface(fn=lambda msg, hist: generate(msg), title="Metanthropic Phi-3 API Node")
+
+# Mount it to FastAPI to allow external API calls
+app = FastAPI()
+
+@app.post("/run_inference")
+async def run_inference(request: Request):
+    data = await request.json()
+    prompt = data.get("prompt", "")
+    response = generate(prompt)
+    return {"response": response}
+
+# Launch both
+app = gr.mount_gradio_app(app, demo, path="/")
 
-
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)
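
For reference, unlock() implies a fixed layout for the encrypted artifact: a 12-byte AES-GCM nonce, a 4-byte little-endian ciphertext length, the encrypted file header (ciphertext plus GCM tag), and the rest of the model copied through unencrypted. The commit does not include the encryption side, so the lock() helper below is a hypothetical sketch of a counterpart that would produce that layout; the function name and the header_len default are illustrative assumptions, not code from this repo.

import os
import struct
from cryptography.hazmat.primitives.ciphers.aead import AESGCM

def lock(src_path, dst_path, key_hex, header_len=4096):
    # Assumed container: [12-byte nonce][4-byte LE length][AES-GCM(header)][raw remainder]
    aes = AESGCM(bytes.fromhex(key_hex))
    nonce = os.urandom(12)  # standard 96-bit GCM nonce
    with open(src_path, "rb") as f_in, open(dst_path, "wb") as f_out:
        ct = aes.encrypt(nonce, f_in.read(header_len), None)  # ciphertext includes the 16-byte tag
        f_out.write(nonce)
        f_out.write(struct.pack("<I", len(ct)))  # the h_len that unlock() reads back
        f_out.write(ct)
        while chunk := f_in.read(64 * 1024 * 1024):  # pass the bulk through unencrypted
            f_out.write(chunk)

Note that only the header is encrypted and authenticated; the bulk of the weights travels in the clear inside the "encrypted" file.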
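
With the Gradio UI mounted on the FastAPI app, the Space exposes both a chat interface at / and a JSON endpoint at /run_inference. A minimal client sketch, assuming a placeholder Space URL:

import requests

SPACE_URL = "https://your-space.hf.space"  # placeholder: substitute the real Space URL
resp = requests.post(f"{SPACE_URL}/run_inference", json={"prompt": "What is Phi-3?"})
resp.raise_for_status()
print(resp.json()["response"])

Registering the POST route before calling gr.mount_gradio_app keeps it reachable: Starlette matches routes in registration order, so the catch-all mount at "/" does not shadow /run_inference.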