ekjotsingh commited on
Commit
ecd24a9
·
verified ·
1 Parent(s): 8d02d7a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -19
app.py CHANGED
@@ -1,15 +1,10 @@
1
  import os
2
  import struct
3
  import gradio as gr
 
4
  from cryptography.hazmat.primitives.ciphers.aead import AESGCM
5
  from huggingface_hub import hf_hub_download, login
6
-
7
- # CRITICAL IMPORT: We do this inside a try block to catch the error early
8
- try:
9
- from llama_cpp import Llama
10
- print("✅ Llama-CPP Loaded Successfully.")
11
- except Exception as e:
12
- print(f"❌ Llama-CPP Load Failed: {e}")
13
 
14
  # --- CONFIG ---
15
  SOURCE_REPO = "metanthropic/metanthropic-phi3-encrypted"
@@ -20,22 +15,15 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
20
 
21
  def unlock():
22
  if os.path.exists(TEMP_DECRYPTED): return
23
- print(f"⬇️ Fetching {SOURCE_FILENAME}...")
24
-
25
  if HF_TOKEN: login(token=HF_TOKEN)
26
-
27
  path = hf_hub_download(repo_id=SOURCE_REPO, filename=SOURCE_FILENAME)
28
-
29
- print("🔓 Decrypting...")
30
  key = bytes.fromhex(SECRET_KEY_HEX)
31
  aes = AESGCM(key)
32
-
33
  with open(path, "rb") as f_in, open(TEMP_DECRYPTED, "wb") as f_out:
34
  nonce = f_in.read(12)
35
  h_len = struct.unpack("<I", f_in.read(4))[0]
36
  f_out.write(aes.decrypt(nonce, f_in.read(h_len), None))
37
  while chunk := f_in.read(64*1024*1024): f_out.write(chunk)
38
- print("✅ Ready.")
39
 
40
  llm = None
41
  try:
@@ -44,9 +32,28 @@ try:
44
  except Exception as e:
45
  print(f"❌ Boot Error: {e}")
46
 
47
- def chat(msg, history):
48
- if not llm: return "System offline."
49
- prompt = f"<|user|>\n{msg}<|end|>\n<|assistant|>"
50
- return llm(prompt, max_tokens=512, stop=["<|end|>"])['choices'][0]['text'].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
- gr.ChatInterface(chat).launch(server_name="0.0.0.0", server_port=7860)
 
 
 
1
  import os
2
  import struct
3
  import gradio as gr
4
+ from llama_cpp import Llama
5
  from cryptography.hazmat.primitives.ciphers.aead import AESGCM
6
  from huggingface_hub import hf_hub_download, login
7
+ from fastapi import FastAPI, Request
 
 
 
 
 
 
8
 
9
  # --- CONFIG ---
10
  SOURCE_REPO = "metanthropic/metanthropic-phi3-encrypted"
 
15
 
16
def unlock():
    """Download the encrypted model file and decrypt it to TEMP_DECRYPTED.

    Encrypted-file layout, inferred from the reads below (confirm against the
    packing script): [12-byte nonce][4-byte little-endian header length]
    [AES-GCM-encrypted header][plaintext remainder]. Only the header is
    decrypted; the tail is streamed through verbatim.

    No-op when TEMP_DECRYPTED already exists. Raises on download or
    decryption failure; any partially written output is removed first so a
    later retry is not fooled by a corrupt artifact.
    """
    if os.path.exists(TEMP_DECRYPTED):
        return
    if HF_TOKEN:
        login(token=HF_TOKEN)
    path = hf_hub_download(repo_id=SOURCE_REPO, filename=SOURCE_FILENAME)

    key = bytes.fromhex(SECRET_KEY_HEX)
    aes = AESGCM(key)

    # Decrypt into a side file and atomically rename on success: previously an
    # interrupted run left a truncated TEMP_DECRYPTED that the exists() guard
    # above would mistake for a finished decrypt on every later call.
    tmp_path = TEMP_DECRYPTED + ".part"
    try:
        with open(path, "rb") as f_in, open(tmp_path, "wb") as f_out:
            nonce = f_in.read(12)
            h_len = struct.unpack("<I", f_in.read(4))[0]
            # GCM tag verification happens inside decrypt(); a wrong key or a
            # tampered header raises here rather than producing garbage.
            f_out.write(aes.decrypt(nonce, f_in.read(h_len), None))
            # Stream the unencrypted remainder in 64 MiB chunks.
            while chunk := f_in.read(64 * 1024 * 1024):
                f_out.write(chunk)
        os.replace(tmp_path, TEMP_DECRYPTED)
    except Exception:
        # Best-effort cleanup of the partial file, then surface the error.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
 
27
 
28
  llm = None
29
  try:
 
32
  except Exception as e:
33
  print(f"❌ Boot Error: {e}")
34
 
35
# --- API LOGIC ---
def generate(prompt):
    """Run one chat turn through the local model.

    Wraps *prompt* in the Phi-3 chat template, samples up to 512 tokens
    (stopping at the ``<|end|>`` marker), and returns the assistant text with
    surrounding whitespace stripped. Returns a plain error string when the
    module-level ``llm`` never finished loading.
    """
    if not llm:
        return "Error: Model not loaded"
    templated = f"<|user|>\n{prompt}<|end|>\n<|assistant|>"
    completion = llm(templated, max_tokens=512, stop=["<|end|>"])
    return completion['choices'][0]['text'].strip()
40
+
41
# Create the Gradio App
demo = gr.ChatInterface(fn=lambda msg, hist: generate(msg), title="Metanthropic Phi-3 API Node")

# Mount it to FastAPI to allow external API calls
app = FastAPI()


@app.post("/run_inference")
async def run_inference(request: Request):
    """Raw JSON inference endpoint: {"prompt": "..."} -> {"response": "..."}.

    NOTE(review): a non-JSON body makes request.json() raise and surfaces as a
    500 from FastAPI — confirm that is acceptable for this API's callers.
    """
    payload = await request.json()
    return {"response": generate(payload.get("prompt", ""))}


# Serve the Gradio UI from the same FastAPI app at the root path.
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)