trackingsvg committed on
Commit
46e9943
·
verified ·
1 Parent(s): 9862305

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -42
app.py CHANGED
@@ -7,130 +7,205 @@ import gc
7
  from datetime import datetime
8
  from bs4 import BeautifulSoup
9
  from contextlib import asynccontextmanager
 
10
  from fastapi import FastAPI, Request
11
  from fastapi.responses import JSONResponse
12
  from fastapi.middleware.cors import CORSMiddleware
13
- from llama_cpp import Llama
 
14
  from huggingface_hub import hf_hub_download
15
 
16
  # ==========================================
17
  # 1. CONFIGURATION & CORE SETUP
18
  # ==========================================
 
19
  SECRET_KEY = os.environ.get("REAPERAI_SECRET", "jan30")
 
20
  MODEL_REPO = "bartowski/Qwen2.5-7B-Instruct-GGUF"
21
- MODEL_FILE = "Qwen2.5-7B-Instruct-Q4_K_M.gguf" # Specific filename required for download
22
 
23
  chat_memory = {}
24
  MAX_GLOBAL_USERS = 50
25
  memory_lock = threading.Lock()
 
 
26
  model_semaphore = threading.Semaphore(1)
27
- llm = None # Initialize as None to avoid NameError
 
28
 
29
  # ==========================================
30
- # 2. MODEL LOADING (FIXED FOR COMPATIBILITY)
31
  # ==========================================
 
32
  @asynccontextmanager
33
  async def lifespan(app: FastAPI):
34
- print(f"--- [SYSTEM] Downloading & Loading GGUF: {MODEL_FILE} ---")
35
  global llm
 
 
36
  try:
37
- # Step 1: Securely download the model file to local cache
38
- path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
39
-
40
- # Step 2: Initialize using the local path
 
 
 
41
  llm = Llama(
42
- model_path=path,
43
- n_ctx=2048,
44
- n_threads=2,
 
45
  verbose=False
46
  )
47
- print("--- [SYSTEM] ReaperAI Core Online ---")
 
 
48
  except Exception as e:
49
- print(f"--- [CRITICAL ERROR] Loading Failed: {e} ---")
 
 
50
  yield
51
 
 
 
 
 
52
  app = FastAPI(title="ReaperAI Secure Core", lifespan=lifespan)
53
- app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["POST"], allow_headers=["*"])
 
 
 
 
 
 
54
 
55
  # ==========================================
56
- # 3. AUTONOMOUS TOOLS (REFINED)
57
  # ==========================================
 
58
  def ddg_search(query):
59
- print(f"--- [TOOL] Searching Web: {query} ---")
60
  try:
61
  headers = {"User-Agent": "Mozilla/5.0"}
62
- res = requests.get("https://html.duckduckgo.com/html/", params={"q": query}, headers=headers, timeout=5)
 
 
 
 
 
63
  soup = BeautifulSoup(res.text, "html.parser")
64
- results = [f"- {r.select_one('.result__a').get_text(strip=True)}: {r.select_one('.result__snippet').get_text(strip=True)}"
65
- for r in soup.select(".result")[:3]]
66
- return f"\n[REAL-TIME KNOWLEDGE]:\n" + "\n".join(results) + "\n" if results else ""
67
- except: return ""
 
 
 
 
 
 
 
68
 
69
  def jina_read(url):
70
- print(f"--- [TOOL] Reading Source: {url} ---")
71
  try:
72
- res = requests.get(f"https://r.jina.ai/{url}", headers={"x-respond-with": "text"}, timeout=8)
73
- return f"\n[SOURCE CONTENT]:\n{res.text[:1200]}\n" if res.status_code == 200 else ""
74
- except: return ""
 
 
 
 
 
 
 
75
 
76
  def fast_intent_detection(message):
77
  url_match = re.search(r"(https?://\S+)", message)
78
- if url_match: return "URL", url_match.group(1)
 
 
79
  search_keywords = ["who is", "price", "latest", "current", "news", "today", "weather", "score", "stock"]
80
  if any(k in message.lower() for k in search_keywords):
81
  return "SEARCH", message
 
82
  return "CHAT", None
83
 
84
  # ==========================================
85
- # 4. SECURE ENDPOINT
86
  # ==========================================
 
87
  @app.middleware("http")
88
  async def security_guard(request: Request, call_next):
89
- if request.url.path == "/": return await call_next(request)
 
 
90
  if request.headers.get("x-reaperai-key") != SECRET_KEY:
91
  return JSONResponse(status_code=403, content={"error": "ACCESS_DENIED"})
 
92
  return await call_next(request)
93
 
 
 
 
 
94
  @app.post("/chat")
95
- def chat_endpoint(request: Request, payload: dict):
96
  if llm is None:
97
- return JSONResponse(status_code=503, content={"error": "Model not initialized"})
 
 
98
 
99
  user_id = payload.get("userId", "default")
100
  message = payload.get("message", "").strip()
 
101
  current_date = datetime.now().strftime("%A, %B %d, %Y")
102
-
103
  with memory_lock:
104
  if len(chat_memory) > MAX_GLOBAL_USERS:
105
  chat_memory.pop(next(iter(chat_memory)))
106
  history = chat_memory.get(user_id, [])[-5:]
107
 
108
  intent, data = fast_intent_detection(message)
109
- context = jina_read(data) if intent == "URL" else (ddg_search(message) if intent == "SEARCH" else "")
110
 
111
- prompt_messages = [{"role": "system", "content": f"You are ReaperAI. Precise and concise. Today: {current_date}"}]
 
 
 
 
 
 
 
 
 
112
  for h in history:
113
- prompt_messages.append({"role": "user", "content": h['u']})
114
- prompt_messages.append({"role": "assistant", "content": h['a']})
115
-
116
- final_input = f"Context: {context}\nQuery: {message}" if context else message
117
  prompt_messages.append({"role": "user", "content": final_input})
118
 
119
  with model_semaphore:
120
  try:
121
- output = llm.create_chat_completion(messages=prompt_messages, max_tokens=400, temperature=0.7)
 
 
 
 
122
  response = output["choices"][0]["message"]["content"].strip()
123
  except Exception as e:
124
  response = f"Core Error: {str(e)}"
125
 
126
  with memory_lock:
127
- if user_id not in chat_memory: chat_memory[user_id] = []
128
- chat_memory[user_id].append({"u": message, "a": response})
129
  chat_memory[user_id] = chat_memory[user_id][-8:]
130
 
131
  gc.collect()
 
132
  return {"intent": intent, "response": response}
133
 
 
 
 
 
134
  if __name__ == "__main__":
135
  import uvicorn
136
- uvicorn.run(app, host="0.0.0.0", port=7860, timeout_keep_alive=60)
 
# Standard library
import asyncio
from contextlib import asynccontextmanager
from datetime import datetime

# Third-party
from bs4 import BeautifulSoup
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
17
 
# ==========================================
# 1. CONFIGURATION & CORE SETUP
# ==========================================

# Shared secret checked by the security middleware against the
# "x-reaperai-key" request header.
# NOTE(review): the hard-coded fallback "jan30" is a weak default —
# set REAPERAI_SECRET in the deployment environment.
SECRET_KEY = os.environ.get("REAPERAI_SECRET", "jan30")

# GGUF model pulled from the Hugging Face Hub at startup (see lifespan below).
MODEL_REPO = "bartowski/Qwen2.5-7B-Instruct-GGUF"
MODEL_FILENAME = "Qwen2.5-7B-Instruct-Q4_K_M.gguf"

# In-memory conversation store: user_id -> list of {"u": user_msg, "a": reply}.
# Guarded by memory_lock; trimmed per-user and globally in the chat endpoint.
chat_memory = {}
MAX_GLOBAL_USERS = 50  # soft cap; oldest user is evicted once exceeded
memory_lock = threading.Lock()

# Only 1 inference at a time (CPU safety)
model_semaphore = threading.Semaphore(1)

llm = None  # will be set at startup
35
 
# ==========================================
# 2. MODEL LOADING (HF Spaces Compatible)
# ==========================================

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Download the GGUF weights and bring the llama.cpp model online before serving.

    On any failure `llm` remains None, and the /chat endpoint will report 503.
    """
    global llm
    print(f"--- [SYSTEM] Downloading model from HF: {MODEL_REPO} ---")

    try:
        # Resolves to the local HF cache; only downloads on a cold start.
        model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)

        print(f"--- [SYSTEM] Loading GGUF model from: {model_path} ---")

        llm = Llama(
            model_path=model_path,
            n_ctx=2048,    # context window (tokens)
            n_threads=4,   # CPU threads for inference
            n_batch=512,   # prompt-processing batch size
            verbose=False,
        )

        print("--- [SYSTEM] ReaperAI Core Online (GGUF) ---")

    except Exception as e:
        print(f"--- [CRITICAL ERROR] Model load failed: {e} ---")
        llm = None

    yield
68
 
# ==========================================
# 3. FASTAPI SETUP
# ==========================================

app = FastAPI(title="ReaperAI Secure Core", lifespan=lifespan)

# CORS is wide open on purpose: any origin may POST, because the real
# access gate is the x-reaperai-key header enforced by the middleware below.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["POST"],
    allow_headers=["*"]
)
81
 
# ==========================================
# 4. AUTONOMOUS TOOLS
# ==========================================

def ddg_search(query):
    """Scrape the top DuckDuckGo HTML results for *query*.

    Returns a "[REAL-TIME KNOWLEDGE]" context string with up to three
    title/snippet pairs, or "" on any failure (network error, non-200
    response, markup change) — best-effort by design.
    """
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        res = requests.get(
            "https://html.duckduckgo.com/html/",
            params={"q": query},
            headers=headers,
            timeout=6
        )
        # FIX: a non-200 page (rate limit / CAPTCHA) was previously parsed
        # as if it contained results; fail fast into the empty-string path.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")

        results = []
        for r in soup.select(".result")[:3]:
            title = r.select_one(".result__a")
            snippet = r.select_one(".result__snippet")
            if title and snippet:
                results.append(f"- {title.get_text(strip=True)}: {snippet.get_text(strip=True)}")

        return "\n[REAL-TIME KNOWLEDGE]\n" + "\n".join(results) + "\n" if results else ""
    except Exception:
        # FIX: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit.
        return ""
107
 
def jina_read(url):
    """Fetch a plain-text rendition of *url* via the r.jina.ai reader proxy.

    Returns a "[SOURCE CONTENT]" context string truncated to 1200 chars,
    or "" on timeout / non-200 / any error — best-effort by design.
    """
    try:
        res = requests.get(
            f"https://r.jina.ai/{url}",
            headers={"x-respond-with": "text"},
            timeout=8
        )
        if res.status_code == 200:
            return f"\n[SOURCE CONTENT]\n{res.text[:1200]}\n"
    except Exception:
        # FIX: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit.
        pass
    return ""
120
 
def fast_intent_detection(message):
    """Cheap, model-free routing of an incoming message.

    Returns a (intent, data) pair: ("URL", url) when the text embeds a link,
    ("SEARCH", message) when a freshness keyword appears, else ("CHAT", None).
    """
    found_url = re.search(r"(https?://\S+)", message)
    if found_url is not None:
        return "URL", found_url.group(1)

    lowered = message.lower()
    search_keywords = ["who is", "price", "latest", "current", "news", "today", "weather", "score", "stock"]
    for keyword in search_keywords:
        if keyword in lowered:
            return "SEARCH", message

    return "CHAT", None
131
 
# ==========================================
# 5. SECURITY MIDDLEWARE
# ==========================================

@app.middleware("http")
async def security_guard(request: Request, call_next):
    """Gate every route except the public "/" root behind the shared secret header."""
    is_public_root = request.url.path == "/"
    has_valid_key = request.headers.get("x-reaperai-key") == SECRET_KEY

    if is_public_root or has_valid_key:
        return await call_next(request)

    return JSONResponse(status_code=403, content={"error": "ACCESS_DENIED"})
145
 
# ==========================================
# 6. CHAT ENDPOINT
# ==========================================

def _generate_reply(prompt_messages):
    """Run one semaphore-guarded llama.cpp completion.

    Blocking (CPU-bound) — must be called off the event loop.
    Returns the reply text, or a "Core Error: ..." string on failure.
    """
    with model_semaphore:
        try:
            output = llm.create_chat_completion(
                messages=prompt_messages,
                max_tokens=400,
                temperature=0.7
            )
            return output["choices"][0]["message"]["content"].strip()
        except Exception as e:
            return f"Core Error: {str(e)}"


@app.post("/chat")
async def chat_endpoint(request: Request):
    """Answer a chat message, optionally enriched with live web context.

    Expects JSON {"userId": str, "message": str}; returns
    {"intent": str, "response": str}, or 503 while the model is loading.
    """
    if llm is None:
        return JSONResponse(status_code=503, content={"error": "MODEL_NOT_READY"})

    payload = await request.json()

    user_id = payload.get("userId", "default")
    message = payload.get("message", "").strip()

    current_date = datetime.now().strftime("%A, %B %d, %Y")

    with memory_lock:
        # Evict the oldest tracked user once the global cap is exceeded.
        if len(chat_memory) > MAX_GLOBAL_USERS:
            chat_memory.pop(next(iter(chat_memory)))
        history = chat_memory.get(user_id, [])[-5:]

    intent, data = fast_intent_detection(message)

    # FIX: the tool fetches use blocking `requests` calls (up to 6–8 s);
    # run them in a worker thread so the event loop stays responsive.
    context = ""
    if intent == "URL":
        context = await asyncio.to_thread(jina_read, data)
    elif intent == "SEARCH":
        context = await asyncio.to_thread(ddg_search, message)

    prompt_messages = [
        {"role": "system", "content": f"You are ReaperAI. Precise and concise. Today is {current_date}."}
    ]

    for h in history:
        prompt_messages.append({"role": "user", "content": h["u"]})
        prompt_messages.append({"role": "assistant", "content": h["a"]})

    final_input = f"Context:\n{context}\n\nUser query:\n{message}" if context else message
    prompt_messages.append({"role": "user", "content": final_input})

    # FIX: the completion (seconds of CPU work) and the blocking semaphore
    # acquire previously ran directly in this coroutine, freezing the event
    # loop — and therefore every concurrent request — for the whole
    # generation. Offload to a worker thread instead.
    response = await asyncio.to_thread(_generate_reply, prompt_messages)

    with memory_lock:
        chat_memory.setdefault(user_id, []).append({"u": message, "a": response})
        chat_memory[user_id] = chat_memory[user_id][-8:]  # keep last 8 turns per user

    gc.collect()

    return {"intent": intent, "response": response}
204
 
# ==========================================
# 7. LOCAL DEV ENTRY
# ==========================================

if __name__ == "__main__":
    # Local/dev launcher; port 7860 is the Hugging Face Spaces convention.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)