fugthchat committed on
Commit
a05296f
·
verified ·
1 Parent(s): 812549a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -4
app.py CHANGED
@@ -2,6 +2,8 @@ import os
2
  import glob
3
  import json
4
  import psutil
 
 
5
  from pathlib import Path
6
  from typing import Any, Dict, List, Optional
7
 
@@ -10,6 +12,11 @@ from fastapi.responses import StreamingResponse
10
  from fastapi.middleware.cors import CORSMiddleware
11
  from llama_cpp import Llama
12
 
 
 
 
 
 
13
  app = FastAPI(title="Hannah Pilot Interface")
14
 
15
  # --- CORS Permissions ---
@@ -186,13 +193,67 @@ async def gen_title(request: Request):
186
  return {"title": "New Chat"}
187
 
188
 
189
- def build_prompt(user_input: str, history: List[Dict[str, str]], has_web_context: bool = False) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  # Qwen 2.5 chat format with optional web context awareness
191
  system = (
192
  "You are Hannah 1.0, an intelligent, fast, and helpful AI assistant. "
193
  "Answer clearly and accurately. "
194
  )
195
-
196
  # If web context is available, instruct the model to use it
197
  if has_web_context:
198
  system += (
@@ -200,7 +261,7 @@ def build_prompt(user_input: str, history: List[Dict[str, str]], has_web_context
200
  "Use this context to provide current, accurate information about recent events and dates. "
201
  "Reference the sources when relevant. "
202
  )
203
-
204
  system += (
205
  "Keep responses concise but helpful. "
206
  "If asked about your model or training details, simply say: 'I'm Hannah - a helpful AI assistant.' "
@@ -232,8 +293,24 @@ async def chat(request: Request):
232
  if not user_input:
233
  raise HTTPException(status_code=400, detail="Empty message")
234
 
235
- llm = get_model(model_file)
 
 
236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  # Detect if the message includes web context
238
  has_web_context = has_web and "[Web context retrieved on" in user_input
239
 
 
2
  import glob
3
  import json
4
  import psutil
5
+ import asyncio
6
+ import re
7
  from pathlib import Path
8
  from typing import Any, Dict, List, Optional
9
 
 
12
  from fastapi.middleware.cors import CORSMiddleware
13
  from llama_cpp import Llama
14
 
15
+ try:
16
+ import aiohttp
17
+ except ImportError:
18
+ aiohttp = None
19
+
20
  app = FastAPI(title="Hannah Pilot Interface")
21
 
22
  # --- CORS Permissions ---
 
193
  return {"title": "New Chat"}
194
 
195
 
196
def extract_file_urls(message: str) -> List[str]:
    """Extract Google Drive file URLs from *message*.

    Returns:
        The matched URLs in order of first appearance, with duplicates
        removed so each file is fetched at most once downstream.
    """
    pattern = r'https://drive\.google\.com/[^\s\)\"<>]*'
    # dict.fromkeys keeps insertion order while dropping repeated URLs.
    return list(dict.fromkeys(re.findall(pattern, message)))
200
+
201
+
202
async def fetch_file_from_url(file_url: str, max_size: int = 10 * 1024 * 1024) -> str:
    """
    Fetch a file from URL and return its content as text.

    Works with Google Drive URLs and plain text files; binary payloads
    yield a descriptive note instead of raw bytes.

    Parameters:
        file_url: URL to download. Google Drive sharing links are
            rewritten to the direct-download form.
        max_size: Maximum accepted payload in bytes (default 10 MB).

    Returns:
        Up to the first 3000 characters of the decoded UTF-8 text, or a
        bracketed status message describing why the content is
        unavailable. Never raises: all failures are reported in-band.
    """
    if not aiohttp:
        return "[File fetching requires aiohttp - install via pip install aiohttp]"

    try:
        # Convert a Google Drive sharing link to a direct download link.
        if "drive.google.com" in file_url:
            # File IDs appear either in a /d/<id> path segment or an
            # id=<id> query parameter. (`re` is already imported at module
            # top; no local import needed.)
            file_id_match = re.search(r'/d/([a-zA-Z0-9-_]+)', file_url)
            if not file_id_match:
                file_id_match = re.search(r'id=([a-zA-Z0-9-_]+)', file_url)

            if file_id_match:
                file_id = file_id_match.group(1)
                # export=download asks Drive for the raw file bytes.
                file_url = f"https://drive.google.com/uc?id={file_id}&export=download"

        async with aiohttp.ClientSession() as session:
            async with session.get(
                file_url,
                timeout=aiohttp.ClientTimeout(total=15),
                allow_redirects=True,
            ) as resp:
                if resp.status != 200:
                    return f"[Could not fetch file: HTTP {resp.status}]"

                content = await resp.read()

        if len(content) > max_size:
            # Report the actual configured limit rather than a hardcoded
            # "10MB", so overriding max_size keeps the message accurate.
            return (
                f"[File too large to process: {len(content) / 1024 / 1024:.1f}MB, "
                f"max {max_size / 1024 / 1024:.0f}MB]"
            )

        # Try to decode as text; binary files get a note for the user.
        try:
            text = content.decode('utf-8')
        except UnicodeDecodeError:
            return f"[Binary file detected. Size: {len(content) / 1024:.1f}KB. Please describe what you see in it.]"

        # Limit preview to first 3000 chars
        return text[:3000]
    except asyncio.TimeoutError:
        return "[File fetch timed out - file may be too large or URL invalid]"
    except Exception as e:
        return f"[Could not fetch file: {str(e)[:100]}]"
246
+
247
+
248
+ def build_prompt(
249
+ user_input: str, history: List[Dict[str, str]], has_web_context: bool = False
250
+ ) -> str:
251
  # Qwen 2.5 chat format with optional web context awareness
252
  system = (
253
  "You are Hannah 1.0, an intelligent, fast, and helpful AI assistant. "
254
  "Answer clearly and accurately. "
255
  )
256
+
257
  # If web context is available, instruct the model to use it
258
  if has_web_context:
259
  system += (
 
261
  "Use this context to provide current, accurate information about recent events and dates. "
262
  "Reference the sources when relevant. "
263
  )
264
+
265
  system += (
266
  "Keep responses concise but helpful. "
267
  "If asked about your model or training details, simply say: 'I'm Hannah - a helpful AI assistant.' "
 
293
  if not user_input:
294
  raise HTTPException(status_code=400, detail="Empty message")
295
 
296
+ # Extract and fetch file URLs from the message
297
+ file_urls = extract_file_urls(user_input)
298
+ file_content_parts = []
299
 
300
+ if file_urls:
301
+ for url in file_urls:
302
+ print(f"[File Processing] Fetching: {url[:80]}...")
303
+ content = await fetch_file_from_url(url)
304
+ if content:
305
+ file_content_parts.append(content)
306
+
307
+ # Append file contents to user input so the model can process them
308
+ if file_content_parts:
309
+ file_section = "\n\n[File Contents Retrieved]:\n" + "\n---\n".join(file_content_parts)
310
+ user_input = user_input + file_section
311
+
312
+ llm = get_model(model_file)
313
+
314
  # Detect if the message includes web context
315
  has_web_context = has_web and "[Web context retrieved on" in user_input
316