fugthchat committed on
Commit
e068f6b
·
verified ·
1 Parent(s): a05296f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -21
app.py CHANGED
@@ -4,10 +4,13 @@ import json
4
  import psutil
5
  import asyncio
6
  import re
 
 
7
  from pathlib import Path
8
  from typing import Any, Dict, List, Optional
 
9
 
10
- from fastapi import FastAPI, Request, HTTPException
11
  from fastapi.responses import StreamingResponse
12
  from fastapi.middleware.cors import CORSMiddleware
13
  from llama_cpp import Llama
@@ -38,6 +41,9 @@ MODEL_MAP: Dict[str, str] = {
38
  current_model: Optional[Llama] = None
39
  current_model_name: str = ""
40
 
 
 
 
41
 
42
  def _model_abs_path(model_name: str) -> Path:
43
  # Always resolve relative to the app directory to avoid cwd surprises.
@@ -193,47 +199,140 @@ async def gen_title(request: Request):
193
  return {"title": "New Chat"}
194
 
195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  def extract_file_urls(message: str) -> List[str]:
197
- """Extract Google Drive file URLs from message."""
198
- pattern = r'https://drive\.google\.com/[^\s\)\"<>]*'
199
- return re.findall(pattern, message)
 
 
 
 
 
 
 
 
 
200
 
201
 
202
  async def fetch_file_from_url(file_url: str, max_size: int = 10 * 1024 * 1024) -> str:
203
  """
204
- Fetch a file from URL and return its content as text.
205
- Works with Google Drive URLs, text files, and can attempt text extraction from binary files.
 
 
 
206
  """
207
- if not aiohttp:
208
- return "[File fetching requires aiohttp - install via pip install aiohttp]"
209
-
210
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  # Convert Google Drive sharing link to direct download link if needed
212
  if "drive.google.com" in file_url:
213
  # Extract file ID from Google Drive URL
214
  import re
215
- file_id_match = re.search(r'/d/([a-zA-Z0-9-_]+)', file_url)
 
216
  if not file_id_match:
217
- file_id_match = re.search(r'id=([a-zA-Z0-9-_]+)', file_url)
218
-
219
  if file_id_match:
220
  file_id = file_id_match.group(1)
221
  # Use export=download for Google Drive files
222
  file_url = f"https://drive.google.com/uc?id={file_id}&export=download"
223
-
224
  async with aiohttp.ClientSession() as session:
225
- async with session.get(file_url, timeout=aiohttp.ClientTimeout(total=15), allow_redirects=True) as resp:
 
 
226
  if resp.status != 200:
227
  return f"[Could not fetch file: HTTP {resp.status}]"
228
-
229
  content = await resp.read()
230
-
231
  if len(content) > max_size:
232
  return f"[File too large to process: {len(content) / 1024 / 1024:.1f}MB, max 10MB]"
233
-
234
  # Try to decode as text
235
  try:
236
- text = content.decode('utf-8')
237
  # Limit preview to first 3000 chars
238
  return text[:3000]
239
  except UnicodeDecodeError:
@@ -296,17 +395,19 @@ async def chat(request: Request):
296
  # Extract and fetch file URLs from the message
297
  file_urls = extract_file_urls(user_input)
298
  file_content_parts = []
299
-
300
  if file_urls:
301
  for url in file_urls:
302
  print(f"[File Processing] Fetching: {url[:80]}...")
303
  content = await fetch_file_from_url(url)
304
  if content:
305
  file_content_parts.append(content)
306
-
307
  # Append file contents to user input so the model can process them
308
  if file_content_parts:
309
- file_section = "\n\n[File Contents Retrieved]:\n" + "\n---\n".join(file_content_parts)
 
 
310
  user_input = user_input + file_section
311
 
312
  llm = get_model(model_file)
 
4
  import psutil
5
  import asyncio
6
  import re
7
+ import tempfile
8
+ import shutil
9
  from pathlib import Path
10
  from typing import Any, Dict, List, Optional
11
+ from datetime import datetime, timedelta
12
 
13
+ from fastapi import FastAPI, Request, HTTPException, UploadFile, File
14
  from fastapi.responses import StreamingResponse
15
  from fastapi.middleware.cors import CORSMiddleware
16
  from llama_cpp import Llama
 
41
  current_model: Optional[Llama] = None
42
  current_model_name: str = ""
43
 
44
# --- File Upload Configuration ---
# Uploaded files are staged in a per-host temp directory; stale entries are
# pruned by cleanup_old_files() (default: older than 24 hours).
UPLOAD_DIR = Path(tempfile.gettempdir()) / "hannah_uploads"
47
 
48
  def _model_abs_path(model_name: str) -> Path:
49
  # Always resolve relative to the app directory to avoid cwd surprises.
 
199
  return {"title": "New Chat"}
200
 
201
 
202
def cleanup_old_files(max_age_hours: int = 24):
    """Delete uploaded files older than ``max_age_hours`` from UPLOAD_DIR.

    Best-effort maintenance: a missing upload directory is a no-op, and
    files that cannot be removed (permissions, still open on Windows) are
    silently skipped so a failed cleanup never breaks an upload request.

    Args:
        max_age_hours: Maximum age, in hours, a file may reach before it
            is removed. Defaults to 24.
    """
    if not UPLOAD_DIR.exists():
        return

    # Compute the cutoff timestamp once instead of deriving an age per file.
    cutoff = datetime.now() - timedelta(hours=max_age_hours)
    for file_path in UPLOAD_DIR.glob("*"):
        # glob("*") can also yield directories; only prune plain files.
        if not file_path.is_file():
            continue
        if datetime.fromtimestamp(file_path.stat().st_mtime) < cutoff:
            try:
                file_path.unlink()
            except Exception:
                # Deliberate best-effort: leave stubborn files for the next run.
                pass
216
+
217
+
218
@app.post("/api/upload")
async def upload_file(file: UploadFile = File(...)):
    """Upload a file and store it temporarily under UPLOAD_DIR.

    Returns JSON with the stored path (``file_url``), the original
    filename, the size in KB, and a short text preview the client can
    echo back in chat as ``[File uploaded: path]``.

    Raises:
        HTTPException: 413 if the upload exceeds 50MB, 500 on any other
            storage failure.
    """
    try:
        # Create upload directory if it doesn't exist.
        UPLOAD_DIR.mkdir(parents=True, exist_ok=True)

        # Enforce the 50MB size limit before touching the disk.
        content = await file.read()
        if len(content) > 50 * 1024 * 1024:
            raise HTTPException(status_code=413, detail="File too large (max 50MB)")

        # SECURITY: the client-supplied filename is untrusted. Keep only the
        # basename so a crafted name like "../../etc/cron.d/x" cannot escape
        # UPLOAD_DIR via path traversal.
        safe_name = Path(file.filename or "upload").name or "upload"

        # Prefix with a timestamp so repeated uploads of the same filename
        # don't overwrite each other.
        timestamp = datetime.now().timestamp()
        file_path = UPLOAD_DIR / f"{timestamp}_{safe_name}"

        with open(file_path, "wb") as f:
            f.write(content)

        # Best-effort text preview (first 1000 chars). errors="ignore" means
        # binary content still yields a (possibly garbled) string rather than
        # raising, so the except is pure belt-and-braces.
        preview = None
        try:
            preview = content.decode("utf-8", errors="ignore")[:1000]
        except Exception:
            pass

        # Opportunistic pruning of stale uploads (runs inline on the request
        # path, not in a true background task).
        cleanup_old_files()

        return {
            "success": True,
            "filename": file.filename,
            "file_url": str(file_path),
            "size_kb": len(content) / 1024,
            "preview": preview,
        }
    except HTTPException:
        # Re-raise our own 413 untouched instead of wrapping it in a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
259
+
260
+
261
def extract_file_urls(message: str) -> List[str]:
    """Extract file references from a chat message.

    Recognizes two forms:
      * Google Drive URLs (``https://drive.google.com/...``)
      * Uploaded-file markers inserted by the client: ``[File uploaded: path]``

    Duplicate references are dropped (first occurrence wins) so the same
    file is never fetched twice downstream; otherwise order is preserved,
    with Drive URLs listed before upload markers.

    Args:
        message: Raw user message text.

    Returns:
        Ordered, de-duplicated list of URLs / local paths to fetch.
    """
    drive_pattern = r"https://drive\.google\.com/[^\s\)\"<>]*"
    upload_pattern = r"\[File uploaded: ([^\]]+)\]"

    found = re.findall(drive_pattern, message) + re.findall(upload_pattern, message)
    # dict.fromkeys keeps insertion order (Python 3.7+) while de-duplicating.
    return list(dict.fromkeys(found))
274
 
275
 
276
  async def fetch_file_from_url(file_url: str, max_size: int = 10 * 1024 * 1024) -> str:
277
  """
278
+ Fetch a file from URL or local path and return its content as text.
279
+ Works with:
280
+ - Local file paths (uploaded files)
281
+ - Google Drive URLs
282
+ - Text files via HTTP
283
  """
 
 
 
284
  try:
285
+ # Check if it's a local file path first
286
+ local_path = Path(file_url)
287
+ if local_path.exists() and local_path.is_file():
288
+ try:
289
+ with open(local_path, "rb") as f:
290
+ content = f.read()
291
+
292
+ if len(content) > max_size:
293
+ return f"[File too large to process: {len(content) / 1024 / 1024:.1f}MB, max 10MB]"
294
+
295
+ try:
296
+ text = content.decode("utf-8", errors="ignore")
297
+ return text[:3000]
298
+ except Exception:
299
+ return f"[Binary file detected. Size: {len(content) / 1024:.1f}KB.]"
300
+ except Exception as e:
301
+ return f"[Could not read local file: {str(e)[:100]}]"
302
+
303
+ # Handle remote URLs (Google Drive, HTTP, etc.)
304
+ if not aiohttp:
305
+ return "[File fetching requires aiohttp - install via pip install aiohttp]"
306
+
307
  # Convert Google Drive sharing link to direct download link if needed
308
  if "drive.google.com" in file_url:
309
  # Extract file ID from Google Drive URL
310
  import re
311
+
312
+ file_id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", file_url)
313
  if not file_id_match:
314
+ file_id_match = re.search(r"id=([a-zA-Z0-9-_]+)", file_url)
315
+
316
  if file_id_match:
317
  file_id = file_id_match.group(1)
318
  # Use export=download for Google Drive files
319
  file_url = f"https://drive.google.com/uc?id={file_id}&export=download"
320
+
321
  async with aiohttp.ClientSession() as session:
322
+ async with session.get(
323
+ file_url, timeout=aiohttp.ClientTimeout(total=15), allow_redirects=True
324
+ ) as resp:
325
  if resp.status != 200:
326
  return f"[Could not fetch file: HTTP {resp.status}]"
327
+
328
  content = await resp.read()
329
+
330
  if len(content) > max_size:
331
  return f"[File too large to process: {len(content) / 1024 / 1024:.1f}MB, max 10MB]"
332
+
333
  # Try to decode as text
334
  try:
335
+ text = content.decode("utf-8")
336
  # Limit preview to first 3000 chars
337
  return text[:3000]
338
  except UnicodeDecodeError:
 
395
  # Extract and fetch file URLs from the message
396
  file_urls = extract_file_urls(user_input)
397
  file_content_parts = []
398
+
399
  if file_urls:
400
  for url in file_urls:
401
  print(f"[File Processing] Fetching: {url[:80]}...")
402
  content = await fetch_file_from_url(url)
403
  if content:
404
  file_content_parts.append(content)
405
+
406
  # Append file contents to user input so the model can process them
407
  if file_content_parts:
408
+ file_section = "\n\n[File Contents Retrieved]:\n" + "\n---\n".join(
409
+ file_content_parts
410
+ )
411
  user_input = user_input + file_section
412
 
413
  llm = get_model(model_file)