Spaces:

fugthchat
/

Hannah-Pilot-Interface

Sleeping

App Files Files Community

fugthchat committed on Dec 20, 2025

Commit

e068f6b

verified ·

1 Parent(s): a05296f

Update app.py

Browse files

Files changed (1) hide show

app.py +122 -21

app.py CHANGED Viewed

@@ -4,10 +4,13 @@ import json
 import psutil
 import asyncio
 import re
 from pathlib import Path
 from typing import Any, Dict, List, Optional
-from fastapi import FastAPI, Request, HTTPException
 from fastapi.responses import StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
 from llama_cpp import Llama
@@ -38,6 +41,9 @@ MODEL_MAP: Dict[str, str] = {
 current_model: Optional[Llama] = None
 current_model_name: str = ""
 def _model_abs_path(model_name: str) -> Path:
     # Always resolve relative to the app directory to avoid cwd surprises.
@@ -193,47 +199,140 @@ async def gen_title(request: Request):
         return {"title": "New Chat"}
 def extract_file_urls(message: str) -> List[str]:
-    """Extract Google Drive file URLs from message."""
-    pattern = r'https://drive\.google\.com/[^\s\)\"<>]*'
-    return re.findall(pattern, message)
 async def fetch_file_from_url(file_url: str, max_size: int = 10 * 1024 * 1024) -> str:
     """
-    Fetch a file from URL and return its content as text.
-    Works with Google Drive URLs, text files, and can attempt text extraction from binary files.
     """
-    if not aiohttp:
-        return "[File fetching requires aiohttp - install via pip install aiohttp]"
     try:
         # Convert Google Drive sharing link to direct download link if needed
         if "drive.google.com" in file_url:
             # Extract file ID from Google Drive URL
             import re
-            file_id_match = re.search(r'/d/([a-zA-Z0-9-_]+)', file_url)
             if not file_id_match:
-                file_id_match = re.search(r'id=([a-zA-Z0-9-_]+)', file_url)
             if file_id_match:
                 file_id = file_id_match.group(1)
                 # Use export=download for Google Drive files
                 file_url = f"https://drive.google.com/uc?id={file_id}&export=download"
         async with aiohttp.ClientSession() as session:
-            async with session.get(file_url, timeout=aiohttp.ClientTimeout(total=15), allow_redirects=True) as resp:
                 if resp.status != 200:
                     return f"[Could not fetch file: HTTP {resp.status}]"
                 content = await resp.read()
                 if len(content) > max_size:
                     return f"[File too large to process: {len(content) / 1024 / 1024:.1f}MB, max 10MB]"
                 # Try to decode as text
                 try:
-                    text = content.decode('utf-8')
                     # Limit preview to first 3000 chars
                     return text[:3000]
                 except UnicodeDecodeError:
@@ -296,17 +395,19 @@ async def chat(request: Request):
     # Extract and fetch file URLs from the message
     file_urls = extract_file_urls(user_input)
     file_content_parts = []
     if file_urls:
         for url in file_urls:
             print(f"[File Processing] Fetching: {url[:80]}...")
             content = await fetch_file_from_url(url)
             if content:
                 file_content_parts.append(content)
         # Append file contents to user input so the model can process them
         if file_content_parts:
-            file_section = "\n\n[File Contents Retrieved]:\n" + "\n---\n".join(file_content_parts)
             user_input = user_input + file_section
     llm = get_model(model_file)

 import psutil
 import asyncio
 import re
+import tempfile
+import shutil
 from pathlib import Path
 from typing import Any, Dict, List, Optional
+from datetime import datetime, timedelta
+from fastapi import FastAPI, Request, HTTPException, UploadFile, File
 from fastapi.responses import StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
 from llama_cpp import Llama
 current_model: Optional[Llama] = None
 current_model_name: str = ""
+# --- File Upload Configuration ---
+UPLOAD_DIR = Path(tempfile.gettempdir()) / "hannah_uploads"
 def _model_abs_path(model_name: str) -> Path:
     # Always resolve relative to the app directory to avoid cwd surprises.
         return {"title": "New Chat"}
def cleanup_old_files(max_age_hours: int = 24) -> None:
    """Delete regular files in UPLOAD_DIR that were last modified too long ago.

    Args:
        max_age_hours: Files whose modification time is more than this many
            hours in the past are removed.

    Cleanup is best-effort: a file that cannot be inspected or deleted is
    skipped, and the remaining files are still processed.
    """
    if not UPLOAD_DIR.exists():
        return

    # Compare epoch seconds directly instead of subtracting naive local
    # datetimes, which can be off by an hour across a DST transition.
    cutoff = datetime.now().timestamp() - max_age_hours * 3600

    for file_path in UPLOAD_DIR.glob("*"):
        try:
            # stat() is inside the try as well: the file may disappear between
            # the directory listing and this call (e.g. a concurrent cleanup).
            if file_path.is_file() and file_path.stat().st_mtime < cutoff:
                file_path.unlink()
        except OSError:
            # Vanished or not deletable; leave it for a later pass.
            continue
@app.post("/api/upload")
async def upload_file(file: UploadFile = File(...)):
    """Store an uploaded file under UPLOAD_DIR and return a short text preview.

    Args:
        file: The multipart upload. Its ``filename`` is supplied by the client
            and is therefore treated as untrusted.

    Returns:
        A dict with ``success``, the original ``filename``, the stored path as
        ``file_url``, ``size_kb`` and a ``preview`` of the first 1000
        characters decoded as UTF-8 (undecodable bytes are dropped).

    Raises:
        HTTPException: 413 if the upload exceeds 50MB; 500 for any other
            failure while storing the file.
    """
    max_bytes = 50 * 1024 * 1024
    try:
        UPLOAD_DIR.mkdir(parents=True, exist_ok=True)

        # Read at most one byte past the limit: enough to detect an oversized
        # upload without pulling an arbitrarily large body into memory.
        content = await file.read(max_bytes + 1)
        if len(content) > max_bytes:
            raise HTTPException(status_code=413, detail="File too large (max 50MB)")

        # Keep only the final path component and replace anything that is not
        # a word character, dot or dash. This blocks directory separators and
        # also "]" / whitespace, which would corrupt the
        # "[File uploaded: <path>]" reference parsed by extract_file_urls.
        raw_name = (file.filename or "upload").replace("\\", "/").rsplit("/", 1)[-1]
        safe_name = re.sub(r"[^\w.\-]", "_", raw_name) or "upload"

        # Timestamp prefix keeps repeated uploads of the same name apart.
        timestamp = datetime.now().timestamp()
        file_path = UPLOAD_DIR / f"{timestamp}_{safe_name}"
        file_path.write_bytes(content)

        # errors="ignore" never raises, so no try/except is needed here.
        preview = content.decode("utf-8", errors="ignore")[:1000]

        # Housekeeping runs inline and is best-effort: the upload has already
        # been stored, so a cleanup problem must not turn it into an error.
        try:
            cleanup_old_files()
        except Exception as exc:
            print(f"[Upload] Cleanup of old files failed: {exc}")

        return {
            "success": True,
            "filename": file.filename,
            "file_url": str(file_path),
            "size_kb": len(content) / 1024,
            "preview": preview,
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
 def extract_file_urls(message: str) -> List[str]:
+    """Extract file URLs from message (Google Drive URLs and uploaded file paths)."""
+    urls = []
+    # Extract Google Drive URLs
+    drive_pattern = r"https://drive\.google\.com/[^\s\)\"<>]*"
+    urls.extend(re.findall(drive_pattern, message))
+    # Extract uploaded file references: [File uploaded: path]
+    upload_pattern = r"\[File uploaded: ([^\]]+)\]"
+    urls.extend(re.findall(upload_pattern, message))
+    return urls
 async def fetch_file_from_url(file_url: str, max_size: int = 10 * 1024 * 1024) -> str:
     """
+    Fetch a file from URL or local path and return its content as text.
+    Works with:
+    - Local file paths (uploaded files)
+    - Google Drive URLs
+    - Text files via HTTP
     """
     try:
+        # Check if it's a local file path first
+        local_path = Path(file_url)
+        if local_path.exists() and local_path.is_file():
+            try:
+                with open(local_path, "rb") as f:
+                    content = f.read()
+                if len(content) > max_size:
+                    return f"[File too large to process: {len(content) / 1024 / 1024:.1f}MB, max 10MB]"
+                try:
+                    text = content.decode("utf-8", errors="ignore")
+                    return text[:3000]
+                except Exception:
+                    return f"[Binary file detected. Size: {len(content) / 1024:.1f}KB.]"
+            except Exception as e:
+                return f"[Could not read local file: {str(e)[:100]}]"
+        # Handle remote URLs (Google Drive, HTTP, etc.)
+        if not aiohttp:
+            return "[File fetching requires aiohttp - install via pip install aiohttp]"
         # Convert Google Drive sharing link to direct download link if needed
         if "drive.google.com" in file_url:
             # Extract file ID from Google Drive URL
             import re
+            file_id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", file_url)
             if not file_id_match:
+                file_id_match = re.search(r"id=([a-zA-Z0-9-_]+)", file_url)
             if file_id_match:
                 file_id = file_id_match.group(1)
                 # Use export=download for Google Drive files
                 file_url = f"https://drive.google.com/uc?id={file_id}&export=download"
         async with aiohttp.ClientSession() as session:
+            async with session.get(
+                file_url, timeout=aiohttp.ClientTimeout(total=15), allow_redirects=True
+            ) as resp:
                 if resp.status != 200:
                     return f"[Could not fetch file: HTTP {resp.status}]"
                 content = await resp.read()
                 if len(content) > max_size:
                     return f"[File too large to process: {len(content) / 1024 / 1024:.1f}MB, max 10MB]"
                 # Try to decode as text
                 try:
+                    text = content.decode("utf-8")
                     # Limit preview to first 3000 chars
                     return text[:3000]
                 except UnicodeDecodeError:
     # Extract and fetch file URLs from the message
     file_urls = extract_file_urls(user_input)
     file_content_parts = []
     if file_urls:
         for url in file_urls:
             print(f"[File Processing] Fetching: {url[:80]}...")
             content = await fetch_file_from_url(url)
             if content:
                 file_content_parts.append(content)
         # Append file contents to user input so the model can process them
         if file_content_parts:
+            file_section = "\n\n[File Contents Retrieved]:\n" + "\n---\n".join(
+                file_content_parts
+            )
             user_input = user_input + file_section
     llm = get_model(model_file)