Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,6 +2,8 @@ import os
|
|
| 2 |
import glob
|
| 3 |
import json
|
| 4 |
import psutil
|
|
|
|
|
|
|
| 5 |
from pathlib import Path
|
| 6 |
from typing import Any, Dict, List, Optional
|
| 7 |
|
|
@@ -10,6 +12,11 @@ from fastapi.responses import StreamingResponse
|
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
from llama_cpp import Llama
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
app = FastAPI(title="Hannah Pilot Interface")
|
| 14 |
|
| 15 |
# --- CORS Permissions ---
|
|
@@ -186,13 +193,67 @@ async def gen_title(request: Request):
|
|
| 186 |
return {"title": "New Chat"}
|
| 187 |
|
| 188 |
|
| 189 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
# Qwen 2.5 chat format with optional web context awareness
|
| 191 |
system = (
|
| 192 |
"You are Hannah 1.0, an intelligent, fast, and helpful AI assistant. "
|
| 193 |
"Answer clearly and accurately. "
|
| 194 |
)
|
| 195 |
-
|
| 196 |
# If web context is available, instruct the model to use it
|
| 197 |
if has_web_context:
|
| 198 |
system += (
|
|
@@ -200,7 +261,7 @@ def build_prompt(user_input: str, history: List[Dict[str, str]], has_web_context
|
|
| 200 |
"Use this context to provide current, accurate information about recent events and dates. "
|
| 201 |
"Reference the sources when relevant. "
|
| 202 |
)
|
| 203 |
-
|
| 204 |
system += (
|
| 205 |
"Keep responses concise but helpful. "
|
| 206 |
"If asked about your model or training details, simply say: 'I'm Hannah - a helpful AI assistant.' "
|
|
@@ -232,8 +293,24 @@ async def chat(request: Request):
|
|
| 232 |
if not user_input:
|
| 233 |
raise HTTPException(status_code=400, detail="Empty message")
|
| 234 |
|
| 235 |
-
|
|
|
|
|
|
|
| 236 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
# Detect if the message includes web context
|
| 238 |
has_web_context = has_web and "[Web context retrieved on" in user_input
|
| 239 |
|
|
|
|
| 2 |
import glob
|
| 3 |
import json
|
| 4 |
import psutil
|
| 5 |
+
import asyncio
|
| 6 |
+
import re
|
| 7 |
from pathlib import Path
|
| 8 |
from typing import Any, Dict, List, Optional
|
| 9 |
|
|
|
|
| 12 |
from fastapi.middleware.cors import CORSMiddleware
|
| 13 |
from llama_cpp import Llama
|
| 14 |
|
| 15 |
+
try:
|
| 16 |
+
import aiohttp
|
| 17 |
+
except ImportError:
|
| 18 |
+
aiohttp = None
|
| 19 |
+
|
| 20 |
app = FastAPI(title="Hannah Pilot Interface")
|
| 21 |
|
| 22 |
# --- CORS Permissions ---
|
|
|
|
| 193 |
return {"title": "New Chat"}
|
| 194 |
|
| 195 |
|
| 196 |
+
def extract_file_urls(message: str) -> List[str]:
    """Extract Google Drive file URLs embedded in a chat message.

    Args:
        message: Raw user message text.

    Returns:
        Google Drive URLs found in the message, in order of appearance.
        Trailing sentence punctuation (e.g. a period right after a pasted
        link) is stripped so downstream fetching receives a clean URL.
    """
    pattern = r'https://drive\.google\.com/[^\s\)\"<>]*'
    # URLs pasted into prose often pick up the sentence's closing
    # punctuation, since '.' ',' etc. are valid URL characters.
    return [url.rstrip('.,;:!?') for url in re.findall(pattern, message)]
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
async def fetch_file_from_url(file_url: str, max_size: int = 10 * 1024 * 1024) -> str:
    """
    Fetch a file from a URL and return its content as text.

    Works with Google Drive URLs, plain text files, and notes binary files
    it cannot decode. All failures are reported as bracketed placeholder
    strings rather than raised, so the caller can always splice the result
    directly into the prompt.

    Args:
        file_url: URL to fetch. Google Drive sharing links are rewritten to
            their direct-download (``uc?id=...&export=download``) form.
        max_size: Maximum accepted payload size in bytes (default 10 MB).

    Returns:
        Decoded UTF-8 text capped at 3000 characters (with a truncation
        marker when capped), or a bracketed status message on any failure.
    """
    if not aiohttp:
        return "[File fetching requires aiohttp - install via pip install aiohttp]"

    max_mb = max_size / 1024 / 1024
    try:
        # Convert a Google Drive sharing link into a direct download link.
        if "drive.google.com" in file_url:
            # File IDs appear either in the path (/d/<id>/) or as an id= query param.
            file_id_match = re.search(r'/d/([a-zA-Z0-9-_]+)', file_url)
            if not file_id_match:
                file_id_match = re.search(r'id=([a-zA-Z0-9-_]+)', file_url)

            if file_id_match:
                file_id = file_id_match.group(1)
                # export=download skips the Drive preview page.
                file_url = f"https://drive.google.com/uc?id={file_id}&export=download"

        async with aiohttp.ClientSession() as session:
            async with session.get(file_url, timeout=aiohttp.ClientTimeout(total=15), allow_redirects=True) as resp:
                if resp.status != 200:
                    return f"[Could not fetch file: HTTP {resp.status}]"

                # Bail out early when the server declares an oversized payload,
                # instead of downloading the whole body only to reject it.
                declared = resp.headers.get("Content-Length")
                if declared and declared.isdigit() and int(declared) > max_size:
                    return f"[File too large to process: {int(declared) / 1024 / 1024:.1f}MB, max {max_mb:.0f}MB]"

                content = await resp.read()

                # Servers may omit or misreport Content-Length; re-check the
                # actual payload size.
                if len(content) > max_size:
                    return f"[File too large to process: {len(content) / 1024 / 1024:.1f}MB, max {max_mb:.0f}MB]"

                # Try to decode as text.
                try:
                    text = content.decode('utf-8')
                except UnicodeDecodeError:
                    # Binary payload: report its size rather than raw bytes.
                    return f"[Binary file detected. Size: {len(content) / 1024:.1f}KB. Please describe what you see in it.]"

                # Cap the preview and flag the truncation so the model knows
                # the content is partial (previously it was cut silently).
                if len(text) > 3000:
                    return text[:3000] + "\n[...content truncated]"
                return text
    except asyncio.TimeoutError:
        return "[File fetch timed out - file may be too large or URL invalid]"
    except Exception as e:
        return f"[Could not fetch file: {str(e)[:100]}]"
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
def build_prompt(
|
| 249 |
+
user_input: str, history: List[Dict[str, str]], has_web_context: bool = False
|
| 250 |
+
) -> str:
|
| 251 |
# Qwen 2.5 chat format with optional web context awareness
|
| 252 |
system = (
|
| 253 |
"You are Hannah 1.0, an intelligent, fast, and helpful AI assistant. "
|
| 254 |
"Answer clearly and accurately. "
|
| 255 |
)
|
| 256 |
+
|
| 257 |
# If web context is available, instruct the model to use it
|
| 258 |
if has_web_context:
|
| 259 |
system += (
|
|
|
|
| 261 |
"Use this context to provide current, accurate information about recent events and dates. "
|
| 262 |
"Reference the sources when relevant. "
|
| 263 |
)
|
| 264 |
+
|
| 265 |
system += (
|
| 266 |
"Keep responses concise but helpful. "
|
| 267 |
"If asked about your model or training details, simply say: 'I'm Hannah - a helpful AI assistant.' "
|
|
|
|
| 293 |
if not user_input:
|
| 294 |
raise HTTPException(status_code=400, detail="Empty message")
|
| 295 |
|
| 296 |
+
# Extract and fetch file URLs from the message
|
| 297 |
+
file_urls = extract_file_urls(user_input)
|
| 298 |
+
file_content_parts = []
|
| 299 |
|
| 300 |
+
if file_urls:
|
| 301 |
+
for url in file_urls:
|
| 302 |
+
print(f"[File Processing] Fetching: {url[:80]}...")
|
| 303 |
+
content = await fetch_file_from_url(url)
|
| 304 |
+
if content:
|
| 305 |
+
file_content_parts.append(content)
|
| 306 |
+
|
| 307 |
+
# Append file contents to user input so the model can process them
|
| 308 |
+
if file_content_parts:
|
| 309 |
+
file_section = "\n\n[File Contents Retrieved]:\n" + "\n---\n".join(file_content_parts)
|
| 310 |
+
user_input = user_input + file_section
|
| 311 |
+
|
| 312 |
+
llm = get_model(model_file)
|
| 313 |
+
|
| 314 |
# Detect if the message includes web context
|
| 315 |
has_web_context = has_web and "[Web context retrieved on" in user_input
|
| 316 |
|