Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,6 +2,8 @@ import os
|
|
| 2 |
import glob
|
| 3 |
import json
|
| 4 |
import psutil
|
|
|
|
|
|
|
| 5 |
from pathlib import Path
|
| 6 |
from typing import Any, Dict, List, Optional
|
| 7 |
|
|
@@ -10,6 +12,11 @@ from fastapi.responses import StreamingResponse
|
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
from llama_cpp import Llama
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
app = FastAPI(title="Hannah Pilot Interface")
|
| 14 |
|
| 15 |
# --- CORS Permissions ---
|
|
@@ -186,13 +193,67 @@ async def gen_title(request: Request):
|
|
| 186 |
return {"title": "New Chat"}
|
| 187 |
|
| 188 |
|
| 189 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
# Qwen 2.5 chat format with optional web context awareness
|
| 191 |
system = (
|
| 192 |
"You are Hannah 1.0, an intelligent, fast, and helpful AI assistant. "
|
| 193 |
"Answer clearly and accurately. "
|
| 194 |
)
|
| 195 |
-
|
| 196 |
# If web context is available, instruct the model to use it
|
| 197 |
if has_web_context:
|
| 198 |
system += (
|
|
@@ -200,7 +261,7 @@ def build_prompt(user_input: str, history: List[Dict[str, str]], has_web_context
|
|
| 200 |
"Use this context to provide current, accurate information about recent events and dates. "
|
| 201 |
"Reference the sources when relevant. "
|
| 202 |
)
|
| 203 |
-
|
| 204 |
system += (
|
| 205 |
"Keep responses concise but helpful. "
|
| 206 |
"If asked about your model or training details, simply say: 'I'm Hannah - a helpful AI assistant.' "
|
|
@@ -232,8 +293,24 @@ async def chat(request: Request):
|
|
| 232 |
if not user_input:
|
| 233 |
raise HTTPException(status_code=400, detail="Empty message")
|
| 234 |
|
| 235 |
-
|
|
|
|
|
|
|
| 236 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
# Detect if the message includes web context
|
| 238 |
has_web_context = has_web and "[Web context retrieved on" in user_input
|
| 239 |
|
|
|
|
| 2 |
import glob
|
| 3 |
import json
|
| 4 |
import psutil
|
| 5 |
+
import asyncio
|
| 6 |
+
import re
|
| 7 |
from pathlib import Path
|
| 8 |
from typing import Any, Dict, List, Optional
|
| 9 |
|
|
|
|
| 12 |
from fastapi.middleware.cors import CORSMiddleware
|
| 13 |
from llama_cpp import Llama
|
| 14 |
|
| 15 |
+
try:
|
| 16 |
+
import aiohttp
|
| 17 |
+
except ImportError:
|
| 18 |
+
aiohttp = None
|
| 19 |
+
|
| 20 |
app = FastAPI(title="Hannah Pilot Interface")
|
| 21 |
|
| 22 |
# --- CORS Permissions ---
|
|
|
|
| 193 |
return {"title": "New Chat"}
|
| 194 |
|
| 195 |
|
| 196 |
+
def extract_file_urls(message: str) -> List[str]:
    """Extract Google Drive file URLs embedded in a chat message.

    Args:
        message: Raw user message text.

    Returns:
        Google Drive URLs found in the message, in order of appearance.
        Trailing sentence punctuation (e.g. a period right after a pasted
        link) is stripped so downstream fetching receives a clean URL.
    """
    pattern = r'https://drive\.google\.com/[^\s\)\"<>]*'
    # URLs pasted into prose often pick up the sentence's closing
    # punctuation, since '.' ',' etc. are valid URL characters.
    return [url.rstrip('.,;:!?') for url in re.findall(pattern, message)]
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
async def fetch_file_from_url(file_url: str, max_size: int = 10 * 1024 * 1024) -> str:
    """
    Fetch a file from a URL and return its content as text.

    Works with Google Drive URLs, plain text files, and notes binary files
    it cannot decode. All failures are reported as bracketed placeholder
    strings rather than raised, so the caller can always splice the result
    directly into the prompt.

    Args:
        file_url: URL to fetch. Google Drive sharing links are rewritten to
            their direct-download (``uc?id=...&export=download``) form.
        max_size: Maximum accepted payload size in bytes (default 10 MB).

    Returns:
        Decoded UTF-8 text capped at 3000 characters (with a truncation
        marker when capped), or a bracketed status message on any failure.
    """
    if not aiohttp:
        return "[File fetching requires aiohttp - install via pip install aiohttp]"

    max_mb = max_size / 1024 / 1024
    try:
        # Convert a Google Drive sharing link into a direct download link.
        if "drive.google.com" in file_url:
            # File IDs appear either in the path (/d/<id>/) or as an id= query param.
            file_id_match = re.search(r'/d/([a-zA-Z0-9-_]+)', file_url)
            if not file_id_match:
                file_id_match = re.search(r'id=([a-zA-Z0-9-_]+)', file_url)

            if file_id_match:
                file_id = file_id_match.group(1)
                # export=download skips the Drive preview page.
                file_url = f"https://drive.google.com/uc?id={file_id}&export=download"

        async with aiohttp.ClientSession() as session:
            async with session.get(file_url, timeout=aiohttp.ClientTimeout(total=15), allow_redirects=True) as resp:
                if resp.status != 200:
                    return f"[Could not fetch file: HTTP {resp.status}]"

                # Bail out early when the server declares an oversized payload,
                # instead of downloading the whole body only to reject it.
                declared = resp.headers.get("Content-Length")
                if declared and declared.isdigit() and int(declared) > max_size:
                    return f"[File too large to process: {int(declared) / 1024 / 1024:.1f}MB, max {max_mb:.0f}MB]"

                content = await resp.read()

                # Servers may omit or misreport Content-Length; re-check the
                # actual payload size.
                if len(content) > max_size:
                    return f"[File too large to process: {len(content) / 1024 / 1024:.1f}MB, max {max_mb:.0f}MB]"

                # Try to decode as text.
                try:
                    text = content.decode('utf-8')
                except UnicodeDecodeError:
                    # Binary payload: report its size rather than raw bytes.
                    return f"[Binary file detected. Size: {len(content) / 1024:.1f}KB. Please describe what you see in it.]"

                # Cap the preview and flag the truncation so the model knows
                # the content is partial (previously it was cut silently).
                if len(text) > 3000:
                    return text[:3000] + "\n[...content truncated]"
                return text
    except asyncio.TimeoutError:
        return "[File fetch timed out - file may be too large or URL invalid]"
    except Exception as e:
        return f"[Could not fetch file: {str(e)[:100]}]"
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
def build_prompt(
|
| 249 |
+
user_input: str, history: List[Dict[str, str]], has_web_context: bool = False
|
| 250 |
+
) -> str:
|
| 251 |
# Qwen 2.5 chat format with optional web context awareness
|
| 252 |
system = (
|
| 253 |
"You are Hannah 1.0, an intelligent, fast, and helpful AI assistant. "
|
| 254 |
"Answer clearly and accurately. "
|
| 255 |
)
|
| 256 |
+
|
| 257 |
# If web context is available, instruct the model to use it
|
| 258 |
if has_web_context:
|
| 259 |
system += (
|
|
|
|
| 261 |
"Use this context to provide current, accurate information about recent events and dates. "
|
| 262 |
"Reference the sources when relevant. "
|
| 263 |
)
|
| 264 |
+
|
| 265 |
system += (
|
| 266 |
"Keep responses concise but helpful. "
|
| 267 |
"If asked about your model or training details, simply say: 'I'm Hannah - a helpful AI assistant.' "
|
|
|
|
| 293 |
if not user_input:
|
| 294 |
raise HTTPException(status_code=400, detail="Empty message")
|
| 295 |
|
| 296 |
+
# Extract and fetch file URLs from the message
|
| 297 |
+
file_urls = extract_file_urls(user_input)
|
| 298 |
+
file_content_parts = []
|
| 299 |
|
| 300 |
+
if file_urls:
|
| 301 |
+
for url in file_urls:
|
| 302 |
+
print(f"[File Processing] Fetching: {url[:80]}...")
|
| 303 |
+
content = await fetch_file_from_url(url)
|
| 304 |
+
if content:
|
| 305 |
+
file_content_parts.append(content)
|
| 306 |
+
|
| 307 |
+
# Append file contents to user input so the model can process them
|
| 308 |
+
if file_content_parts:
|
| 309 |
+
file_section = "\n\n[File Contents Retrieved]:\n" + "\n---\n".join(file_content_parts)
|
| 310 |
+
user_input = user_input + file_section
|
| 311 |
+
|
| 312 |
+
llm = get_model(model_file)
|
| 313 |
+
|
| 314 |
# Detect if the message includes web context
|
| 315 |
has_web_context = has_web and "[Web context retrieved on" in user_input
|
| 316 |
|