Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,7 +4,13 @@ import random
|
|
| 4 |
import uuid
|
| 5 |
import datetime
|
| 6 |
from typing import List, Tuple, Dict, Optional, Generator, Any
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
import gradio as gr
|
| 9 |
import requests
|
| 10 |
from bs4 import BeautifulSoup
|
|
@@ -71,74 +77,51 @@ def create_index() -> None:
|
|
| 71 |
repo_id=REPO_NAME,
|
| 72 |
repo_type="dataset",
|
| 73 |
)
|
| 74 |
-
|
| 75 |
def save_memory(purpose: str, content: str) -> List[Dict]:
|
| 76 |
-
"""Save processed content to memory with indexing
|
| 77 |
uid = uuid.uuid4()
|
| 78 |
timestamp = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M-%S")
|
| 79 |
|
| 80 |
try:
|
| 81 |
-
# Use
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
timestamp=timestamp,
|
| 84 |
purpose="Compile the provided data into structured memory"
|
| 85 |
-
) +
|
|
|
|
|
|
|
|
|
|
| 86 |
direction="Create structured memory entry",
|
| 87 |
knowledge="",
|
| 88 |
history=content[:5000] # Truncate for API limits
|
| 89 |
)
|
| 90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
if VERBOSE:
|
| 92 |
-
print(
|
| 93 |
|
| 94 |
response = generate_response(prompt, model="anthropic/claude-2")
|
| 95 |
|
| 96 |
if VERBOSE:
|
| 97 |
-
print(
|
| 98 |
-
|
| 99 |
-
# Enhanced JSON extraction
|
| 100 |
-
json_str = response.split('```json')[1].split('```')[0] if '```json' in response else response
|
| 101 |
-
structured_data = json.loads(json_str.strip())
|
| 102 |
-
|
| 103 |
-
# Validate required structure
|
| 104 |
-
if not all(key in structured_data for key in ["keywords", "title", "content"]):
|
| 105 |
-
raise ValueError("Missing required fields in structured data")
|
| 106 |
-
|
| 107 |
-
# Create memory entry
|
| 108 |
-
memory_entry = {
|
| 109 |
-
**structured_data,
|
| 110 |
-
"file_name": f"{timestamp}--{uid}.json",
|
| 111 |
-
"timestamp": str(datetime.datetime.now()),
|
| 112 |
-
"source": "arxiv" if "arxiv.org" in content.lower() else "web"
|
| 113 |
-
}
|
| 114 |
-
|
| 115 |
-
# Update main memory file
|
| 116 |
-
main_url = f"{SAVE_DATA_URL}mem-test2/main.json"
|
| 117 |
-
m = requests.get(main_url)
|
| 118 |
-
main_data = json.loads(m.text) if m.status_code == 200 else []
|
| 119 |
-
main_data.append(memory_entry)
|
| 120 |
|
| 121 |
-
#
|
| 122 |
-
|
| 123 |
-
with open(main_path, "w") as f:
|
| 124 |
-
json.dump(main_data, f)
|
| 125 |
-
|
| 126 |
-
api.upload_file(
|
| 127 |
-
path_or_fileobj=main_path,
|
| 128 |
-
path_in_repo="/mem-test2/main.json",
|
| 129 |
-
repo_id=REPO_NAME,
|
| 130 |
-
repo_type="dataset",
|
| 131 |
-
)
|
| 132 |
-
|
| 133 |
-
# Update search index
|
| 134 |
-
create_index()
|
| 135 |
-
|
| 136 |
-
return [memory_entry]
|
| 137 |
|
| 138 |
except Exception as e:
|
| 139 |
-
|
|
|
|
| 140 |
return []
|
| 141 |
|
|
|
|
| 142 |
|
| 143 |
def fetch_url_content(url: str) -> Tuple[bool, str]:
|
| 144 |
"""Fetch content from a URL and return status and content."""
|
|
@@ -222,8 +205,14 @@ def summarize(
|
|
| 222 |
|
| 223 |
if mem_check:
|
| 224 |
memory_entries = save_memory(inp, processed_data)
|
| 225 |
-
|
| 226 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
history = [(inp, summary)]
|
| 228 |
|
| 229 |
yield "", history, "", json.dumps(memory_entries[0]) if mem_check else {}
|
|
|
|
| 4 |
import uuid
|
| 5 |
import datetime
|
| 6 |
from typing import List, Tuple, Dict, Optional, Generator, Any
|
| 7 |
+
from agent import (
|
| 8 |
+
PREFIX,
|
| 9 |
+
COMPRESS_DATA_PROMPT_SMALL,
|
| 10 |
+
COMPRESS_DATA_PROMPT,
|
| 11 |
+
LOG_PROMPT,
|
| 12 |
+
LOG_RESPONSE
|
| 13 |
+
)
|
| 14 |
import gradio as gr
|
| 15 |
import requests
|
| 16 |
from bs4 import BeautifulSoup
|
|
|
|
| 77 |
repo_id=REPO_NAME,
|
| 78 |
repo_type="dataset",
|
| 79 |
)
|
|
|
|
| 80 |
def save_memory(purpose: str, content: str) -> List[Dict]:
    """Compile content into a structured memory entry and persist it.

    Builds a prompt from the PREFIX and COMPRESS_DATA_PROMPT_SMALL templates
    (falling back to built-in defaults when those names are not defined at
    module level), asks the model for a JSON summary, validates it, appends
    it to ``mem-test2/main.json`` in the dataset repo and rebuilds the index.

    Args:
        purpose: Caller-supplied purpose. NOTE(review): currently unused; the
            prompt is always built with a fixed purpose string.
        content: Raw text to compile. Only the first 5000 characters are sent
            to the model.

    Returns:
        A one-element list holding the stored memory entry, or an empty list
        if any step failed. This is a best-effort operation, so errors are
        logged rather than raised.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    uid = uuid.uuid4()
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M-%S")

    # Resolve the log templates before the try block so the error handler
    # can always rely on them.
    log_prompt = globals().get('LOG_PROMPT', "PROMPT:\n{}\n")
    log_response = globals().get('LOG_RESPONSE', "RESPONSE:\n{}\n")

    default_prefix = (
        "You are an Expert Information Retrieval Agent.\n"
        "Current Date and Time is: {timestamp}\n"
        "Purpose: {purpose}\n"
    )
    default_compress = (
        "\n"
        "Compile this data into JSON with keys: keywords, title, description, content, url.\n"
        "Data: {history}\n"
    )

    try:
        # Use the default prompts if the agent templates aren't available.
        prefix = globals().get('PREFIX', default_prefix)
        compress = globals().get('COMPRESS_DATA_PROMPT_SMALL', default_compress)

        prompt = prefix.format(
            timestamp=timestamp,
            purpose="Compile the provided data into structured memory"
        ) + compress.format(
            direction="Create structured memory entry",
            knowledge="",
            history=content[:5000]  # Truncate for API limits
        )

        if VERBOSE:
            print(log_prompt.format(prompt))

        response = generate_response(prompt, model="anthropic/claude-2")

        if VERBOSE:
            print(log_response.format(response))

        # The model may wrap its JSON in a ```json fenced block; unwrap it.
        if '```json' in response:
            json_str = response.split('```json')[1].split('```')[0]
        else:
            json_str = response
        structured_data = json.loads(json_str.strip())

        # Validate required structure (a non-dict payload cannot be merged
        # into the entry below, so reject it with the same error).
        required = ["keywords", "title", "content"]
        if not isinstance(structured_data, dict) or not all(
            key in structured_data for key in required
        ):
            raise ValueError("Missing required fields in structured data")

        memory_entry = {
            **structured_data,
            "file_name": f"{timestamp}--{uid}.json",
            "timestamp": str(datetime.datetime.now()),
            "source": "arxiv" if "arxiv.org" in content.lower() else "web",
        }

        # Fetch the current main memory file and append the new entry.
        # NOTE(review): any non-200 response is treated as "no memory yet",
        # so a transient server error would overwrite the stored list with
        # just this entry — confirm that is acceptable.
        main_url = f"{SAVE_DATA_URL}mem-test2/main.json"
        m = requests.get(main_url)
        main_data = json.loads(m.text) if m.status_code == 200 else []
        main_data.append(memory_entry)

        # Stage the file in a temporary directory so nothing is left behind
        # in the working directory after the upload.
        with tempfile.TemporaryDirectory() as tmp_dir:
            main_path = os.path.join(tmp_dir, "main.json")
            with open(main_path, "w") as f:
                json.dump(main_data, f)

            api.upload_file(
                path_or_fileobj=main_path,
                path_in_repo="/mem-test2/main.json",
                repo_id=REPO_NAME,
                repo_type="dataset",
            )

        # Update search index
        create_index()

        return [memory_entry]

    except Exception as e:
        # Best-effort boundary: report the failure and signal it to the
        # caller with an empty list instead of raising.
        error_msg = f"Memory processing failed: {str(e)}"
        print(log_response.format(error_msg))
        return []
|
| 123 |
|
| 124 |
+
|
| 125 |
|
| 126 |
def fetch_url_content(url: str) -> Tuple[bool, str]:
|
| 127 |
"""Fetch content from a URL and return status and content."""
|
|
|
|
| 205 |
|
| 206 |
if mem_check:
|
| 207 |
memory_entries = save_memory(inp, processed_data)
|
| 208 |
+
if memory_entries and isinstance(memory_entries, list) and len(memory_entries) > 0:
|
| 209 |
+
try:
|
| 210 |
+
summary += "\n\nSaved to memory with keywords: " + ", ".join(memory_entries[0].get('keywords', ['No keywords'])[:5])
|
| 211 |
+
except (KeyError, AttributeError):
|
| 212 |
+
summary += "\n\nSaved to memory (keyword extraction failed)"
|
| 213 |
+
else:
|
| 214 |
+
summary += "\n\nMemory save failed"
|
| 215 |
+
|
| 216 |
history = [(inp, summary)]
|
| 217 |
|
| 218 |
yield "", history, "", json.dumps(memory_entries[0]) if mem_check else {}
|