Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -73,47 +73,72 @@ def create_index() -> None:
|
|
| 73 |
)
|
| 74 |
|
| 75 |
def save_memory(purpose: str, content: str) -> List[Dict]:
|
| 76 |
-
"""Save processed content to memory with indexing."""
|
| 77 |
uid = uuid.uuid4()
|
| 78 |
timestamp = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M-%S")
|
| 79 |
|
| 80 |
-
# Generate structured data
|
| 81 |
-
prompt = f"{INDEX_PROMPT}\nData to index:\n{content[:5000]}" # Truncate for API limits
|
| 82 |
try:
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
except Exception as e:
|
| 86 |
-
print(f"Memory processing
|
| 87 |
return []
|
| 88 |
-
|
| 89 |
-
# Save to memory files
|
| 90 |
-
memory_entry = {
|
| 91 |
-
**structured_data,
|
| 92 |
-
"file_name": f"{timestamp}--{uid}.json",
|
| 93 |
-
"timestamp": str(datetime.datetime.now())
|
| 94 |
-
}
|
| 95 |
-
|
| 96 |
-
# Update main memory file
|
| 97 |
-
main_url = f"{SAVE_DATA_URL}mem-test2/main.json"
|
| 98 |
-
m = requests.get(main_url)
|
| 99 |
-
main_data = json.loads(m.text) if m.status_code == 200 else []
|
| 100 |
-
main_data.append(memory_entry)
|
| 101 |
-
|
| 102 |
-
main_path = f"tmp-main-{uid}.json"
|
| 103 |
-
with open(main_path, "w") as f:
|
| 104 |
-
json.dump(main_data, f)
|
| 105 |
-
|
| 106 |
-
api.upload_file(
|
| 107 |
-
path_or_fileobj=main_path,
|
| 108 |
-
path_in_repo="/mem-test2/main.json",
|
| 109 |
-
repo_id=REPO_NAME,
|
| 110 |
-
repo_type="dataset",
|
| 111 |
-
)
|
| 112 |
-
|
| 113 |
-
# Update index
|
| 114 |
-
create_index()
|
| 115 |
-
|
| 116 |
-
return [memory_entry]
|
| 117 |
|
| 118 |
def fetch_url_content(url: str) -> Tuple[bool, str]:
|
| 119 |
"""Fetch content from a URL and return status and content."""
|
|
|
|
| 73 |
)
|
| 74 |
|
| 75 |
def save_memory(purpose: str, content: str) -> List[Dict]:
    """Save processed content to memory with indexing using agent prompts.

    The content (truncated to 5000 characters) is sent to the model via the
    ``PREFIX`` + ``COMPRESS_DATA_PROMPT_SMALL`` prompts. The JSON reply is
    parsed and validated, stamped with a file name, timestamp and source,
    appended to ``mem-test2/main.json`` in the dataset repo, and the search
    index is rebuilt with ``create_index()``.

    Args:
        purpose: Accepted for interface compatibility; not used here, since
            the prompt is built with a fixed purpose string.
        content: Raw text to compile into a structured memory entry.

    Returns:
        A one-element list holding the stored memory entry, or an empty list
        if any step fails (the failure is logged, never raised).
    """
    uid = uuid.uuid4()
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M-%S")

    try:
        # Use the structured prompt from agent.py
        prompt = PREFIX.format(
            timestamp=timestamp,
            purpose="Compile the provided data into structured memory",
        ) + COMPRESS_DATA_PROMPT_SMALL.format(
            direction="Create structured memory entry",
            knowledge="",
            history=content[:5000],  # Truncate for API limits
        )

        if VERBOSE:
            print(LOG_PROMPT.format(prompt))

        response = generate_response(prompt, model="anthropic/claude-2")

        if VERBOSE:
            print(LOG_RESPONSE.format(response))

        # Prefer the first ```json fenced block; otherwise treat the whole
        # reply as JSON.
        if "```json" in response:
            json_str = response.split("```json")[1].split("```")[0]
        else:
            json_str = response
        structured_data = json.loads(json_str.strip())

        # Validate required structure. The dict check gives a clear error
        # instead of a TypeError from the ** expansion below.
        if not isinstance(structured_data, dict):
            raise ValueError("Structured data is not a JSON object")
        required_keys = ("keywords", "title", "content")
        if not all(key in structured_data for key in required_keys):
            raise ValueError("Missing required fields in structured data")

        # Create memory entry
        memory_entry = {
            **structured_data,
            "file_name": f"{timestamp}--{uid}.json",
            "timestamp": str(datetime.datetime.now()),
            "source": "arxiv" if "arxiv.org" in content.lower() else "web",
        }

        # Load the current main memory file. Only a 404 means "no memory
        # yet"; any other non-200 status aborts the save so a transient
        # server error cannot cause the existing file to be overwritten
        # with just the new entry.
        main_url = f"{SAVE_DATA_URL}mem-test2/main.json"
        m = requests.get(main_url, timeout=30)
        if m.status_code == 200:
            main_data = json.loads(m.text)
        elif m.status_code == 404:
            main_data = []
        else:
            raise RuntimeError(
                f"Unexpected status {m.status_code} fetching {main_url}"
            )
        main_data.append(memory_entry)

        # Save to HuggingFace. The payload is uploaded from memory, so no
        # temporary file is left behind in the working directory.
        api.upload_file(
            path_or_fileobj=json.dumps(main_data).encode("utf-8"),
            path_in_repo="/mem-test2/main.json",
            repo_id=REPO_NAME,
            repo_type="dataset",
        )

        # Update search index
        create_index()

        return [memory_entry]

    except Exception as e:
        # Best-effort boundary: log the failure and report "nothing saved".
        print(LOG_RESPONSE.format('Memory processing failed: ' + str(e)))
        return []
|
| 141 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
def fetch_url_content(url: str) -> Tuple[bool, str]:
|
| 144 |
"""Fetch content from a URL and return status and content."""
|