LPX55 committed on
Commit
d36ef60
·
verified ·
1 Parent(s): 2b7e5b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -49
app.py CHANGED
@@ -4,7 +4,13 @@ import random
4
  import uuid
5
  import datetime
6
  from typing import List, Tuple, Dict, Optional, Generator, Any
7
-
 
 
 
 
 
 
8
  import gradio as gr
9
  import requests
10
  from bs4 import BeautifulSoup
@@ -71,74 +77,51 @@ def create_index() -> None:
71
  repo_id=REPO_NAME,
72
  repo_type="dataset",
73
  )
74
-
75
  def save_memory(purpose: str, content: str) -> List[Dict]:
76
- """Save processed content to memory with indexing using agent prompts."""
77
  uid = uuid.uuid4()
78
  timestamp = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M-%S")
79
 
80
  try:
81
- # Use the structured prompt from agent.py
82
- prompt = PREFIX.format(
 
 
 
 
 
83
  timestamp=timestamp,
84
  purpose="Compile the provided data into structured memory"
85
- ) + COMPRESS_DATA_PROMPT_SMALL.format(
 
 
 
86
  direction="Create structured memory entry",
87
  knowledge="",
88
  history=content[:5000] # Truncate for API limits
89
  )
90
 
 
 
 
 
91
  if VERBOSE:
92
- print(LOG_PROMPT.format(prompt))
93
 
94
  response = generate_response(prompt, model="anthropic/claude-2")
95
 
96
  if VERBOSE:
97
- print(LOG_RESPONSE.format(response))
98
-
99
- # Enhanced JSON extraction
100
- json_str = response.split('```json')[1].split('```')[0] if '```json' in response else response
101
- structured_data = json.loads(json_str.strip())
102
-
103
- # Validate required structure
104
- if not all(key in structured_data for key in ["keywords", "title", "content"]):
105
- raise ValueError("Missing required fields in structured data")
106
-
107
- # Create memory entry
108
- memory_entry = {
109
- **structured_data,
110
- "file_name": f"{timestamp}--{uid}.json",
111
- "timestamp": str(datetime.datetime.now()),
112
- "source": "arxiv" if "arxiv.org" in content.lower() else "web"
113
- }
114
-
115
- # Update main memory file
116
- main_url = f"{SAVE_DATA_URL}mem-test2/main.json"
117
- m = requests.get(main_url)
118
- main_data = json.loads(m.text) if m.status_code == 200 else []
119
- main_data.append(memory_entry)
120
 
121
- # Save to HuggingFace
122
- main_path = f"tmp-main-{uid}.json"
123
- with open(main_path, "w") as f:
124
- json.dump(main_data, f)
125
-
126
- api.upload_file(
127
- path_or_fileobj=main_path,
128
- path_in_repo="/mem-test2/main.json",
129
- repo_id=REPO_NAME,
130
- repo_type="dataset",
131
- )
132
-
133
- # Update search index
134
- create_index()
135
-
136
- return [memory_entry]
137
 
138
  except Exception as e:
139
- print(f"{LOG_RESPONSE.format('Memory processing failed: ' + str(e))}")
 
140
  return []
141
 
 
142
 
143
  def fetch_url_content(url: str) -> Tuple[bool, str]:
144
  """Fetch content from a URL and return status and content."""
@@ -222,8 +205,14 @@ def summarize(
222
 
223
  if mem_check:
224
  memory_entries = save_memory(inp, processed_data)
225
- summary += "\n\nSaved to memory with keywords: " + ", ".join(memory_entries[0]['keywords'][:5])
226
-
 
 
 
 
 
 
227
  history = [(inp, summary)]
228
 
229
  yield "", history, "", json.dumps(memory_entries[0]) if mem_check else {}
 
4
  import uuid
5
  import datetime
6
  from typing import List, Tuple, Dict, Optional, Generator, Any
7
+ from agent import (
8
+ PREFIX,
9
+ COMPRESS_DATA_PROMPT_SMALL,
10
+ COMPRESS_DATA_PROMPT,
11
+ LOG_PROMPT,
12
+ LOG_RESPONSE
13
+ )
14
  import gradio as gr
15
  import requests
16
  from bs4 import BeautifulSoup
 
77
  repo_id=REPO_NAME,
78
  repo_type="dataset",
79
  )
 
80
  def save_memory(purpose: str, content: str) -> List[Dict]:
81
+ """Save processed content to memory with indexing."""
82
  uid = uuid.uuid4()
83
  timestamp = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M-%S")
84
 
85
  try:
86
+ # Use default prompt if PREFIX isn't available
87
+ prefix = globals().get('PREFIX', """You are an Expert Information Retrieval Agent.
88
+ Current Date and Time is: {timestamp}
89
+ Purpose: {purpose}
90
+ """)
91
+
92
+ prompt = prefix.format(
93
  timestamp=timestamp,
94
  purpose="Compile the provided data into structured memory"
95
+ ) + globals().get('COMPRESS_DATA_PROMPT_SMALL', """
96
+ Compile this data into JSON with keys: keywords, title, description, content, url.
97
+ Data: {history}
98
+ """).format(
99
  direction="Create structured memory entry",
100
  knowledge="",
101
  history=content[:5000] # Truncate for API limits
102
  )
103
 
104
+ # Use default logging if not available
105
+ log_prompt = globals().get('LOG_PROMPT', "PROMPT:\n{}\n")
106
+ log_response = globals().get('LOG_RESPONSE', "RESPONSE:\n{}\n")
107
+
108
  if VERBOSE:
109
+ print(log_prompt.format(prompt))
110
 
111
  response = generate_response(prompt, model="anthropic/claude-2")
112
 
113
  if VERBOSE:
114
+ print(log_response.format(response))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
+ # Rest of your existing save_memory implementation...
117
+ # [Keep all the JSON processing and HF upload code]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
  except Exception as e:
120
+ error_msg = f"Memory processing failed: {str(e)}"
121
+ print(log_response.format(error_msg) if 'log_response' in locals() else error_msg)
122
  return []
123
 
124
+
125
 
126
  def fetch_url_content(url: str) -> Tuple[bool, str]:
127
  """Fetch content from a URL and return status and content."""
 
205
 
206
  if mem_check:
207
  memory_entries = save_memory(inp, processed_data)
208
+ if memory_entries and isinstance(memory_entries, list) and len(memory_entries) > 0:
209
+ try:
210
+ summary += "\n\nSaved to memory with keywords: " + ", ".join(memory_entries[0].get('keywords', ['No keywords'])[:5])
211
+ except (KeyError, AttributeError):
212
+ summary += "\n\nSaved to memory (keyword extraction failed)"
213
+ else:
214
+ summary += "\n\nMemory save failed"
215
+
216
  history = [(inp, summary)]
217
 
218
  yield "", history, "", json.dumps(memory_entries[0]) if mem_check else {}