file-indexing

Paused

App Files Files Community

LPX55 committed on May 8, 2025

Commit

d36ef60

verified ·

1 Parent(s): 2b7e5b7

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -49

app.py CHANGED Viewed

@@ -4,7 +4,13 @@ import random
 import uuid
 import datetime
 from typing import List, Tuple, Dict, Optional, Generator, Any
 import gradio as gr
 import requests
 from bs4 import BeautifulSoup
@@ -71,74 +77,51 @@ def create_index() -> None:
         repo_id=REPO_NAME,
         repo_type="dataset",
     )
 def save_memory(purpose: str, content: str) -> List[Dict]:
-    """Save processed content to memory with indexing using agent prompts."""
     uid = uuid.uuid4()
     timestamp = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M-%S")
     try:
-        # Use the structured prompt from agent.py
-        prompt = PREFIX.format(
             timestamp=timestamp,
             purpose="Compile the provided data into structured memory"
-        ) + COMPRESS_DATA_PROMPT_SMALL.format(
             direction="Create structured memory entry",
             knowledge="",
             history=content[:5000]  # Truncate for API limits
         )
         if VERBOSE:
-            print(LOG_PROMPT.format(prompt))
         response = generate_response(prompt, model="anthropic/claude-2")
         if VERBOSE:
-            print(LOG_RESPONSE.format(response))
-        # Enhanced JSON extraction
-        json_str = response.split('```json')[1].split('```')[0] if '```json' in response else response
-        structured_data = json.loads(json_str.strip())
-        # Validate required structure
-        if not all(key in structured_data for key in ["keywords", "title", "content"]):
-            raise ValueError("Missing required fields in structured data")
-        # Create memory entry
-        memory_entry = {
-            **structured_data,
-            "file_name": f"{timestamp}--{uid}.json",
-            "timestamp": str(datetime.datetime.now()),
-            "source": "arxiv" if "arxiv.org" in content.lower() else "web"
-        }
-        # Update main memory file
-        main_url = f"{SAVE_DATA_URL}mem-test2/main.json"
-        m = requests.get(main_url)
-        main_data = json.loads(m.text) if m.status_code == 200 else []
-        main_data.append(memory_entry)
-        # Save to HuggingFace
-        main_path = f"tmp-main-{uid}.json"
-        with open(main_path, "w") as f:
-            json.dump(main_data, f)
-        api.upload_file(
-            path_or_fileobj=main_path,
-            path_in_repo="/mem-test2/main.json",
-            repo_id=REPO_NAME,
-            repo_type="dataset",
-        )
-        # Update search index
-        create_index()
-        return [memory_entry]
     except Exception as e:
-        print(f"{LOG_RESPONSE.format('Memory processing failed: ' + str(e))}")
         return []
 def fetch_url_content(url: str) -> Tuple[bool, str]:
     """Fetch content from a URL and return status and content."""
@@ -222,8 +205,14 @@ def summarize(
         if mem_check:
             memory_entries = save_memory(inp, processed_data)
-            summary += "\n\nSaved to memory with keywords: " + ", ".join(memory_entries[0]['keywords'][:5])
         history = [(inp, summary)]
     yield "", history, "", json.dumps(memory_entries[0]) if mem_check else {}

 import uuid
 import datetime
 from typing import List, Tuple, Dict, Optional, Generator, Any
+from agent import (
+    PREFIX,
+    COMPRESS_DATA_PROMPT_SMALL,
+    COMPRESS_DATA_PROMPT,
+    LOG_PROMPT,
+    LOG_RESPONSE
+)
 import gradio as gr
 import requests
 from bs4 import BeautifulSoup
         repo_id=REPO_NAME,
         repo_type="dataset",
     )
 def save_memory(purpose: str, content: str) -> List[Dict]:
+    """Save processed content to memory with indexing."""
     uid = uuid.uuid4()
     timestamp = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M-%S")
     try:
+        # Use default prompt if PREFIX isn't available
+        prefix = globals().get('PREFIX', """You are an Expert Information Retrieval Agent.
+Current Date and Time is: {timestamp}
+Purpose: {purpose}
+""")
+        prompt = prefix.format(
             timestamp=timestamp,
             purpose="Compile the provided data into structured memory"
+        ) + globals().get('COMPRESS_DATA_PROMPT_SMALL', """
+Compile this data into JSON with keys: keywords, title, description, content, url.
+Data: {history}
+""").format(
             direction="Create structured memory entry",
             knowledge="",
             history=content[:5000]  # Truncate for API limits
         )
+        # Use default logging if not available
+        log_prompt = globals().get('LOG_PROMPT', "PROMPT:\n{}\n")
+        log_response = globals().get('LOG_RESPONSE', "RESPONSE:\n{}\n")
         if VERBOSE:
+            print(log_prompt.format(prompt))
         response = generate_response(prompt, model="anthropic/claude-2")
         if VERBOSE:
+            print(log_response.format(response))
+        # Rest of your existing save_memory implementation...
+        # [Keep all the JSON processing and HF upload code]
     except Exception as e:
+        error_msg = f"Memory processing failed: {str(e)}"
+        print(log_response.format(error_msg) if 'log_response' in locals() else error_msg)
         return []
 def fetch_url_content(url: str) -> Tuple[bool, str]:
     """Fetch content from a URL and return status and content."""
         if mem_check:
             memory_entries = save_memory(inp, processed_data)
+            if memory_entries and isinstance(memory_entries, list) and len(memory_entries) > 0:
+                try:
+                    summary += "\n\nSaved to memory with keywords: " + ", ".join(memory_entries[0].get('keywords', ['No keywords'])[:5])
+                except (KeyError, AttributeError):
+                    summary += "\n\nSaved to memory (keyword extraction failed)"
+        else:
+            summary += "\n\nMemory save failed"
         history = [(inp, summary)]
     yield "", history, "", json.dumps(memory_entries[0]) if mem_check else {}