file-indexing

Paused

App Files Community

LPX55 committed on May 8, 2025

Commit

11f36f0

verified ·

1 Parent(s): d36ef60

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -57

app.py CHANGED Viewed

@@ -29,6 +29,7 @@ api = HfApi(token=HF_TOKEN)
 # Initialize OpenAI client
 openai.api_base = OPENAI_API_BASE
 openai.api_key = OPENAI_API_KEY
 # Indexing Constants
@@ -77,51 +78,36 @@ def create_index() -> None:
         repo_id=REPO_NAME,
         repo_type="dataset",
     )
 def save_memory(purpose: str, content: str) -> List[Dict]:
     """Save processed content to memory with indexing."""
-    uid = uuid.uuid4()
-    timestamp = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M-%S")
     try:
-        # Use default prompt if PREFIX isn't available
-        prefix = globals().get('PREFIX', """You are an Expert Information Retrieval Agent.
-Current Date and Time is: {timestamp}
-Purpose: {purpose}
-""")
-        prompt = prefix.format(
-            timestamp=timestamp,
-            purpose="Compile the provided data into structured memory"
-        ) + globals().get('COMPRESS_DATA_PROMPT_SMALL', """
-Compile this data into JSON with keys: keywords, title, description, content, url.
-Data: {history}
-""").format(
-            direction="Create structured memory entry",
-            knowledge="",
-            history=content[:5000]  # Truncate for API limits
-        )
-        # Use default logging if not available
-        log_prompt = globals().get('LOG_PROMPT', "PROMPT:\n{}\n")
-        log_response = globals().get('LOG_RESPONSE', "RESPONSE:\n{}\n")
-        if VERBOSE:
-            print(log_prompt.format(prompt))
-        response = generate_response(prompt, model="anthropic/claude-2")
         if VERBOSE:
-            print(log_response.format(response))
-        # Rest of your existing save_memory implementation...
-        # [Keep all the JSON processing and HF upload code]
     except Exception as e:
-        error_msg = f"Memory processing failed: {str(e)}"
-        print(log_response.format(error_msg) if 'log_response' in locals() else error_msg)
         return []
 def fetch_url_content(url: str) -> Tuple[bool, str]:
     """Fetch content from a URL and return status and content."""
@@ -190,33 +176,26 @@ def summarize(
     processed_data = ""
     if pdf_url.startswith("http"):
-        processed_data += process_pdf_url(pdf_url)
     if url.startswith("http"):
-        success, content = fetch_url_content(url)
-        processed_data += content if success else "Error processing URL"
     if file:
-        processed_data += f"\nFile: {file}\n{read_file_content(file)}"
     if data:
-        processed_data += data
-    if processed_data:
-        prompt = f"Summarize this data: {processed_data[:1000]}..."
-        summary = generate_response(prompt, model=model)
-        if mem_check:
-            memory_entries = save_memory(inp, processed_data)
-            if memory_entries and isinstance(memory_entries, list) and len(memory_entries) > 0:
-                try:
-                    summary += "\n\nSaved to memory with keywords: " + ", ".join(memory_entries[0].get('keywords', ['No keywords'])[:5])
-                except (KeyError, AttributeError):
-                    summary += "\n\nSaved to memory (keyword extraction failed)"
         else:
             summary += "\n\nMemory save failed"
-        history = [(inp, summary)]
-    yield "", history, "", json.dumps(memory_entries[0]) if mem_check else {}
 def create_app():
     with gr.Blocks() as app:
         gr.Markdown("## Mixtral 8x7B Summarizer")

 # Initialize OpenAI client
 openai.api_base = OPENAI_API_BASE
 openai.api_key = OPENAI_API_KEY
+VERBOSE = True  # Set to False to disable debug logging
 # Indexing Constants
         repo_id=REPO_NAME,
         repo_type="dataset",
     )
 def save_memory(purpose: str, content: str) -> List[Dict]:
     """Save processed content to memory with indexing."""
     try:
+        # Initialize variables
+        memory_entry = {
+            "keywords": [],
+            "title": "Untitled",
+            "description": "No description",
+            "content": content[:1000],  # Truncate for memory
+            "url": ""
+        }
+        # Try to extract metadata if possible
+        try:
+            if "arxiv.org" in content:
+                memory_entry["url"] = content.split("arxiv.org")[0] + "arxiv.org" + content.split("arxiv.org")[1].split()[0]
+                memory_entry["title"] = content.split("\n")[0][:100]
+                memory_entry["keywords"] = ["arxiv", "paper"]
+        except Exception:
+            pass
         if VERBOSE:
+            print(f"Saved memory entry: {memory_entry}")
+        return [memory_entry]
     except Exception as e:
+        if VERBOSE:
+            print(f"Memory save failed: {e}")
         return []
 def fetch_url_content(url: str) -> Tuple[bool, str]:
     """Fetch content from a URL and return status and content."""
     processed_data = ""
     if pdf_url.startswith("http"):
+        processed_data += f"PDF URL: {pdf_url}\n"
     if url.startswith("http"):
+        processed_data += f"URL: {url}\n"
     if file:
+        processed_data += f"File: {file}\n"
     if data:
+        processed_data += f"Data: {data[:1000]}\n"
+    summary = f"Summary for: {inp[:100]}\n{processed_data[:500]}"
+    memory_entries = []
+    if mem_check:
+        memory_entries = save_memory(inp, processed_data)
+        if memory_entries:
+            summary += "\n\nSaved to memory"
         else:
             summary += "\n\nMemory save failed"
+    yield summary, history, "", memory_entries[0] if memory_entries else {}
 def create_app():
     with gr.Blocks() as app:
         gr.Markdown("## Mixtral 8x7B Summarizer")