Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -29,6 +29,7 @@ api = HfApi(token=HF_TOKEN)
|
|
| 29 |
# Initialize OpenAI client
|
| 30 |
openai.api_base = OPENAI_API_BASE
|
| 31 |
openai.api_key = OPENAI_API_KEY
|
|
|
|
| 32 |
|
| 33 |
|
| 34 |
# Indexing Constants
|
|
@@ -77,51 +78,36 @@ def create_index() -> None:
|
|
| 77 |
repo_id=REPO_NAME,
|
| 78 |
repo_type="dataset",
|
| 79 |
)
|
|
|
|
| 80 |
def save_memory(purpose: str, content: str) -> List[Dict]:
|
| 81 |
"""Save processed content to memory with indexing."""
|
| 82 |
-
uid = uuid.uuid4()
|
| 83 |
-
timestamp = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M-%S")
|
| 84 |
-
|
| 85 |
try:
|
| 86 |
-
#
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
"""
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
purpose="Compile the provided data into structured memory"
|
| 95 |
-
) + globals().get('COMPRESS_DATA_PROMPT_SMALL', """
|
| 96 |
-
Compile this data into JSON with keys: keywords, title, description, content, url.
|
| 97 |
-
Data: {history}
|
| 98 |
-
""").format(
|
| 99 |
-
direction="Create structured memory entry",
|
| 100 |
-
knowledge="",
|
| 101 |
-
history=content[:5000] # Truncate for API limits
|
| 102 |
-
)
|
| 103 |
-
|
| 104 |
-
# Use default logging if not available
|
| 105 |
-
log_prompt = globals().get('LOG_PROMPT', "PROMPT:\n{}\n")
|
| 106 |
-
log_response = globals().get('LOG_RESPONSE', "RESPONSE:\n{}\n")
|
| 107 |
-
|
| 108 |
-
if VERBOSE:
|
| 109 |
-
print(log_prompt.format(prompt))
|
| 110 |
-
|
| 111 |
-
response = generate_response(prompt, model="anthropic/claude-2")
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
if VERBOSE:
|
| 114 |
-
print(
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
# [Keep all the JSON processing and HF upload code]
|
| 118 |
-
|
| 119 |
except Exception as e:
|
| 120 |
-
|
| 121 |
-
|
| 122 |
return []
|
| 123 |
-
|
| 124 |
-
|
| 125 |
|
| 126 |
def fetch_url_content(url: str) -> Tuple[bool, str]:
|
| 127 |
"""Fetch content from a URL and return status and content."""
|
|
@@ -190,33 +176,26 @@ def summarize(
|
|
| 190 |
|
| 191 |
processed_data = ""
|
| 192 |
if pdf_url.startswith("http"):
|
| 193 |
-
processed_data +=
|
| 194 |
if url.startswith("http"):
|
| 195 |
-
|
| 196 |
-
processed_data += content if success else "Error processing URL"
|
| 197 |
if file:
|
| 198 |
-
processed_data += f"
|
| 199 |
if data:
|
| 200 |
-
processed_data += data
|
| 201 |
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
try:
|
| 210 |
-
summary += "\n\nSaved to memory with keywords: " + ", ".join(memory_entries[0].get('keywords', ['No keywords'])[:5])
|
| 211 |
-
except (KeyError, AttributeError):
|
| 212 |
-
summary += "\n\nSaved to memory (keyword extraction failed)"
|
| 213 |
else:
|
| 214 |
summary += "\n\nMemory save failed"
|
| 215 |
|
| 216 |
-
|
| 217 |
|
| 218 |
-
yield "", history, "", json.dumps(memory_entries[0]) if mem_check else {}
|
| 219 |
-
|
| 220 |
def create_app():
|
| 221 |
with gr.Blocks() as app:
|
| 222 |
gr.Markdown("## Mixtral 8x7B Summarizer")
|
|
|
|
| 29 |
# Initialize OpenAI client
|
| 30 |
openai.api_base = OPENAI_API_BASE
|
| 31 |
openai.api_key = OPENAI_API_KEY
|
| 32 |
+
VERBOSE = True # Set to False to disable debug logging
|
| 33 |
|
| 34 |
|
| 35 |
# Indexing Constants
|
|
|
|
| 78 |
repo_id=REPO_NAME,
|
| 79 |
repo_type="dataset",
|
| 80 |
)
|
| 81 |
+
|
| 82 |
def save_memory(purpose: str, content: str) -> List[Dict]:
    """Build a memory entry describing ``content`` and return it in a list.

    The entry is assembled locally from ``content``; this function makes no
    model call and performs no upload.

    Args:
        purpose: Caller-supplied description of why the content is saved.
            Kept for interface compatibility; it is not used when building
            the entry.
        content: Text to store. Only the first 1000 characters are kept
            under the ``"content"`` key.

    Returns:
        A one-element list holding the entry dict (keys ``keywords``,
        ``title``, ``description``, ``content``, ``url``), or ``[]`` when
        the entry could not be built (e.g. ``content`` is not sliceable).
    """
    import re  # Local import so the block does not rely on a file-level one.

    try:
        memory_entry = {
            "keywords": [],
            "title": "Untitled",
            "description": "No description",
            "content": content[:1000],  # Truncate for memory
            "url": "",
        }

        # Best-effort metadata: only attempted on text that mentions arXiv.
        # Non-string content keeps the defaults above instead of raising.
        if isinstance(content, str) and "arxiv.org" in content:
            # Grab just the URL token. The previous implementation glued
            # everything preceding "arxiv.org" (labels such as "PDF URL: ",
            # earlier lines, ...) onto the stored URL, and dropped all
            # metadata when nothing followed "arxiv.org".
            match = re.search(
                r"(?:https?://)?(?:[\w-]+\.)*arxiv\.org\S*", content
            )
            if match:
                # Trailing sentence punctuation is not part of the link.
                memory_entry["url"] = match.group(0).rstrip(".,;:)]}>\"'")
            # First line of the content, capped at 100 characters.
            memory_entry["title"] = content.partition("\n")[0][:100]
            memory_entry["keywords"] = ["arxiv", "paper"]

        if VERBOSE:
            print(f"Saved memory entry: {memory_entry}")

        return [memory_entry]
    except Exception as e:
        # Deliberate best-effort boundary: callers treat [] as "save failed".
        if VERBOSE:
            print(f"Memory save failed: {e}")
        return []
|
|
|
|
|
|
|
| 111 |
|
| 112 |
def fetch_url_content(url: str) -> Tuple[bool, str]:
|
| 113 |
"""Fetch content from a URL and return status and content."""
|
|
|
|
| 176 |
|
| 177 |
processed_data = ""
|
| 178 |
if pdf_url.startswith("http"):
|
| 179 |
+
processed_data += f"PDF URL: {pdf_url}\n"
|
| 180 |
if url.startswith("http"):
|
| 181 |
+
processed_data += f"URL: {url}\n"
|
|
|
|
| 182 |
if file:
|
| 183 |
+
processed_data += f"File: {file}\n"
|
| 184 |
if data:
|
| 185 |
+
processed_data += f"Data: {data[:1000]}\n"
|
| 186 |
|
| 187 |
+
summary = f"Summary for: {inp[:100]}\n{processed_data[:500]}"
|
| 188 |
+
memory_entries = []
|
| 189 |
+
|
| 190 |
+
if mem_check:
|
| 191 |
+
memory_entries = save_memory(inp, processed_data)
|
| 192 |
+
if memory_entries:
|
| 193 |
+
summary += "\n\nSaved to memory"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
else:
|
| 195 |
summary += "\n\nMemory save failed"
|
| 196 |
|
| 197 |
+
yield summary, history, "", memory_entries[0] if memory_entries else {}
|
| 198 |
|
|
|
|
|
|
|
| 199 |
def create_app():
|
| 200 |
with gr.Blocks() as app:
|
| 201 |
gr.Markdown("## Mixtral 8x7B Summarizer")
|