ryanshelley committed on
Commit
824ece5
·
verified ·
1 Parent(s): 666dd15

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -0
app.py CHANGED
@@ -12,6 +12,7 @@ import time
12
  import warnings
13
  import os
14
  import re
 
15
 
16
  # Trafilatura imports
17
  from trafilatura import fetch_url, extract, bare_extraction
@@ -19,6 +20,27 @@ from trafilatura.downloads import fetch_url as trafilatura_fetch
19
 
20
  warnings.filterwarnings('ignore')
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  @dataclass
23
  class ContentChunk:
24
  content: str
 
12
  import warnings
13
  import os
14
  import re
15
+ import tempfile
16
 
17
  # Trafilatura imports
18
  from trafilatura import fetch_url, extract, bare_extraction
 
20
 
21
  warnings.filterwarnings('ignore')
22
 
23
# Path of the most recently exported vector CSV; stays None until
# prepare_download() has successfully written a file.
latest_vector_data = None


def prepare_download(vector_df):
    """Write the vector data to a temporary CSV file and remember its path.

    Args:
        vector_df: DataFrame-like object exposing ``.empty`` and
            ``.to_csv`` (presumably a pandas DataFrame), or None.

    Returns:
        The path of the newly written ``.csv`` file, or None when
        ``vector_df`` is None or empty. In the latter case the previously
        stored path in ``latest_vector_data`` is left untouched.
    """
    global latest_vector_data
    if vector_df is None or vector_df.empty:
        return None

    # delete=False keeps the file on disk after the handle is closed so it
    # can be served later. Writing through the handle inside ``with`` makes
    # sure the descriptor is closed (the original left it open) and avoids
    # re-opening the file by name while it is still held open, which fails
    # on Windows.
    with tempfile.NamedTemporaryFile(
        mode='w',
        suffix='.csv',
        delete=False,
        newline='',
        encoding='utf-8',
    ) as temp_file:
        vector_df.to_csv(temp_file, index=False)
        csv_path = temp_file.name

    latest_vector_data = csv_path
    return csv_path
36
+
37
def download_vector_data():
    """Return the path of the last prepared vector file, or None if unset."""
    # Reading a module-level name needs no ``global`` declaration; any falsy
    # stored value (None, empty string) is reported as None.
    return latest_vector_data or None
43
+
44
  @dataclass
45
  class ContentChunk:
46
  content: str