Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -59,8 +59,6 @@ FILE_EMOJIS = {
|
|
| 59 |
"mp3": "🎵",
|
| 60 |
}
|
| 61 |
|
| 62 |
-
|
| 63 |
-
|
| 64 |
def get_high_info_terms(text: str) -> list:
|
| 65 |
# Expanded stop words
|
| 66 |
stop_words = set([
|
|
@@ -72,7 +70,7 @@ def get_high_info_terms(text: str) -> list:
|
|
| 72 |
'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most',
|
| 73 |
'other', 'some', 'such', 'than', 'too', 'very', 'just', 'there', 'as', 'if', 'while'
|
| 74 |
])
|
| 75 |
-
|
| 76 |
# Key phrases tailored to your interests
|
| 77 |
key_phrases = [
|
| 78 |
'artificial intelligence', 'machine learning', 'deep learning', 'neural networks',
|
|
@@ -112,53 +110,6 @@ def get_high_info_terms(text: str) -> list:
|
|
| 112 |
|
| 113 |
# Return only the top 5 terms
|
| 114 |
return unique_terms[:5]
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
def get_high_info_terms_old(text: str) -> list:
|
| 118 |
-
stop_words = set([
|
| 119 |
-
'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
|
| 120 |
-
'by', 'from', 'up', 'about', 'into', 'over', 'after', 'is', 'are', 'was', 'were',
|
| 121 |
-
'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would',
|
| 122 |
-
'should', 'could', 'might', 'must', 'shall', 'can', 'may', 'this', 'that', 'these',
|
| 123 |
-
'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'what', 'which', 'who',
|
| 124 |
-
'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most',
|
| 125 |
-
'other', 'some', 'such', 'than', 'too', 'very', 'just', 'there'
|
| 126 |
-
])
|
| 127 |
-
key_phrases = [
|
| 128 |
-
'artificial intelligence', 'machine learning', 'deep learning', 'neural network',
|
| 129 |
-
'personal assistant', 'natural language', 'computer vision', 'data science',
|
| 130 |
-
'reinforcement learning', 'knowledge graph', 'semantic search', 'time series',
|
| 131 |
-
'large language model', 'transformer model', 'attention mechanism',
|
| 132 |
-
'autonomous system', 'edge computing', 'quantum computing', 'blockchain technology',
|
| 133 |
-
'cognitive science', 'human computer', 'decision making', 'arxiv search',
|
| 134 |
-
'research paper', 'scientific study', 'empirical analysis'
|
| 135 |
-
]
|
| 136 |
-
|
| 137 |
-
preserved_phrases = []
|
| 138 |
-
lower_text = text.lower()
|
| 139 |
-
for phrase in key_phrases:
|
| 140 |
-
if phrase in lower_text:
|
| 141 |
-
preserved_phrases.append(phrase)
|
| 142 |
-
text = text.replace(phrase, '')
|
| 143 |
-
|
| 144 |
-
words = re.findall(r'\b\w+(?:-\w+)*\b', text)
|
| 145 |
-
high_info_words = [
|
| 146 |
-
word.lower() for word in words
|
| 147 |
-
if len(word) > 3
|
| 148 |
-
and word.lower() not in stop_words
|
| 149 |
-
and not word.isdigit()
|
| 150 |
-
and any(c.isalpha() for c in word)
|
| 151 |
-
]
|
| 152 |
-
|
| 153 |
-
all_terms = preserved_phrases + high_info_words
|
| 154 |
-
seen = set()
|
| 155 |
-
unique_terms = []
|
| 156 |
-
for term in all_terms:
|
| 157 |
-
if term not in seen:
|
| 158 |
-
seen.add(term)
|
| 159 |
-
unique_terms.append(term)
|
| 160 |
-
|
| 161 |
-
return unique_terms[:5]
|
| 162 |
|
| 163 |
def clean_text_for_filename(text: str) -> str:
|
| 164 |
text = text.lower()
|
|
@@ -190,22 +141,6 @@ def generate_filename(prompt, response, file_type="md"):
|
|
| 190 |
filename = f"{prefix}{full_name}.{file_type}"
|
| 191 |
return filename
|
| 192 |
|
| 193 |
-
def generate_filename_old(prompt, response, file_type="md"):
|
| 194 |
-
prefix = datetime.now().strftime("%m%d_%hh%mm") + "_"
|
| 195 |
-
combined = (prompt + " " + response).strip()
|
| 196 |
-
info_terms = get_high_info_terms(combined)
|
| 197 |
-
|
| 198 |
-
snippet = (prompt[:100] + " " + response[:100]).strip()
|
| 199 |
-
snippet_cleaned = clean_text_for_filename(snippet)
|
| 200 |
-
name_parts = info_terms + [snippet_cleaned]
|
| 201 |
-
full_name = '_'.join(name_parts)
|
| 202 |
-
|
| 203 |
-
if len(full_name) > 150:
|
| 204 |
-
full_name = full_name[:150]
|
| 205 |
-
|
| 206 |
-
filename = f"{prefix}{full_name}.{file_type}"
|
| 207 |
-
return filename
|
| 208 |
-
|
| 209 |
def create_file(prompt, response, file_type="md"):
|
| 210 |
filename = generate_filename(prompt.strip(), response.strip(), file_type)
|
| 211 |
with open(filename, 'w', encoding='utf-8') as f:
|
|
|
|
| 59 |
"mp3": "🎵",
|
| 60 |
}
|
| 61 |
|
|
|
|
|
|
|
| 62 |
def get_high_info_terms(text: str) -> list:
|
| 63 |
# Expanded stop words
|
| 64 |
stop_words = set([
|
|
|
|
| 70 |
'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most',
|
| 71 |
'other', 'some', 'such', 'than', 'too', 'very', 'just', 'there', 'as', 'if', 'while'
|
| 72 |
])
|
| 73 |
+
|
| 74 |
# Key phrases tailored to your interests
|
| 75 |
key_phrases = [
|
| 76 |
'artificial intelligence', 'machine learning', 'deep learning', 'neural networks',
|
|
|
|
| 110 |
|
| 111 |
# Return only the top 5 terms
|
| 112 |
return unique_terms[:5]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
def clean_text_for_filename(text: str) -> str:
|
| 115 |
text = text.lower()
|
|
|
|
| 141 |
filename = f"{prefix}{full_name}.{file_type}"
|
| 142 |
return filename
|
| 143 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
def create_file(prompt, response, file_type="md"):
|
| 145 |
filename = generate_filename(prompt.strip(), response.strip(), file_type)
|
| 146 |
with open(filename, 'w', encoding='utf-8') as f:
|