Update app.py
Browse files
app.py
CHANGED
|
@@ -10,6 +10,9 @@ def load_community_alignment_dataset():
|
|
| 10 |
"""Load the Facebook Community Alignment Dataset"""
|
| 11 |
try:
|
| 12 |
dataset = load_dataset("facebook/community-alignment-dataset")
|
|
|
|
|
|
|
|
|
|
| 13 |
return dataset
|
| 14 |
except Exception as e:
|
| 15 |
print(f"Error loading dataset: {e}")
|
|
@@ -45,12 +48,18 @@ def get_conversation_data(conversation_id: int) -> Dict[str, Any]:
|
|
| 45 |
if not dataset:
|
| 46 |
return None
|
| 47 |
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
for
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
def format_annotator_info(item: Dict[str, Any]) -> str:
|
| 56 |
"""Format annotator information"""
|
|
@@ -142,10 +151,20 @@ def get_random_conversation() -> int:
|
|
| 142 |
if not dataset:
|
| 143 |
return 0
|
| 144 |
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
def get_dataset_stats() -> str:
|
| 151 |
"""Get dataset statistics"""
|
|
@@ -174,16 +193,21 @@ def search_conversations(query: str, field: str) -> str:
|
|
| 174 |
results = []
|
| 175 |
query_lower = query.lower()
|
| 176 |
|
| 177 |
-
|
| 178 |
-
for
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
if not results:
|
| 189 |
return f"No results found for '{query}' in field '{field}'"
|
|
@@ -334,4 +358,4 @@ if __name__ == "__main__":
|
|
| 334 |
server_port=7860,
|
| 335 |
share=False,
|
| 336 |
show_error=True
|
| 337 |
-
)
|
|
|
|
| 10 |
"""Load the Facebook Community Alignment Dataset"""
|
| 11 |
try:
|
| 12 |
dataset = load_dataset("facebook/community-alignment-dataset")
|
| 13 |
+
print(f"Dataset loaded successfully. Available splits: {list(dataset.keys())}")
|
| 14 |
+
for split_name, split_data in dataset.items():
|
| 15 |
+
print(f"Split '{split_name}': {len(split_data)} items")
|
| 16 |
return dataset
|
| 17 |
except Exception as e:
|
| 18 |
print(f"Error loading dataset: {e}")
|
|
|
|
| 48 |
if not dataset:
|
| 49 |
return None
|
| 50 |
|
| 51 |
+
try:
|
| 52 |
+
# Search for conversation in the dataset
|
| 53 |
+
for split in dataset.keys():
|
| 54 |
+
split_data = dataset[split]
|
| 55 |
+
for i in range(len(split_data)):
|
| 56 |
+
item = split_data[i]
|
| 57 |
+
if item.get('conversation_id') == conversation_id:
|
| 58 |
+
return item
|
| 59 |
+
return None
|
| 60 |
+
except Exception as e:
|
| 61 |
+
print(f"Error getting conversation data: {e}")
|
| 62 |
+
return None
|
| 63 |
|
| 64 |
def format_annotator_info(item: Dict[str, Any]) -> str:
|
| 65 |
"""Format annotator information"""
|
|
|
|
| 151 |
if not dataset:
|
| 152 |
return 0
|
| 153 |
|
| 154 |
+
try:
|
| 155 |
+
# Get a random split
|
| 156 |
+
split = random.choice(list(dataset.keys()))
|
| 157 |
+
split_data = dataset[split]
|
| 158 |
+
|
| 159 |
+
# Get a random index
|
| 160 |
+
random_index = random.randint(0, len(split_data) - 1)
|
| 161 |
+
item = split_data[random_index]
|
| 162 |
+
|
| 163 |
+
return item.get('conversation_id', 0)
|
| 164 |
+
except Exception as e:
|
| 165 |
+
print(f"Error getting random conversation: {e}")
|
| 166 |
+
# Fallback: return a default conversation ID
|
| 167 |
+
return 1061830552573006 # The ID from your example
|
| 168 |
|
| 169 |
def get_dataset_stats() -> str:
|
| 170 |
"""Get dataset statistics"""
|
|
|
|
| 193 |
results = []
|
| 194 |
query_lower = query.lower()
|
| 195 |
|
| 196 |
+
try:
|
| 197 |
+
for split_name, split_data in dataset.items():
|
| 198 |
+
# Limit search to first 100 items per split
|
| 199 |
+
for i in range(min(100, len(split_data))):
|
| 200 |
+
item = split_data[i]
|
| 201 |
+
if field in item and item[field]:
|
| 202 |
+
field_value = str(item[field]).lower()
|
| 203 |
+
if query_lower in field_value:
|
| 204 |
+
results.append({
|
| 205 |
+
'conversation_id': item.get('conversation_id'),
|
| 206 |
+
'split': split_name,
|
| 207 |
+
'field_value': str(item[field])[:100] + "..." if len(str(item[field])) > 100 else str(item[field])
|
| 208 |
+
})
|
| 209 |
+
except Exception as e:
|
| 210 |
+
return f"Error during search: {e}"
|
| 211 |
|
| 212 |
if not results:
|
| 213 |
return f"No results found for '{query}' in field '{field}'"
|
|
|
|
| 358 |
server_port=7860,
|
| 359 |
share=False,
|
| 360 |
show_error=True
|
| 361 |
+
)
|