Spaces:
Build error
Build error
Commit ·
ff8256a
1
Parent(s): 446a37d
back 2 async fix2
Browse files
app.py
CHANGED
|
@@ -768,6 +768,133 @@ def create_output_file(df, uploaded_file):
|
|
| 768 |
return None
|
| 769 |
|
| 770 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 771 |
def create_interface():
|
| 772 |
control = ProcessControl()
|
| 773 |
|
|
@@ -775,7 +902,7 @@ def create_interface():
|
|
| 775 |
# Create state for file data
|
| 776 |
current_file = gr.State(None)
|
| 777 |
|
| 778 |
-
gr.Markdown("# AI-анализ мониторинга новостей v.2.
|
| 779 |
|
| 780 |
with gr.Row():
|
| 781 |
file_input = gr.File(
|
|
@@ -825,7 +952,6 @@ def create_interface():
|
|
| 825 |
with gr.Column(scale=1):
|
| 826 |
events_plot = gr.Plot(label="Распределение событий")
|
| 827 |
|
| 828 |
-
# Create a download row with file component only
|
| 829 |
with gr.Row():
|
| 830 |
file_output = gr.File(
|
| 831 |
label="Скачать результаты",
|
|
@@ -836,125 +962,10 @@ def create_interface():
|
|
| 836 |
def stop_processing():
|
| 837 |
control.request_stop()
|
| 838 |
return "Остановка обработки..."
|
| 839 |
-
|
| 840 |
-
|
| 841 |
-
@spaces.GPU(duration=300)
|
| 842 |
-
async def process_and_download(file_bytes):
|
| 843 |
-
if file_bytes is None:
|
| 844 |
-
gr.Warning("Пожалуйста, загрузите файл")
|
| 845 |
-
yield (pd.DataFrame(), None, None, None, "Ожидание файла...", "")
|
| 846 |
-
return
|
| 847 |
-
|
| 848 |
-
detector = None
|
| 849 |
-
gpu_manager = GPUTaskManager(
|
| 850 |
-
max_retries=3,
|
| 851 |
-
retry_delay=30,
|
| 852 |
-
cleanup_callback=lambda: detector.cleanup() if detector else None
|
| 853 |
-
)
|
| 854 |
-
|
| 855 |
-
try:
|
| 856 |
-
file_obj = io.BytesIO(file_bytes)
|
| 857 |
-
logger.info("File loaded into BytesIO successfully")
|
| 858 |
-
|
| 859 |
-
detector = EventDetector()
|
| 860 |
-
|
| 861 |
-
# Read and deduplicate data with retry
|
| 862 |
-
async def read_and_dedupe():
|
| 863 |
-
df = pd.read_excel(file_obj, sheet_name='Публикации')
|
| 864 |
-
original_count = len(df)
|
| 865 |
-
df = fuzzy_deduplicate(df, 'Выдержки из текста', threshold=55)
|
| 866 |
-
return df, original_count
|
| 867 |
-
|
| 868 |
-
df, original_count = await gpu_manager.run_with_retry(read_and_dedupe)
|
| 869 |
-
|
| 870 |
-
# Process in smaller batches with better error handling
|
| 871 |
-
processed_rows = []
|
| 872 |
-
batches = gpu_manager.batch_process(list(df.iterrows()), batch_size=3)
|
| 873 |
-
|
| 874 |
-
for batch in batches:
|
| 875 |
-
if control.should_stop():
|
| 876 |
-
break
|
| 877 |
-
|
| 878 |
-
try:
|
| 879 |
-
# Process batch with retry mechanism
|
| 880 |
-
async def process_batch():
|
| 881 |
-
batch_results = []
|
| 882 |
-
for idx, row in batch:
|
| 883 |
-
text = str(row.get('Выдержки из текста', '')).strip()
|
| 884 |
-
entity = str(row.get('Объект', '')).strip()
|
| 885 |
-
|
| 886 |
-
if text and entity:
|
| 887 |
-
results = detector.process_text(text, entity)
|
| 888 |
-
batch_results.append({
|
| 889 |
-
'Объект': entity,
|
| 890 |
-
'Заголовок': str(row.get('Заголовок', '')),
|
| 891 |
-
'Translated': results['translated_text'],
|
| 892 |
-
'Sentiment': results['sentiment'],
|
| 893 |
-
'Impact': results['impact'],
|
| 894 |
-
'Reasoning': results['reasoning'],
|
| 895 |
-
'Event_Type': results['event_type'],
|
| 896 |
-
'Event_Summary': results['event_summary'],
|
| 897 |
-
'Выдержки из текста': text
|
| 898 |
-
})
|
| 899 |
-
return batch_results
|
| 900 |
-
|
| 901 |
-
batch_results = await gpu_manager.run_with_retry(process_batch)
|
| 902 |
-
processed_rows.extend(batch_results)
|
| 903 |
-
|
| 904 |
-
# Create intermediate results
|
| 905 |
-
if processed_rows:
|
| 906 |
-
result_df = pd.DataFrame(processed_rows)
|
| 907 |
-
yield (
|
| 908 |
-
result_df,
|
| 909 |
-
None, None, None,
|
| 910 |
-
f"Обработано {len(processed_rows)}/{len(df)} строк",
|
| 911 |
-
f"Удалено {original_count - len(df)} дубликатов"
|
| 912 |
-
)
|
| 913 |
-
|
| 914 |
-
except Exception as e:
|
| 915 |
-
if gpu_manager.is_gpu_error(e):
|
| 916 |
-
logger.warning(f"GPU error in batch processing: {str(e)}")
|
| 917 |
-
continue
|
| 918 |
-
else:
|
| 919 |
-
logger.error(f"Non-GPU error in batch processing: {str(e)}")
|
| 920 |
-
|
| 921 |
-
finally:
|
| 922 |
-
torch.cuda.empty_cache()
|
| 923 |
-
|
| 924 |
-
# Create final results
|
| 925 |
-
if processed_rows:
|
| 926 |
-
result_df = pd.DataFrame(processed_rows)
|
| 927 |
-
output_bytes_io = create_output_file(result_df, file_obj)
|
| 928 |
-
fig_sentiment, fig_events = create_visualizations(result_df)
|
| 929 |
-
|
| 930 |
-
if output_bytes_io:
|
| 931 |
-
temp_file = "results.xlsx"
|
| 932 |
-
with open(temp_file, "wb") as f:
|
| 933 |
-
f.write(output_bytes_io.getvalue())
|
| 934 |
-
yield (
|
| 935 |
-
result_df,
|
| 936 |
-
fig_sentiment,
|
| 937 |
-
fig_events,
|
| 938 |
-
temp_file,
|
| 939 |
-
"Обработка завершена!",
|
| 940 |
-
f"Удалено {original_count - len(df)} дубликатов"
|
| 941 |
-
)
|
| 942 |
-
return
|
| 943 |
-
|
| 944 |
-
yield (pd.DataFrame(), None, None, None, "Нет обработанных данных", "")
|
| 945 |
-
|
| 946 |
-
except Exception as e:
|
| 947 |
-
error_msg = f"Ошибка анализа: {str(e)}"
|
| 948 |
-
logger.error(error_msg)
|
| 949 |
-
yield (pd.DataFrame(), None, None, None, error_msg, "")
|
| 950 |
-
|
| 951 |
-
finally:
|
| 952 |
-
if detector:
|
| 953 |
-
detector.cleanup()
|
| 954 |
|
| 955 |
stop_btn.click(fn=stop_processing, outputs=[progress])
|
| 956 |
|
| 957 |
-
# Main processing
|
| 958 |
analyze_btn.click(
|
| 959 |
fn=process_and_download,
|
| 960 |
inputs=[file_input],
|
|
@@ -970,6 +981,7 @@ def create_interface():
|
|
| 970 |
|
| 971 |
return app
|
| 972 |
|
|
|
|
| 973 |
if __name__ == "__main__":
|
| 974 |
app = create_interface()
|
| 975 |
app.launch(share=True)
|
|
|
|
| 768 |
return None
|
| 769 |
|
| 770 |
|
| 771 |
+
|
| 772 |
+
|
| 773 |
+
@spaces.GPU(duration=300)
|
| 774 |
+
def process_and_download(file_bytes):
|
| 775 |
+
"""Synchronous wrapper for async processing"""
|
| 776 |
+
if file_bytes is None:
|
| 777 |
+
gr.Warning("Пожалуйста, загрузите файл")
|
| 778 |
+
return pd.DataFrame(), None, None, None, "Ожидание файла...", ""
|
| 779 |
+
|
| 780 |
+
async def async_process():
|
| 781 |
+
detector = None
|
| 782 |
+
gpu_manager = GPUTaskManager(
|
| 783 |
+
max_retries=3,
|
| 784 |
+
retry_delay=30,
|
| 785 |
+
cleanup_callback=lambda: detector.cleanup() if detector else None
|
| 786 |
+
)
|
| 787 |
+
|
| 788 |
+
try:
|
| 789 |
+
file_obj = io.BytesIO(file_bytes)
|
| 790 |
+
logger.info("File loaded into BytesIO successfully")
|
| 791 |
+
|
| 792 |
+
detector = EventDetector()
|
| 793 |
+
|
| 794 |
+
# Read and deduplicate data with retry
|
| 795 |
+
async def read_and_dedupe():
|
| 796 |
+
df = pd.read_excel(file_obj, sheet_name='Публикации')
|
| 797 |
+
original_count = len(df)
|
| 798 |
+
df = fuzzy_deduplicate(df, 'Выдержки из текста', threshold=55)
|
| 799 |
+
return df, original_count
|
| 800 |
+
|
| 801 |
+
df, original_count = await gpu_manager.run_with_retry(read_and_dedupe)
|
| 802 |
+
|
| 803 |
+
# Process in smaller batches with better error handling
|
| 804 |
+
processed_rows = []
|
| 805 |
+
batches = gpu_manager.batch_process(list(df.iterrows()), batch_size=3)
|
| 806 |
+
|
| 807 |
+
latest_result = (pd.DataFrame(), None, None, None, "Начало обработки...", "")
|
| 808 |
+
|
| 809 |
+
for batch in batches:
|
| 810 |
+
if control.should_stop():
|
| 811 |
+
return latest_result
|
| 812 |
+
|
| 813 |
+
try:
|
| 814 |
+
# Process batch with retry mechanism
|
| 815 |
+
async def process_batch():
|
| 816 |
+
batch_results = []
|
| 817 |
+
for idx, row in batch:
|
| 818 |
+
text = str(row.get('Выдержки из текста', '')).strip()
|
| 819 |
+
entity = str(row.get('Объект', '')).strip()
|
| 820 |
+
|
| 821 |
+
if text and entity:
|
| 822 |
+
results = detector.process_text(text, entity)
|
| 823 |
+
batch_results.append({
|
| 824 |
+
'Объект': entity,
|
| 825 |
+
'Заголовок': str(row.get('Заголовок', '')),
|
| 826 |
+
'Translated': results['translated_text'],
|
| 827 |
+
'Sentiment': results['sentiment'],
|
| 828 |
+
'Impact': results['impact'],
|
| 829 |
+
'Reasoning': results['reasoning'],
|
| 830 |
+
'Event_Type': results['event_type'],
|
| 831 |
+
'Event_Summary': results['event_summary'],
|
| 832 |
+
'Выдержки из текста': text
|
| 833 |
+
})
|
| 834 |
+
return batch_results
|
| 835 |
+
|
| 836 |
+
batch_results = await gpu_manager.run_with_retry(process_batch)
|
| 837 |
+
processed_rows.extend(batch_results)
|
| 838 |
+
|
| 839 |
+
# Update latest result
|
| 840 |
+
if processed_rows:
|
| 841 |
+
result_df = pd.DataFrame(processed_rows)
|
| 842 |
+
latest_result = (
|
| 843 |
+
result_df,
|
| 844 |
+
None, None, None,
|
| 845 |
+
f"Обработано {len(processed_rows)}/{len(df)} строк",
|
| 846 |
+
f"Удалено {original_count - len(df)} дубликатов"
|
| 847 |
+
)
|
| 848 |
+
|
| 849 |
+
except Exception as e:
|
| 850 |
+
if gpu_manager.is_gpu_error(e):
|
| 851 |
+
logger.warning(f"GPU error in batch processing: {str(e)}")
|
| 852 |
+
continue
|
| 853 |
+
else:
|
| 854 |
+
logger.error(f"Non-GPU error in batch processing: {str(e)}")
|
| 855 |
+
|
| 856 |
+
finally:
|
| 857 |
+
torch.cuda.empty_cache()
|
| 858 |
+
|
| 859 |
+
# Create final results
|
| 860 |
+
if processed_rows:
|
| 861 |
+
result_df = pd.DataFrame(processed_rows)
|
| 862 |
+
output_bytes_io = create_output_file(result_df, file_obj)
|
| 863 |
+
fig_sentiment, fig_events = create_visualizations(result_df)
|
| 864 |
+
|
| 865 |
+
if output_bytes_io:
|
| 866 |
+
temp_file = "results.xlsx"
|
| 867 |
+
with open(temp_file, "wb") as f:
|
| 868 |
+
f.write(output_bytes_io.getvalue())
|
| 869 |
+
return (
|
| 870 |
+
result_df,
|
| 871 |
+
fig_sentiment,
|
| 872 |
+
fig_events,
|
| 873 |
+
temp_file,
|
| 874 |
+
"Обработка завершена!",
|
| 875 |
+
f"Удалено {original_count - len(df)} дубликатов"
|
| 876 |
+
)
|
| 877 |
+
|
| 878 |
+
return (pd.DataFrame(), None, None, None, "Нет обработанных данных", "")
|
| 879 |
+
|
| 880 |
+
except Exception as e:
|
| 881 |
+
error_msg = f"Ошибка анализа: {str(e)}"
|
| 882 |
+
logger.error(error_msg)
|
| 883 |
+
return (pd.DataFrame(), None, None, None, error_msg, "")
|
| 884 |
+
|
| 885 |
+
finally:
|
| 886 |
+
if detector:
|
| 887 |
+
detector.cleanup()
|
| 888 |
+
|
| 889 |
+
# Run the async function in the event loop
|
| 890 |
+
try:
|
| 891 |
+
loop = asyncio.get_event_loop()
|
| 892 |
+
except RuntimeError:
|
| 893 |
+
loop = asyncio.new_event_loop()
|
| 894 |
+
asyncio.set_event_loop(loop)
|
| 895 |
+
|
| 896 |
+
return loop.run_until_complete(async_process())
|
| 897 |
+
|
| 898 |
def create_interface():
|
| 899 |
control = ProcessControl()
|
| 900 |
|
|
|
|
| 902 |
# Create state for file data
|
| 903 |
current_file = gr.State(None)
|
| 904 |
|
| 905 |
+
gr.Markdown("# AI-анализ мониторинга новостей v.2.1 + ext")
|
| 906 |
|
| 907 |
with gr.Row():
|
| 908 |
file_input = gr.File(
|
|
|
|
| 952 |
with gr.Column(scale=1):
|
| 953 |
events_plot = gr.Plot(label="Распределение событий")
|
| 954 |
|
|
|
|
| 955 |
with gr.Row():
|
| 956 |
file_output = gr.File(
|
| 957 |
label="Скачать результаты",
|
|
|
|
| 962 |
def stop_processing():
|
| 963 |
control.request_stop()
|
| 964 |
return "Остановка обработки..."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 965 |
|
| 966 |
stop_btn.click(fn=stop_processing, outputs=[progress])
|
| 967 |
|
| 968 |
+
# Main processing with synchronous function
|
| 969 |
analyze_btn.click(
|
| 970 |
fn=process_and_download,
|
| 971 |
inputs=[file_input],
|
|
|
|
| 981 |
|
| 982 |
return app
|
| 983 |
|
| 984 |
+
|
| 985 |
if __name__ == "__main__":
|
| 986 |
app = create_interface()
|
| 987 |
app.launch(share=True)
|