Spaces:
Running
Running
Commit ·
f602aaf
1
Parent(s): 0a20075
3.60
Browse files
app.py
CHANGED
|
@@ -71,41 +71,35 @@ class FallbackLLMSystem:
|
|
| 71 |
# Initialize MT5 model (multilingual T5)
|
| 72 |
self.model_name = "google/mt5-small"
|
| 73 |
self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
|
| 74 |
-
self.model =
|
| 75 |
|
| 76 |
# Set device
|
| 77 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 78 |
self.model = self.model.to(self.device)
|
| 79 |
|
| 80 |
-
st.success(f"
|
| 81 |
|
| 82 |
except Exception as e:
|
| 83 |
st.error(f"Error initializing MT5: {str(e)}")
|
| 84 |
raise
|
| 85 |
|
| 86 |
-
def
|
| 87 |
-
"""
|
| 88 |
-
# Initialize default return values
|
| 89 |
-
event_type = "Нет"
|
| 90 |
-
summary = ""
|
| 91 |
-
|
| 92 |
try:
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
|
|
|
| 102 |
|
| 103 |
-
|
| 104 |
-
Тип: [type]
|
| 105 |
-
Краткое описание: [summary]</s>"""
|
| 106 |
-
|
| 107 |
inputs = self.tokenizer(
|
| 108 |
-
|
| 109 |
return_tensors="pt",
|
| 110 |
padding=True,
|
| 111 |
truncation=True,
|
|
@@ -122,25 +116,174 @@ class FallbackLLMSystem:
|
|
| 122 |
|
| 123 |
response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 124 |
|
| 125 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
if "Тип:" in response and "Краткое описание:" in response:
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
|
|
|
| 132 |
valid_types = ["Отчетность", "РЦБ", "Суд", "Нет"]
|
| 133 |
-
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
-
if len(parts) > 1:
|
| 137 |
-
summary = parts[1].strip()
|
| 138 |
-
|
| 139 |
return event_type, summary
|
| 140 |
-
|
| 141 |
except Exception as e:
|
| 142 |
st.warning(f"Event detection error: {str(e)}")
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
def ensure_groq_llm():
|
| 146 |
"""Initialize Groq LLM for impact estimation"""
|
|
@@ -351,7 +494,7 @@ class EventDetectionSystem:
|
|
| 351 |
model="yiyanghkust/finbert-tone",
|
| 352 |
return_all_scores=True
|
| 353 |
)
|
| 354 |
-
st.success("BERT-модели запущены для детекции новостей")
|
| 355 |
except Exception as e:
|
| 356 |
st.error(f"Ошибка запуска BERT: {str(e)}")
|
| 357 |
raise
|
|
@@ -414,7 +557,7 @@ class TranslationSystem:
|
|
| 414 |
# Initialize fallback translator
|
| 415 |
self.fallback_translator = GoogleTranslator(source='ru', target='en')
|
| 416 |
self.legacy_translator = LegacyTranslator()
|
| 417 |
-
st.success("
|
| 418 |
except Exception as e:
|
| 419 |
st.error(f"Ошибка запуска перевода: {str(e)}")
|
| 420 |
raise
|
|
@@ -641,24 +784,7 @@ def process_file(uploaded_file, model_choice, translation_method=None):
|
|
| 641 |
st.error(f"Ошибка в обработке файла: {str(e)}")
|
| 642 |
return None
|
| 643 |
|
| 644 |
-
|
| 645 |
-
template = """
|
| 646 |
-
Translate this English explanation to Russian, maintaining a formal business style:
|
| 647 |
-
"{text}"
|
| 648 |
-
|
| 649 |
-
Your response should contain only the Russian translation.
|
| 650 |
-
"""
|
| 651 |
-
prompt = PromptTemplate(template=template, input_variables=["text"])
|
| 652 |
-
chain = prompt | llm | RunnablePassthrough()
|
| 653 |
-
response = chain.invoke({"text": text})
|
| 654 |
-
|
| 655 |
-
# Handle different response types
|
| 656 |
-
if hasattr(response, 'content'):
|
| 657 |
-
return response.content.strip()
|
| 658 |
-
elif isinstance(response, str):
|
| 659 |
-
return response.strip()
|
| 660 |
-
else:
|
| 661 |
-
return str(response).strip()
|
| 662 |
|
| 663 |
|
| 664 |
def create_download_section(excel_data, pdf_data):
|
|
@@ -905,104 +1031,76 @@ def create_analysis_data(df):
|
|
| 905 |
'Текст сообщения'
|
| 906 |
])
|
| 907 |
|
| 908 |
-
def
|
| 909 |
-
|
| 910 |
-
|
| 911 |
-
|
| 912 |
-
|
| 913 |
-
|
| 914 |
-
|
| 915 |
-
|
| 916 |
-
|
| 917 |
-
|
| 918 |
-
|
| 919 |
-
|
| 920 |
-
|
| 921 |
-
ws.cell(row=row_idx, column=9, value=row['Выдержки из текста']) # Column I
|
| 922 |
-
row_idx += 1
|
| 923 |
-
|
| 924 |
-
# Sort entities by number of negative publications
|
| 925 |
-
entity_stats = pd.DataFrame({
|
| 926 |
-
'Объект': df['Объект'].unique(),
|
| 927 |
-
'Всего': df.groupby('Объект').size(),
|
| 928 |
-
'Негативные': df[df['Sentiment'] == 'Negative'].groupby('Объект').size().fillna(0).astype(int),
|
| 929 |
-
'Позитивные': df[df['Sentiment'] == 'Positive'].groupby('Объект').size().fillna(0).astype(int)
|
| 930 |
-
}).sort_values('Негативные', ascending=False)
|
| 931 |
-
|
| 932 |
-
# Calculate most negative impact for each entity
|
| 933 |
-
entity_impacts = {}
|
| 934 |
-
for entity in df['Объект'].unique():
|
| 935 |
-
entity_df = df[df['Объект'] == entity]
|
| 936 |
-
negative_impacts = entity_df[entity_df['Sentiment'] == 'Negative']['Impact']
|
| 937 |
-
entity_impacts[entity] = negative_impacts.iloc[0] if len(negative_impacts) > 0 else 'Неопределенный эффект'
|
| 938 |
|
| 939 |
-
|
| 940 |
-
|
| 941 |
-
|
| 942 |
-
|
| 943 |
-
|
| 944 |
-
ws.cell(row=idx, column=7, value=row['Негативные']) # Column G
|
| 945 |
-
ws.cell(row=idx, column=8, value=row['Позитивные']) # Column H
|
| 946 |
-
ws.cell(row=idx, column=9, value=entity_impacts[entity]) # Column I
|
| 947 |
|
| 948 |
-
#
|
| 949 |
-
|
| 950 |
-
|
| 951 |
-
|
| 952 |
-
|
| 953 |
-
|
| 954 |
-
|
| 955 |
-
|
| 956 |
-
|
| 957 |
-
|
| 958 |
-
|
| 959 |
-
row_idx += 1
|
| 960 |
|
| 961 |
-
#
|
| 962 |
-
original_df = pd.read_excel(uploaded_file, sheet_name='Публикации')
|
| 963 |
-
ws = wb['Публикации']
|
| 964 |
-
for r_idx, row in enumerate(dataframe_to_rows(original_df, index=False, header=True), start=1):
|
| 965 |
-
for c_idx, value in enumerate(row, start=1):
|
| 966 |
-
ws.cell(row=r_idx, column=c_idx, value=value)
|
| 967 |
|
| 968 |
-
# Update 'Анализ' sheet
|
| 969 |
ws = wb['Анализ']
|
| 970 |
row_idx = 4
|
| 971 |
for _, row in df[df['Sentiment'] == 'Negative'].iterrows():
|
| 972 |
-
ws.cell(row=row_idx, column=5, value=row['Объект'])
|
| 973 |
-
ws.cell(row=row_idx, column=6, value=row['Заголовок'])
|
| 974 |
-
ws.cell(row=row_idx, column=7, value="Риск убытка")
|
| 975 |
|
| 976 |
-
#
|
| 977 |
if pd.notna(row['Reasoning']):
|
| 978 |
-
|
| 979 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 980 |
|
| 981 |
-
ws.cell(row=row_idx, column=9, value=row['Выдержки из текста'])
|
| 982 |
row_idx += 1
|
| 983 |
|
| 984 |
-
#
|
| 985 |
-
|
| 986 |
-
|
| 987 |
-
|
| 988 |
-
|
| 989 |
-
|
| 990 |
-
|
| 991 |
-
ws.cell(row=r_idx, column=c_idx, value=value)
|
| 992 |
-
|
| 993 |
except Exception as e:
|
| 994 |
st.warning(f"Ошибка при создании выходного файла: {str(e)}")
|
| 995 |
-
|
| 996 |
-
output = io.BytesIO()
|
| 997 |
-
wb.save(output)
|
| 998 |
-
output.seek(0)
|
| 999 |
-
return output
|
| 1000 |
|
| 1001 |
def main():
|
| 1002 |
st.set_page_config(layout="wide")
|
| 1003 |
|
| 1004 |
with st.sidebar:
|
| 1005 |
-
st.title("::: AI-анализ мониторинга новостей (v.3.
|
| 1006 |
st.subheader("по материалам СКАН-ИНТЕРФАКС")
|
| 1007 |
|
| 1008 |
model_choice = st.radio(
|
|
|
|
| 71 |
# Initialize MT5 model (multilingual T5)
|
| 72 |
self.model_name = "google/mt5-small"
|
| 73 |
self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
|
| 74 |
+
self.model = AutoModelForSeq2SeqM.from_pretrained(self.model_name)
|
| 75 |
|
| 76 |
# Set device
|
| 77 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 78 |
self.model = self.model.to(self.device)
|
| 79 |
|
| 80 |
+
st.success(f"Successfully initialized MT5 model on {self.device}")
|
| 81 |
|
| 82 |
except Exception as e:
|
| 83 |
st.error(f"Error initializing MT5: {str(e)}")
|
| 84 |
raise
|
| 85 |
|
| 86 |
+
def invoke(self, prompt_args):
|
| 87 |
+
"""Make the class compatible with LangChain by implementing invoke"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
try:
|
| 89 |
+
if isinstance(prompt_args, dict):
|
| 90 |
+
# Extract the prompt template result
|
| 91 |
+
template_result = prompt_args.get('template_result', '')
|
| 92 |
+
if not template_result:
|
| 93 |
+
# Try to construct from entity and news if available
|
| 94 |
+
entity = prompt_args.get('entity', '')
|
| 95 |
+
news = prompt_args.get('news', '')
|
| 96 |
+
template_result = f"Analyze news about {entity}: {news}"
|
| 97 |
+
else:
|
| 98 |
+
template_result = str(prompt_args)
|
| 99 |
|
| 100 |
+
# Process with MT5
|
|
|
|
|
|
|
|
|
|
| 101 |
inputs = self.tokenizer(
|
| 102 |
+
template_result,
|
| 103 |
return_tensors="pt",
|
| 104 |
padding=True,
|
| 105 |
truncation=True,
|
|
|
|
| 116 |
|
| 117 |
response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 118 |
|
| 119 |
+
# Return in a format compatible with LangChain
|
| 120 |
+
return type('Response', (), {'content': response})()
|
| 121 |
+
|
| 122 |
+
except Exception as e:
|
| 123 |
+
st.warning(f"MT5 generation error: {str(e)}")
|
| 124 |
+
# Return a default response on error
|
| 125 |
+
return type('Response', (), {
|
| 126 |
+
'content': 'Impact: Неопределенный эффект\nReasoning: Ошибка анализа'
|
| 127 |
+
})()
|
| 128 |
+
|
| 129 |
+
def __or__(self, other):
    """Support ``self | other`` by composing this object with a callable.

    When ``other`` is callable, the result is a one-argument function that
    first passes its input through this object (``self(...)``) and then
    hands that result to ``other``. Any non-callable right operand yields
    ``NotImplemented`` so Python can try other dispatch options.
    """
    if not callable(other):
        return NotImplemented

    def _piped(payload):
        # Run this object first, then feed its output to the next stage.
        intermediate = self(payload)
        return other(intermediate)

    return _piped
|
| 134 |
+
|
| 135 |
+
def __rrshift__(self, other):
    """Handle ``other >> self`` by delegating to the ``|`` implementation.

    Whatever ``self.__or__(other)`` produces is returned unchanged.
    """
    composed = self.__or__(other)
    return composed
|
| 138 |
+
|
| 139 |
+
def __call__(self, prompt_args):
    """Let the instance be used like a function; forwards to ``invoke``."""
    result = self.invoke(prompt_args)
    return result
|
| 142 |
+
|
| 143 |
+
def detect_events(self, text: str, entity: str) -> tuple[str, str]:
    """
    Classify a news text about an entity into an event type and summarise it.

    The text is wrapped in an instruction prompt, run through the model held
    in ``self.model`` (tokenised with ``self.tokenizer``), and the decoded
    reply is parsed for the markers ``Тип:`` and ``Краткое описание:``.

    Args:
        text (str): The news text to analyze
        entity (str): The company/entity name

    Returns:
        tuple[str, str]: (event_type, summary). ``event_type`` is one of
        "Отчетность", "РЦБ", "Суд", "Нет". On invalid input or any failure
        the type is "Нет" and the summary carries a short diagnostic.
        A summary taken from the model is at most 200 characters long.
    """
    valid_types = ["Отчетность", "РЦБ", "Суд", "Нет"]
    type_keywords = {
        "Отчетность": ["отчет", "выручка", "прибыль", "ebitda", "финанс"],
        "РЦБ": ["облигаци", "купон", "дефолт", "реструктуризац", "ценные бумаги"],
        "Суд": ["суд", "иск", "арбитраж", "разбирательств"],
    }
    default_summaries = {
        "Отчетность": "Обнаружена информация о финансовой отчетности",
        "РЦБ": "Обнаружена информация о ценных бумагах",
        "Суд": "Обнаружена информация о судебном разбирательстве",
        "Нет": "Значимых событий не обнаружено",
    }

    # Default return values
    event_type = "Нет"
    summary = ""

    # Input validation
    if not text or not entity or not isinstance(text, str) or not isinstance(entity, str):
        return event_type, "Invalid input"

    try:
        text = text.strip()
        entity = entity.strip()

        prompt = "\n".join([
            f"<s>Analyze the following news about {entity}:",
            "",
            f"Text: {text}",
            "",
            "Task: Identify the main event type and provide a brief summary.",
            "",
            "Event types:",
            "1. Отчетность - Events related to financial reports, earnings, revenue, EBITDA",
            "2. РЦБ - Events related to securities, bonds, stock market, defaults, restructuring",
            "3. Суд - Events related to legal proceedings, lawsuits, arbitration",
            "4. Нет - No significant events detected",
            "",
            "Required output format:",
            "Тип: [event type]",
            "Краткое описание: [1-2 sentence summary]</s>",
        ])

        # Tokenise before touching the device so the encoding is always
        # bound when the out-of-memory fallback below needs it.
        encoded = self.tokenizer(
            prompt,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        )

        # Same generation settings for the primary attempt and the CPU retry.
        generation_kwargs = {
            "max_length": 300,
            "num_return_sequences": 1,
            "do_sample": False,
            "pad_token_id": self.tokenizer.pad_token_id,
            "eos_token_id": self.tokenizer.eos_token_id,
            "no_repeat_ngram_size": 3,  # Prevent repetition
        }

        try:
            outputs = self.model.generate(**encoded.to(self.device), **generation_kwargs)
        except torch.cuda.OutOfMemoryError:
            st.warning("GPU memory exceeded, falling back to CPU")
            self.model = self.model.to('cpu')
            torch.cuda.empty_cache()
            try:
                outputs = self.model.generate(**encoded.to('cpu'), **generation_kwargs)
            finally:
                # Restore the configured device even if the CPU retry fails,
                # so later calls do not hit a model/input device mismatch.
                self.model = self.model.to(self.device)

        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        if "Тип:" in response and "Краткое описание:" in response:
            parts = response.split("Краткое описание:")
            type_pieces = parts[0].split("Тип:")
            if len(type_pieces) < 2:
                # "Тип:" only appears after the summary marker.
                st.warning("Error parsing model response format")
                return "Нет", "Error parsing response"
            type_part = type_pieces[1].strip()

            # Honour the declared type even with different case or trailing
            # punctuation ("Суд.", "суд") before guessing from keywords.
            declared = type_part.strip(" \t\r\n.,;:!?\"'()[]«»").casefold()
            canonical = {name.casefold(): name for name in valid_types}
            if declared in canonical:
                event_type = canonical[declared]
            else:
                # Keywords in the declared type outrank keywords that merely
                # occur somewhere in the whole response.
                for haystack in (type_part.lower(), response.lower()):
                    matched = next(
                        (
                            category
                            for category, words in type_keywords.items()
                            if any(word in haystack for word in words)
                        ),
                        None,
                    )
                    if matched is not None:
                        event_type = matched
                        break

            summary = parts[1].strip()
            # Only decorate a usable summary; an empty or very short one
            # must reach the default-summary fallback below.
            if len(summary) >= 5:
                if entity.lower() not in summary.lower():
                    summary = f"Компания {entity}: {summary}"
                # Truncate last so the cap also covers the added prefix.
                if len(summary) > 200:
                    summary = summary[:197] + "..."

        if not summary or len(summary) < 5:
            fallback_text = default_summaries.get(event_type, "Требуется дополнительный анализ")
            summary = f"{fallback_text} ({entity})"

        return event_type, summary

    except Exception as e:
        st.warning(f"Event detection error: {str(e)}")
        # Coarse classification of the failure based on the error message.
        message = str(e)
        if "CUDA" in message:
            return "Нет", "GPU error - falling back to CPU needed"
        elif "tokenizer" in message:
            return "Нет", "Text processing error"
        elif "model" in message:
            return "Нет", "Model inference error"
        else:
            return "Нет", "Ошибка анализа"
|
| 286 |
+
|
| 287 |
|
| 288 |
def ensure_groq_llm():
|
| 289 |
"""Initialize Groq LLM for impact estimation"""
|
|
|
|
| 494 |
model="yiyanghkust/finbert-tone",
|
| 495 |
return_all_scores=True
|
| 496 |
)
|
| 497 |
+
st.success("служебное сообщение: BERT-модели запущены для детекции новостей")
|
| 498 |
except Exception as e:
|
| 499 |
st.error(f"Ошибка запуска BERT: {str(e)}")
|
| 500 |
raise
|
|
|
|
| 557 |
# Initialize fallback translator
|
| 558 |
self.fallback_translator = GoogleTranslator(source='ru', target='en')
|
| 559 |
self.legacy_translator = LegacyTranslator()
|
| 560 |
+
st.success("служебное сообщение: запустил систему перевода")
|
| 561 |
except Exception as e:
|
| 562 |
st.error(f"Ошибка запуска перевода: {str(e)}")
|
| 563 |
raise
|
|
|
|
| 784 |
st.error(f"Ошибка в обработке файла: {str(e)}")
|
| 785 |
return None
|
| 786 |
|
| 787 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 788 |
|
| 789 |
|
| 790 |
def create_download_section(excel_data, pdf_data):
|
|
|
|
| 1031 |
'Текст сообщения'
|
| 1032 |
])
|
| 1033 |
|
| 1034 |
+
def translate_reasoning_to_russian(llm, text):
    """Translate an English reasoning text into Russian with the given LLM.

    A ``FallbackLLMSystem`` is called directly through ``invoke`` with a
    ready-made instruction. Any other LLM is driven through a
    ``PromptTemplate`` piped into the model. The returned value is the
    translation with surrounding whitespace stripped.
    """
    if isinstance(llm, FallbackLLMSystem):
        # The fallback system takes the finished instruction as-is.
        direct_request = {'template_result': f"Translate to Russian: {text}"}
        return llm.invoke(direct_request).content.strip()

    template = (
        "\n"
        "Translate this English explanation to Russian, maintaining a formal business style:\n"
        "\"{text}\"\n"
        "\n"
        "Your response should contain only the Russian translation.\n"
    )
    translation_chain = PromptTemplate(template=template, input_variables=["text"]) | llm
    result = translation_chain.invoke({"text": text})

    # The chain may return a message-like object, a plain string, or
    # something else; normalise all three to text before stripping.
    if hasattr(result, 'content'):
        raw = result.content
    elif isinstance(result, str):
        raw = result
    else:
        raw = str(result)
    return raw.strip()
|
| 1061 |
+
|
| 1062 |
+
def create_output_file(df, uploaded_file, llm):
    """Fill the 'Анализ' sheet of the template workbook and return it in memory.

    Loads ``sample_file.xlsx``, writes one row per record of ``df`` whose
    'Sentiment' is 'Negative' (starting at worksheet row 4, columns 5-9),
    and saves the workbook into a ``BytesIO`` buffer.

    Args:
        df: DataFrame providing the 'Sentiment', 'Объект', 'Заголовок',
            'Reasoning' and 'Выдержки из текста' columns read below.
        uploaded_file: Not used by the code in this block.
        llm: Model handed to ``translate_reasoning_to_russian``.

    Returns:
        The buffer rewound to position 0, or ``None`` after a warning if
        any step raised.
    """
    try:
        workbook = load_workbook("sample_file.xlsx")

        # Only the 'Анализ' sheet is populated in this block.
        analysis_sheet = workbook['Анализ']
        negative_rows = df[df['Sentiment'] == 'Negative']

        for target_row, (_, record) in enumerate(negative_rows.iterrows(), start=4):
            analysis_sheet.cell(row=target_row, column=5, value=record['Объект'])
            analysis_sheet.cell(row=target_row, column=6, value=record['Заголовок'])
            analysis_sheet.cell(row=target_row, column=7, value="Риск убытка")

            # Column 8 gets the Russian reasoning; if translation fails the
            # untranslated text is written instead.
            if pd.notna(record['Reasoning']):
                try:
                    reasoning_ru = translate_reasoning_to_russian(llm, record['Reasoning'])
                    analysis_sheet.cell(row=target_row, column=8, value=reasoning_ru)
                except Exception as e:
                    st.warning(f"Translation error for row {target_row}: {str(e)}")
                    analysis_sheet.cell(row=target_row, column=8, value=record['Reasoning'])

            analysis_sheet.cell(row=target_row, column=9, value=record['Выдержки из текста'])

        buffer = io.BytesIO()
        workbook.save(buffer)
        buffer.seek(0)
        return buffer

    except Exception as e:
        st.warning(f"Ошибка при создании выходного файла: {str(e)}")
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1098 |
|
| 1099 |
def main():
|
| 1100 |
st.set_page_config(layout="wide")
|
| 1101 |
|
| 1102 |
with st.sidebar:
|
| 1103 |
+
st.title("::: AI-анализ мониторинга новостей (v.3.60):::")
|
| 1104 |
st.subheader("по материалам СКАН-ИНТЕРФАКС")
|
| 1105 |
|
| 1106 |
model_choice = st.radio(
|