Spaces:
Running
Running
| import re | |
def safe_extract_text(content) -> str:
    """Safely extract plain text from an LLM response payload.

    Supports both a plain string and a list of content parts (the original
    author notes that Gemini responses arrive as a list).

    Args:
        content: Raw response content. A ``str`` is returned stripped. For a
            ``list``, the ``"text"`` value of every dict item and every bare
            string item are joined with single spaces; other items are
            ignored. ``None`` yields an empty string. Any other type is
            converted with ``str()``.

    Returns:
        The extracted text with surrounding whitespace removed.
    """
    if content is None:
        # str(None) would leak the literal "None" into downstream text.
        return ""
    if isinstance(content, str):
        return content.strip()
    if isinstance(content, list):
        parts = []
        for item in content:
            if isinstance(item, dict):
                text = item.get("text")
                # A missing or null "text" carries no content; skipping it
                # avoids injecting the literal "None".
                if text is not None:
                    parts.append(str(text))
            elif isinstance(item, str):
                parts.append(item)
        return " ".join(parts).strip()
    # Fallback for any other type.
    return str(content).strip()
# Info strings that explicitly mark a fence as the wrapper around the answer.
_EXPLICIT_WRAPPER_TAGS = frozenset({"markdown", "md", "html"})

# A fence alone on its line: optional indent, three backticks, optional tag.
_FENCE_LINE_RE = re.compile(r"^[ \t]*```[ \t]*([\w+#.-]*)[ \t]*$")

# Legacy pattern, used only when no fence sits on a line of its own
# (e.g. the whole answer was emitted on a single line).
_LOOSE_FENCE_RE = re.compile(
    r"```(?:markdown|md|html)?\s*\n?(.*?)```",
    re.DOTALL | re.IGNORECASE,
)

# Lower-case phrases that typically appear in an LLM refusal.
_REFUSAL_PHRASES = (
    "nie mogę",
    "przykro mi",
    "as an ai",
    "jako model językowy",
    "i cannot",
    "nie jestem w stanie",
    "przepraszam",
    "nie potrafię",
)


def _find_wrapped_block(raw_content: str):
    """Locate the fenced block that wraps the whole answer.

    Returns a ``(body, saw_code_block)`` pair. ``body`` is the text between
    the wrapper fences, or ``None`` when no wrapper was recognised.
    ``saw_code_block`` is True when an ordinary (non-wrapper) code block was
    seen at the top level of the text.
    """
    lines = raw_content.splitlines()
    wrapper_start = None  # index of the wrapper's opening fence line
    wrapper_tag = ""
    nested_depth = 0  # code blocks currently open inside the wrapper
    in_code_block = False  # inside a top-level, non-wrapper code block
    saw_code_block = False

    for index, line in enumerate(lines):
        fence = _FENCE_LINE_RE.match(line)
        if fence is None:
            continue
        tag = fence.group(1).lower()
        if wrapper_start is not None:
            if tag:
                # A tagged fence can only open a block nested in the wrapper.
                nested_depth += 1
            elif nested_depth:
                nested_depth -= 1
            else:
                body = "\n".join(lines[wrapper_start + 1 : index])
                return body, saw_code_block
        elif in_code_block:
            if not tag:
                in_code_block = False
        elif tag in _EXPLICIT_WRAPPER_TAGS or (not tag and not saw_code_block):
            # A bare fence counts as a wrapper only while no ordinary code
            # block has been seen; afterwards it is just another code block.
            wrapper_start = index
            wrapper_tag = tag
        else:
            in_code_block = True
            saw_code_block = True

    # An explicitly tagged wrapper that was never closed: keep what follows.
    # A bare unclosed fence is ambiguous (it may be a stray closing fence),
    # so it is left to the legacy fallback.
    if wrapper_start is not None and wrapper_tag:
        tail = "\n".join(lines[wrapper_start + 1 :])
        if tail.strip():
            return tail, saw_code_block
    return None, saw_code_block


def _strip_loose_fences(raw_content: str) -> str:
    """Apply the legacy regex extraction, then trim a stray leading/trailing fence."""
    loose = _LOOSE_FENCE_RE.search(raw_content)
    if loose:
        return loose.group(1).strip()

    text = raw_content.strip()
    if text.startswith("```"):
        first_newline = text.find("\n")
        # A short first line is taken to be the fence plus its info string.
        if first_newline != -1 and first_newline < 20:
            text = text[first_newline + 1 :]
        else:
            text = text[3:]
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


def extract_markdown_and_sanitize(raw_content: str, min_length: int = 50) -> str:
    """Extract the Markdown payload from an LLM answer and sanity-check it.

    The wrapper fence (```` ```markdown ````, ```` ```md ````, ```` ```html ````
    or a bare fence) is removed. Code blocks nested inside the wrapper are
    preserved, and an unwrapped document that merely contains code blocks is
    returned whole.

    Args:
        raw_content: Raw text returned by the LLM.
        min_length: Minimum accepted length of the extracted text.

    Returns:
        The extracted text, or the literal ``"[NO_CHANGE]"`` when that marker
        occurs anywhere in ``raw_content``.

    Raises:
        ValueError: If the extracted text is shorter than ``min_length``, or
            is shorter than 500 characters and contains a known refusal
            phrase. The original docstring states that a watchdog mechanism
            retries on this error.
    """
    if "[NO_CHANGE]" in raw_content:
        return "[NO_CHANGE]"

    wrapped, saw_code_block = _find_wrapped_block(raw_content)
    if wrapped is not None:
        extracted = wrapped.strip()
    elif saw_code_block:
        # Unwrapped document with ordinary code blocks: nothing to strip.
        extracted = raw_content.strip()
    else:
        extracted = _strip_loose_fences(raw_content)

    # Sanity checks
    if len(extracted) < min_length:
        raise ValueError(
            f"Wygenerowany tekst jest zbyt krótki ({len(extracted)} znaków). Prawdopodobnie błąd generowania."
        )

    extracted_lower = extracted.lower()
    # Only short answers are screened: a long document may legitimately
    # contain one of these phrases.
    if len(extracted) < 500 and any(p in extracted_lower for p in _REFUSAL_PHRASES):
        raise ValueError("LLM zwrócił odmowę lub halucynację zamiast poprawnej treści.")
    return extracted