"""Helpers that generate, normalise and parse LLM-produced conversation summaries."""

import logging
import re
from typing import Dict, List, Optional

from app.config import get_settings
from app.llm.llm_client import LLMClient

logger = logging.getLogger(__name__)

CONTEXT_SUMMARY_SYSTEM = (
    "You are a concise summarizer. Condense the following conversation into a short summary "
    "that preserves the key topics discussed, any conclusions reached, important facts shared, "
    "and the overall tone. Keep it under 300 words. Write in third person narrative form."
)

# Line classifiers shared by the parser and the text exporter. They are applied
# to lines that have already been stripped of surrounding whitespace.
_HEADING_RE = re.compile(r'^\*\*(.+?)\*\*:?$')
_BULLET_RE = re.compile(r'^[*•-]\s+(.+)')
_NUMBERED_RE = re.compile(r'^\d+\.\s+(.+)')

# A "*" or "•" marker that sits in the middle of a line (non-space text on both
# sides, separated by spaces/tabs). "-" is deliberately excluded because an
# inline " - " is usually a dash in running prose rather than a bullet.
_INLINE_BULLET_RE = re.compile(r'(?<=\S)[ \t]+([*•])[ \t]+(?=\S)')
_EXCESS_BLANK_LINES_RE = re.compile(r'\n{3,}')


def _conversation_role_label(role: str, persona_names: Optional[Dict[str, str]] = None) -> str:
    """Return the speaker label used for *role* in a transcript.

    ``"user"`` and ``"assistant"`` map to fixed labels. Any other role is
    looked up in *persona_names*; when it is not found there, the role string
    is title-cased with underscores turned into spaces.
    """
    if role == "user":
        return "User"
    if role == "assistant":
        return "Assistant"
    if persona_names and role in persona_names:
        return persona_names[role]
    return role.replace("_", " ").title()


async def generate_conversation_context_summary(
    messages: List[dict],
    llm: LLMClient,
    persona_names: Optional[Dict[str, str]] = None,
    max_tokens: int = 1024,
) -> str:
    """Summarize chat history for LLM context when the transcript exceeds the token budget.

    This function does not measure the transcript itself; deciding when to
    call it is left to the caller.

    Args:
        messages: Chat messages as dicts; the ``"role"`` and ``"content"``
            keys are read. Messages whose content is missing or blank are
            skipped.
        llm: Client whose awaitable ``generate`` method produces the summary.
        persona_names: Optional mapping from role to display name.
        max_tokens: Passed through to ``llm.generate``.

    Returns:
        The stripped summary, or ``""`` when there is nothing to summarize or
        the LLM call raises.
    """
    transcript_lines = []
    for msg in messages:
        content = (msg.get("content") or "").strip()
        if not content:
            continue
        # A role that is present but None/empty would otherwise crash in
        # _conversation_role_label; treat it like a missing role.
        role = msg.get("role") or "user"
        label = _conversation_role_label(role, persona_names)
        transcript_lines.append(f"{label}: {content}")

    if not transcript_lines:
        return ""

    transcript = "\n".join(transcript_lines)
    try:
        summary = await llm.generate(
            system_prompt=CONTEXT_SUMMARY_SYSTEM,
            context=[{"role": "user", "content": transcript}],
            temperature=0.3,
            max_tokens=max_tokens,
        )
        return (summary or "").strip()
    except Exception as exc:
        # Best effort: a failed summary must not break the calling flow.
        logger.error("Conversation context summary failed: %s", exc)
        return ""


async def generate_summary_from_messages(messages: List[dict], llm: LLMClient, max_tokens: int = 800) -> str:
    """Summarize the conversation using the given LLM client.

    Args:
        messages: Chat messages as dicts; every entry must provide ``"role"``
            and ``"content"``.
        llm: Client whose awaitable ``generate`` method produces the summary.
        max_tokens: Passed through to ``llm.generate``.

    Returns:
        The summary after bullet-point normalisation, or a fixed failure
        message when anything in the process raises (including a message
        lacking one of the required keys).
    """
    try:
        app_title = get_settings().app.title
        full_text = "\n\n".join(f"{m['role']}:\n{m['content']}" for m in messages)
        system_prompt = (
            f"You are an assistant for {app_title}. Summarize the following chat conversation "
            "into a well-formatted summary with clear bullet points. "
            "Please format your response as follows:\n"
            "- Use bullet points (starting with *) for key insights\n"
            "- Put each bullet point on a separate line\n"
            "- Include section headings if appropriate (formatted as **Section Name:**)\n"
            "- Focus on insights, questions, and actionable advice\n"
            "- Maximum 10 bullet points\n\n"
            "Example format:\n"
            "**Key Insights:**\n"
            "* First main point about the conversation\n"
            "* Second important insight\n"
            "* Third key takeaway\n\n"
            "**Recommendations:**\n"
            "* First actionable recommendation\n"
            "* Second suggestion"
        )
        context = [{"role": "user", "content": f"Chat Log:\n{full_text}"}]
        summary = await llm.generate(
            system_prompt=system_prompt,
            context=context,
            temperature=0.4,
            max_tokens=max_tokens,
        )
        # Post-process the summary to ensure proper formatting
        return _format_summary_text(summary.strip())
    except Exception as e:
        logger.error("Error generating summary: %s", e)
        return "Summary generation failed. Please try again later."


def _format_summary_text(summary_text: str) -> str:
    """Post-process the summary text to ensure proper bullet point formatting.

    NOTE(review): the original body of this function was truncated in the
    source under review (everything after the start of its first ``re.sub``
    pattern was lost). The implementation below is a reconstruction based on
    the surviving docstring and comments -- confirm it against version control.

    Args:
        summary_text: Raw summary text as returned by the LLM.

    Returns:
        The text with inline ``*`` / ``•`` bullets moved onto their own lines,
        runs of three or more newlines reduced to two, and surrounding
        whitespace stripped. Empty or ``None`` input yields ``""``.
    """
    if not summary_text:
        return ""
    # Add line breaks before bullet points that don't have them
    text = _INLINE_BULLET_RE.sub(r'\n\1 ', summary_text)
    text = _EXCESS_BLANK_LINES_RE.sub('\n\n', text)
    return text.strip()


# NOTE(review): this function's ``def`` line was lost in the same truncation;
# the name ``parse_summary_to_blocks`` is an assumption. The parameter name and
# return annotation are taken from the surviving code. Restore the real name.
def parse_summary_to_blocks(summary_text: str) -> List[Dict]:
    """Parse summary text into structured blocks for better formatting.

    Args:
        summary_text: Summary text; it is normalised with
            ``_format_summary_text`` before parsing.

    Returns:
        A list of dicts in document order, each one of:

        * ``{"type": "heading", "text": ...}`` for a ``**Title**`` or
          ``**Title**:`` line,
        * ``{"type": "list", "style": "bullet" | "numbered", "items": [...]}``
          for a run of consecutive list items of the same style (blank lines
          do not end a run),
        * ``{"type": "paragraph", "text": ...}`` for any other non-blank line.
    """
    # First, ensure proper formatting
    summary_text = _format_summary_text(summary_text)

    blocks: List[Dict] = []
    # The list block currently being extended; it is already in ``blocks``.
    open_list: Optional[Dict] = None
    list_styles = (("bullet", _BULLET_RE), ("numbered", _NUMBERED_RE))

    for raw_line in summary_text.strip().splitlines():
        line = raw_line.strip()
        if not line:
            continue

        # Match section headings (e.g. **Title:** or **Title**)
        heading_match = _HEADING_RE.match(line)
        if heading_match:
            blocks.append({"type": "heading", "text": heading_match.group(1).strip()})
            open_list = None
            continue

        # Match bullet list items (*, •, or -) and numbered list items
        for style, pattern in list_styles:
            item_match = pattern.match(line)
            if item_match is None:
                continue
            if open_list is None or open_list["style"] != style:
                open_list = {"type": "list", "style": style, "items": []}
                blocks.append(open_list)
            open_list["items"].append(item_match.group(1).strip())
            break
        else:
            # Default: treat as paragraph
            blocks.append({"type": "paragraph", "text": line})
            open_list = None

    # Debug output to help troubleshoot
    logger.info("[DEBUG] Parsed %d blocks from summary", len(blocks))
    for i, block in enumerate(blocks):
        if block["type"] == "list":
            logger.info(
                "Block %d: %s (%s) with %d items",
                i,
                block["type"],
                block["style"],
                len(block["items"]),
            )
        else:
            logger.info("Block %d: %s", i, block["type"])

    return blocks


def _append_blank_once(lines: List[str]) -> None:
    """Append a blank separator unless *lines* is empty or already ends with one."""
    if lines and lines[-1] != '':
        lines.append('')


def format_summary_for_text_export(summary_text: str) -> str:
    """Format summary text specifically for TXT and DOCX exports with proper line breaks.

    Headings are surrounded by a blank line, consecutive bullet items are kept
    together as one group, and every other line becomes its own paragraph.
    Separators never stack: exactly one blank line appears between neighbouring
    elements, and the result has no trailing blank line.

    Args:
        summary_text: Summary text; it is normalised with
            ``_format_summary_text`` first.

    Returns:
        The re-spaced text joined with ``"\\n"``.
    """
    formatted_lines: List[str] = []

    for raw_line in _format_summary_text(summary_text).split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if _HEADING_RE.match(line):
            # Space before the heading (not before the very first line) and after it.
            _append_blank_once(formatted_lines)
            formatted_lines.append(line)
            formatted_lines.append('')
        elif _BULLET_RE.match(line):
            # Only the first bullet of a group gets a separator in front of it.
            previous_is_bullet = bool(formatted_lines) and bool(_BULLET_RE.match(formatted_lines[-1]))
            if not previous_is_bullet:
                _append_blank_once(formatted_lines)
            formatted_lines.append(line)
        else:
            # Regular paragraph
            _append_blank_once(formatted_lines)
            formatted_lines.append(line)

    # A heading at the very end would otherwise leave a dangling blank line.
    while formatted_lines and formatted_lines[-1] == '':
        formatted_lines.pop()

    return '\n'.join(formatted_lines)