Update app.py
app.py CHANGED
@@ -1,1170 +1,124 @@
-

-import numpy as np
-import pandas as pd
-from typing import Dict, List, Tuple, Optional
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.metrics.pairwise import cosine_similarity
 import spacy
-import json
-import re
-import networkx as nx
-from sklearn.cluster import DBSCAN

-
-
-
-
-
-    def __init__(self, config: Dict):
-        """
-        Initialize the process discovery engine.
-
-        Args:
-            config: Configuration dictionary with parameters
-        """
-        self.min_frequency = config.get('min_frequency', 0.05)
-        self.time_threshold = config.get('time_threshold', 60)  # seconds
-        self.similarity_threshold = config.get('similarity_threshold', 0.75)
-        self.process_graph = nx.DiGraph()
-
-    def ingest_log_data(self, log_data: pd.DataFrame) -> bool:
-        """
-        Ingest process log data from system logs.
-
-        Args:
-            log_data: DataFrame containing log entries with timestamp, user, action columns
-
-        Returns:
-            bool: Success status
-        """
-        if 'timestamp' not in log_data.columns or 'action' not in log_data.columns:
-            return False
-
-        # Sort by timestamp
-        sorted_logs = log_data.sort_values('timestamp')
-
-        # Group by case_id if available
-        if 'case_id' in sorted_logs.columns:
-            case_groups = sorted_logs.groupby('case_id')
-            for case_id, case_data in case_groups:
-                self._process_sequence(case_data['action'].tolist(),
-                                       source=f"log:{case_id}")
-        else:
-            # Try to identify sessions based on time gaps
-            self._segment_and_process_logs(sorted_logs)
-
-        return True
-
-    def ingest_screen_recordings(self, recording_analysis: List[Dict]) -> bool:
-        """
-        Ingest analyzed screen recording data.
-
-        Args:
-            recording_analysis: List of dictionaries containing screen activities
-
-        Returns:
-            bool: Success status
-        """
-        for session in recording_analysis:
-            if 'actions' in session and isinstance(session['actions'], list):
-                action_sequence = [a['activity'] for a in session['actions']
-                                   if 'activity' in a]
-                self._process_sequence(action_sequence,
-                                       source=f"recording:{session.get('id', 'unknown')}")
-
-        return True
-
-    def _segment_and_process_logs(self, logs: pd.DataFrame) -> None:
-        """
-        Segment logs into probable process instances based on time gaps.
-
-        Args:
-            logs: DataFrame of logs sorted by timestamp
-        """
-        logs['timestamp'] = pd.to_datetime(logs['timestamp'])
-        logs['time_diff'] = logs['timestamp'].diff().dt.total_seconds()
-
-        # Mark new sequences where time difference exceeds threshold
-        new_sequence = logs['time_diff'] > self.time_threshold
-        logs['sequence_id'] = new_sequence.cumsum()
-
-        # Process each sequence
-        for seq_id, sequence in logs.groupby('sequence_id'):
-            self._process_sequence(sequence['action'].tolist(),
-                                   source=f"timegap:{seq_id}")
-
-    def _process_sequence(self, actions: List[str], source: str) -> None:
-        """
-        Process a sequence of actions into the process graph.
-
-        Args:
-            actions: List of action names in sequence
-            source: Data source identifier
-        """
-        for i in range(len(actions) - 1):
-            current = actions[i]
-            next_action = actions[i+1]
-
-            # Add nodes if they don't exist
-            if current not in self.process_graph:
-                self.process_graph.add_node(current, count=0, sources=set())
-            if next_action not in self.process_graph:
-                self.process_graph.add_node(next_action, count=0, sources=set())
-
-            # Update node data
-            self.process_graph.nodes[current]['count'] += 1
-            self.process_graph.nodes[current]['sources'].add(source)
-
-            # Add or update edge
-            if self.process_graph.has_edge(current, next_action):
-                self.process_graph[current][next_action]['weight'] += 1
-                self.process_graph[current][next_action]['sources'].add(source)
-            else:
-                self.process_graph.add_edge(current, next_action,
-                                            weight=1, sources={source})
-
-    def discover_main_process_paths(self) -> List[Dict]:
-        """
-        Discover the main process paths from the constructed graph.
-
-        Returns:
-            List of dictionaries describing main process paths
-        """
-        # Filter edges by frequency
-        total_transitions = sum(data['weight'] for _, _, data in self.process_graph.edges(data=True))
-
-        if total_transitions == 0:
-            return []
-
-        min_edge_weight = total_transitions * self.min_frequency
-        significant_edges = [(u, v) for u, v, d in self.process_graph.edges(data=True)
-                             if d['weight'] > min_edge_weight]
-
-        # Create subgraph with only significant edges
-        significant_graph = self.process_graph.edge_subgraph(significant_edges).copy()
-
-        # Find all simple paths from potential start nodes to end nodes
-        start_nodes = [n for n in significant_graph.nodes()
-                       if significant_graph.in_degree(n) == 0 or
-                       significant_graph.in_degree(n) < significant_graph.out_degree(n)]
-
-        end_nodes = [n for n in significant_graph.nodes()
-                     if significant_graph.out_degree(n) == 0 or
-                     significant_graph.out_degree(n) < significant_graph.in_degree(n)]
-
-        # If no clear start/end, use nodes with highest centrality
-        if not start_nodes:
-            centrality = nx.degree_centrality(significant_graph)
-            start_nodes = [max(centrality, key=centrality.get)]
-
-        if not end_nodes:
-            centrality = nx.degree_centrality(significant_graph)
-            end_nodes = [max(centrality, key=centrality.get)]
-
-        # Find all paths between start and end nodes
-        all_paths = []
-        for start in start_nodes:
-            for end in end_nodes:
-                try:
-                    paths = list(nx.all_simple_paths(significant_graph, start, end))
-                    all_paths.extend(paths)
-                except nx.NetworkXNoPath:
-                    continue
-
-        # Calculate path frequency and return top paths
-        path_data = []
-        for path in all_paths:
-            # Calculate path strength as minimum edge weight along path
-            edge_weights = [significant_graph[path[i]][path[i+1]]['weight']
-                            for i in range(len(path)-1)]
-            path_strength = min(edge_weights) if edge_weights else 0
-
-            path_data.append({
-                'path': path,
-                'strength': path_strength,
-                'length': len(path),
-                'avg_edge_weight': sum(edge_weights) / len(edge_weights) if edge_weights else 0
-            })
-
-        # Sort by path strength descending
-        path_data.sort(key=lambda x: x['strength'], reverse=True)
-
-        return path_data
-
-    def identify_process_variants(self) -> List[Dict]:
-        """
-        Identify variants of the same basic process.
-
-        Returns:
-            List of process variant clusters
-        """
-        if len(self.process_graph) < 2:
-            return []
-
-        # Extract features for clustering
-        paths = self.discover_main_process_paths()
-        if not paths:
-            return []
-
-        # Create feature vectors from paths
-        all_activities = sorted(list(self.process_graph.nodes()))
-        activity_indices = {act: i for i, act in enumerate(all_activities)}
-
-        # Create feature vectors (activity presence and position)
-        feature_vectors = []
-        for path_data in paths:
-            path = path_data['path']
-            vector = np.zeros(len(all_activities) * 2)
-
-            # Mark presence and relative position of activities
-            for pos, activity in enumerate(path):
-                idx = activity_indices[activity]
-                vector[idx] = 1  # presence
-                vector[idx + len(all_activities)] = pos / len(path)  # relative position
-
-            feature_vectors.append(vector)
-
-        # Cluster paths using DBSCAN
-        if len(feature_vectors) < 2:
-            return [{'variant_id': 0, 'paths': paths}]
-
-        clustering = DBSCAN(eps=0.3, min_samples=1).fit(feature_vectors)
-        labels = clustering.labels_
-
-        # Group paths by cluster
-        variants = {}
-        for i, label in enumerate(labels):
-            label_str = str(label)
-            if label_str not in variants:
-                variants[label_str] = []
-            variants[label_str].append(paths[i])
-
-        # Format result
-        result = [
-            {'variant_id': variant_id, 'paths': variant_paths}
-            for variant_id, variant_paths in variants.items()
-        ]
-
-        return result
-
-    def get_process_stats(self) -> Dict:
-        """
-        Get statistics about the discovered process.
-
-        Returns:
-            Dictionary with process statistics
-        """
-        if not self.process_graph:
-            return {"error": "No process data available"}
-
-        stats = {
-            "num_activities": len(self.process_graph.nodes()),
-            "num_transitions": len(self.process_graph.edges()),
-            "most_frequent_activities": [],
-            "most_frequent_transitions": [],
-            "process_complexity": 0,
-            "data_sources": set()
-        }
-
-        # Most frequent activities
-        activities = [(node, data['count'])
-                      for node, data in self.process_graph.nodes(data=True)]
-        activities.sort(key=lambda x: x[1], reverse=True)
-        stats["most_frequent_activities"] = activities[:10]
-
-        # Most frequent transitions
-        transitions = [(u, v, data['weight'])
-                       for u, v, data in self.process_graph.edges(data=True)]
-        transitions.sort(key=lambda x: x[2], reverse=True)
-        stats["most_frequent_transitions"] = transitions[:10]
-
-        # Process complexity (using Control-Flow Complexity metric)
-        stats["process_complexity"] = sum(self.process_graph.out_degree(n) for n in self.process_graph.nodes())
-
-        # Data sources
-        for _, data in self.process_graph.nodes(data=True):
-            if 'sources' in data:
-                stats["data_sources"].update(data['sources'])
-
-        stats["data_sources"] = list(stats["data_sources"])
-
-        return stats

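For orientation, here is a minimal sketch of how the discovery engine removed above was presumably driven. The class name `ProcessDiscoveryEngine` and the column layout are assumptions: the class statement itself is not visible in this hunk, only its methods.

# Hypothetical usage of the removed engine; ProcessDiscoveryEngine is an assumed name.
import pandas as pd

logs = pd.DataFrame({
    'timestamp': ['2024-01-01 09:00:00', '2024-01-01 09:00:30', '2024-01-01 09:01:00'],
    'action': ['open_invoice', 'verify_details', 'approve_payment'],
    'case_id': [1, 1, 1],
})

engine = ProcessDiscoveryEngine({'min_frequency': 0.05, 'time_threshold': 60})
engine.ingest_log_data(logs)                  # builds the directed process graph
print(engine.discover_main_process_paths())   # paths ranked by minimum edge weight
print(engine.get_process_stats())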
-
-
-        Export the discovered process in the specified format.
-
-        Args:
-            format_type: Output format ('bpmn', 'petri_net', or 'json')
-
-        Returns:
-            Dictionary with export data and metadata
-        """
-        if format_type == 'json':
-            nodes = [{"id": n, "count": data.get('count', 0)}
-                     for n, data in self.process_graph.nodes(data=True)]
-
-            edges = [{"source": u, "target": v, "weight": data.get('weight', 0)}
-                     for u, v, data in self.process_graph.edges(data=True)]
-
-            return {
-                "format": "json",
-                "process_model": {
-                    "nodes": nodes,
-                    "edges": edges
-                }
-            }
-
-        elif format_type == 'bpmn':
-            # Basic BPMN conversion (simplified)
-            # In a real implementation, this would generate actual BPMN XML
-            return {
-                "format": "bpmn",
-                "process_model": {
-                    "process_id": "discovered_process",
-                    "activities": list(self.process_graph.nodes()),
-                    "flows": [(u, v) for u, v in self.process_graph.edges()],
-                    "gateways": self._identify_potential_gateways()
-                }
-            }
-
-        elif format_type == 'petri_net':
-            # Basic Petri net conversion (simplified)
-            return {
-                "format": "petri_net",
-                "process_model": {
-                    "places": self._generate_petri_net_places(),
-                    "transitions": list(self.process_graph.nodes()),
-                    "arcs": self._generate_petri_net_arcs()
-                }
-            }
-
-        else:
-            return {"error": f"Unsupported export format: {format_type}"}
-
-    def _identify_potential_gateways(self) -> List[Dict]:
-        """
-        Identify potential gateways in the process based on branching.
-
-        Returns:
-            List of potential gateway nodes
-        """
-        gateways = []
-
-        for node in self.process_graph.nodes():
-            in_degree = self.process_graph.in_degree(node)
-            out_degree = self.process_graph.out_degree(node)
-
-            # Potential XOR-split (one input, multiple outputs)
-            if in_degree == 1 and out_degree > 1:
-                gateways.append({
-                    "id": f"xor_split_{node}",
-                    "type": "exclusive_gateway",
-                    "direction": "split",
-                    "attached_to": node
-                })
-
-            # Potential XOR-join (multiple inputs, one output)
-            elif in_degree > 1 and out_degree == 1:
-                gateways.append({
-                    "id": f"xor_join_{node}",
-                    "type": "exclusive_gateway",
-                    "direction": "join",
-                    "attached_to": node
-                })
-
-            # Potential AND-split/join or complex gateway
-            elif in_degree > 1 and out_degree > 1:
-                gateways.append({
-                    "id": f"complex_{node}",
-                    "type": "complex_gateway",
-                    "direction": "mixed",
-                    "attached_to": node
-                })
-
-        return gateways
-
-    def _generate_petri_net_places(self) -> List[str]:
-        """
-        Generate places for a Petri net representation.
-
-        Returns:
-            List of place IDs
-        """
-        places = []
-
-        # Generate places between each pair of activities
-        for u, v in self.process_graph.edges():
-            places.append(f"p_{u}_{v}")
-
-        # Add start and end places
-        start_nodes = [n for n in self.process_graph.nodes()
-                       if self.process_graph.in_degree(n) == 0]
-        for node in start_nodes:
-            places.append(f"p_start_{node}")
-
-        end_nodes = [n for n in self.process_graph.nodes()
-                     if self.process_graph.out_degree(n) == 0]
-        for node in end_nodes:
-            places.append(f"p_{node}_end")
-
-        return places
-
-    def _generate_petri_net_arcs(self) -> List[Tuple[str, str]]:
-        """
-        Generate arcs for a Petri net representation.
-
-        Returns:
-            List of (source, target) tuples representing arcs
-        """
-        arcs = []
-
-        # Connect transitions through places
-        for u, v in self.process_graph.edges():
-            place = f"p_{u}_{v}"
-            arcs.append((u, place))
-            arcs.append((place, v))
-
-        # Connect start places to initial transitions
-        start_nodes = [n for n in self.process_graph.nodes()
-                       if self.process_graph.in_degree(n) == 0]
-        for node in start_nodes:
-            arcs.append((f"p_start_{node}", node))
-
-        # Connect final transitions to end places
-        end_nodes = [n for n in self.process_graph.nodes()
-                     if self.process_graph.out_degree(n) == 0]
-        for node in end_nodes:
-            arcs.append((node, f"p_{node}_end"))
-
-        return arcs

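A small, self-contained illustration of the gateway heuristic above: a node with one incoming and more than one outgoing edge is flagged as an XOR-split. This reimplements only the degree logic on a toy graph, not the full class.

import networkx as nx

g = nx.DiGraph()
g.add_edges_from([
    ("receive_invoice", "check_duplicates"),
    ("check_duplicates", "approve_payment"),   # branch 1
    ("check_duplicates", "reject_invoice"),    # branch 2
])

for node in g.nodes():
    if g.in_degree(node) == 1 and g.out_degree(node) > 1:
        print(f"xor_split_{node}")  # prints: xor_split_check_duplicates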
-# requirements_analysis_module.py


-
 """
-
-
 """
-
-
-
-
-
-            ],
-            "hr": [
-                "onboarding", "offboarding", "payroll", "recruitment",
-                "employee management", "benefits administration", "time tracking"
-            ],
-            "customer_service": [
-                "ticket management", "customer support", "inquiry handling",
-                "complaint resolution", "feedback processing"
-            ],
-            "operations": [
-                "inventory management", "supply chain", "logistics",
-                "order processing", "shipping", "receiving", "quality control"
-            ],
-            "sales": [
-                "lead management", "opportunity tracking", "quote generation",
-                "contract management", "sales reporting", "commission calculation"
-            ],
-            "it": [
-                "access management", "incident management", "change management",
-                "service request", "problem management", "release management"
-            ]
-        }
-
-        # Complexity indicators for requirements
-        self.complexity_indicators = {
-            "high": [
-                "complex", "multiple systems", "integration", "decision tree",
-                "exception handling", "compliance", "regulatory", "manual review",
-                "approval workflow", "conditional logic", "business rules"
-            ],
-            "medium": [
-                "validation", "verification", "notification", "alert",
-                "scheduled", "reporting", "dashboard", "data transformation"
-            ],
-            "low": [
-                "simple", "straightforward", "data entry", "form filling",
-                "standard", "single system", "fixed path", "static rules"
-            ]
-        }
-
-    def _compile_requirement_patterns(self) -> None:
-        """Compile regex patterns for requirement extraction."""
-        # Action patterns
-        self.action_patterns = [
-            r"(?:need|should|must|will|shall) (?:to )?([a-z]+)",
-            r"responsible for ([a-z]+ing)",
-            r"capability to ([a-z]+)",
-            r"ability to ([a-z]+)"
-        ]
-
-        # System patterns
-        self.system_patterns = [
-            r"(?:in|from|to|using|within) (?:the )?([A-Za-z0-9]+)(?: system| application| platform| software| tool)?",
-            r"([A-Za-z0-9]+)(?: system| application| platform| software| tool)",
-            r"([A-Za-z0-9]+) (?:database|interface|API|server)"
-        ]
-
-        # Frequency patterns
-        self.frequency_patterns = [
-            r"(daily|weekly|monthly|quarterly|yearly|annually)",
-            r"every ([0-9]+) (day|week|month|quarter|year)s?",
-            r"([0-9]+) times per (day|week|month|year)"
-        ]
-
-        # Compile all patterns
-        self.action_regex = [re.compile(pattern) for pattern in self.action_patterns]
-        self.system_regex = [re.compile(pattern) for pattern in self.system_patterns]
-        self.frequency_regex = [re.compile(pattern) for pattern in self.frequency_patterns]
-
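A quick standalone check of the frequency patterns compiled above against a few sample phrasings; the patterns are copied verbatim from _compile_requirement_patterns, only the sample sentences are invented.

import re

frequency_patterns = [
    r"(daily|weekly|monthly|quarterly|yearly|annually)",
    r"every ([0-9]+) (day|week|month|quarter|year)s?",
    r"([0-9]+) times per (day|week|month|year)",
]
frequency_regex = [re.compile(p) for p in frequency_patterns]

for sample in ["reports are generated weekly",
               "reconcile every 3 months",
               "runs 5 times per day"]:
    for pattern in frequency_regex:
        match = pattern.search(sample)
        if match:
            print(sample, "->", match.group(0))
            break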
-    def analyze_text_requirement(self, requirement_text: str, source: str = None) -> Dict:
-        """
-        Analyze a natural language requirement and extract structured information.
-
-        Args:
-            requirement_text: The text of the requirement
-            source: Source of the requirement
-
-        Returns:
-            Dictionary with extracted requirement information
-        """
-        # Parse with spaCy
-        doc = self.nlp(requirement_text)
-
-        # Basic requirement object
-        requirement = {
-            "id": f"REQ-{len(self.requirements) + 1}",
-            "text": requirement_text,
-            "source": source,
-            "extracted": {
-                "actions": self._extract_actions(doc, requirement_text),
-                "systems": self._extract_systems(doc, requirement_text),
-                "frequency": self._extract_frequency(requirement_text),
-                "business_domain": self._classify_business_domain(doc),
-                "complexity": self._assess_complexity(doc, requirement_text),
-                "data_elements": self._extract_data_elements(doc)
-            },
-            "automation_potential": None  # Will be filled later
-        }
-
-        # Store the requirement
-        self.requirements.append(requirement)
-        return requirement
-
-    def _extract_actions(self, doc, text: str) -> List[str]:
-        """
-        Extract action verbs from requirement text.
-
-        Args:
-            doc: spaCy processed document
-            text: Original text
-
-        Returns:
-            List of action verbs
-        """
-        # Method 1: Use spaCy to find verbs
-        verbs = [token.lemma_ for token in doc if token.pos_ == "VERB"]
-
-        # Method 2: Use regex patterns
-        pattern_matches = []
-        for pattern in self.action_regex:
-            matches = pattern.findall(text.lower())
-            pattern_matches.extend(matches)
-
-        # Combine and deduplicate
-        all_actions = list(set(verbs + pattern_matches))
-
-        # Filter out common non-action verbs
-        stopwords = ["be", "is", "are", "was", "were", "have", "has", "had"]
-        filtered_actions = [v for v in all_actions if v not in stopwords and len(v) > 2]
-
-        return filtered_actions
-
-    def _extract_systems(self, doc, text: str) -> List[str]:
-        """
-        Extract system names from requirement text.
-
-        Args:
-            doc: spaCy processed document
-            text: Original text
-
-        Returns:
-            List of system names
-        """
-        # Method 1: Named Entity Recognition for PRODUCT entities
-        ner_systems = [ent.text for ent in doc.ents
-                       if ent.label_ in ["PRODUCT", "ORG", "GPE"]]
-
-        # Method 2: Pattern matching
-        pattern_systems = []
-        for pattern in self.system_regex:
-            matches = pattern.findall(text)
-            pattern_systems.extend(matches)
-
-        # Combine results
-        all_systems = list(set(ner_systems + pattern_systems))
-
-        # Filter out common false positives
-        stopwords = ["system", "process", "application", "data", "information", "this", "the"]
-        filtered_systems = [s for s in all_systems if s.lower() not in stopwords and len(s) > 2]
-
-        return filtered_systems
-
-    def _extract_frequency(self, text: str) -> Optional[str]:
-        """
-        Extract frequency information from requirement text.
-
-        Args:
-            text: Requirement text
-
-        Returns:
-            Extracted frequency or None
-        """
-        text_lower = text.lower()
-
-        # Check all frequency patterns
-        for pattern in self.frequency_regex:
-            match = pattern.search(text_lower)
-            if match:
-                return match.group(0)
-
-        # Check for specific frequency words
-        frequency_words = ["daily", "weekly", "monthly", "quarterly", "annually", "yearly"]
-        for word in frequency_words:
-            if word in text_lower:
-                return word
-
-        return None
-
-    def _classify_business_domain(self, doc) -> List[Tuple[str, float]]:
-        """
-        Classify the business domain of the requirement.
-
-        Args:
-            doc: spaCy processed document
-
-        Returns:
-            List of (domain, confidence) tuples
-        """
-        text = doc.text.lower()
-        domain_scores = {}
-
-        # Calculate score for each domain based on keyword matches
-        for domain, keywords in self.process_taxonomy.items():
-            domain_score = 0
-            for keyword in keywords:
-                if keyword in text:
-                    domain_score += 1
-
-            if domain_score > 0:
-                # Normalize by number of keywords
-                domain_scores[domain] = domain_score / len(keywords)
-
-        # If no direct matches, use semantic similarity
-        if not domain_scores:
-            for domain, keywords in self.process_taxonomy.items():
-                # Calculate average similarity between doc and each keyword
-                similarities = [doc.similarity(self.nlp(keyword)) for keyword in keywords]
-                avg_similarity = sum(similarities) / len(similarities) if similarities else 0
-
-                if avg_similarity > 0.5:  # Threshold for relevance
-                    domain_scores[domain] = avg_similarity
-
-        # Sort by score and return
-        sorted_domains = sorted(domain_scores.items(), key=lambda x: x[1], reverse=True)
-        return sorted_domains
-
-    def _assess_complexity(self, doc, text: str) -> str:
-        """
-        Assess the complexity of the requirement.
-
-        Args:
-            doc: spaCy processed document
-            text: Original text
-
-        Returns:
-            Complexity level ("high", "medium", or "low")
-        """
-        text_lower = text.lower()
-
-        # Count indicators for each complexity level
-        scores = {level: 0 for level in self.complexity_indicators.keys()}
-
-        for level, indicators in self.complexity_indicators.items():
-            for indicator in indicators:
-                if indicator in text_lower:
-                    scores[level] += 1
-
-        # Check sentence structure complexity
-        sentence_count = len(list(doc.sents))
-        avg_tokens_per_sentence = len(doc) / sentence_count if sentence_count > 0 else 0
-
-        # Adjust scores based on structural complexity
-        if avg_tokens_per_sentence > 25:
-            scores["high"] += 1
-        elif avg_tokens_per_sentence > 15:
-            scores["medium"] += 1
-
-        # Check for conditional statements (if/then)
-        if "if" in text_lower and ("then" in text_lower or "else" in text_lower):
-            scores["high"] += 1
-
-        # Determine final complexity
-        if scores["high"] > 0:
-            return "high"
-        elif scores["medium"] > 0:
-            return "medium"
-        else:
-            return "low"
-
-    def _extract_data_elements(self, doc) -> List[str]:
-        """
-        Extract data elements from the requirement text.
-
-        Args:
-            doc: spaCy processed document
-
-        Returns:
-            List of data elements
-        """
-        # Find noun chunks that could be data elements
-        data_elements = []
-
-        for chunk in doc.noun_chunks:
-            # Check if this looks like a data field
-            if (any(token.pos_ == "NOUN" for token in chunk) and
-                    len(chunk) <= 4 and  # Not too long
-                    not any(token.is_stop for token in chunk)):  # Not all stopwords
-                data_elements.append(chunk.text)
-
-        # Look for specific data patterns
-        data_patterns = [
-            (r"\b[A-Z][a-z]+ ID\b", "ID field"),
-            (r"\b[A-Z][a-z]+ Number\b", "Number field"),
-            (r"\b[A-Z][a-z]+ Code\b", "Code field"),
-            (r"\b[A-Z][a-z]+ Date\b", "Date field"),
-            (r"\bstatus\b", "Status field")
-        ]
-
-        for pattern, field_type in data_patterns:
-            if re.search(pattern, doc.text):
-                data_elements.append(field_type)
-
-        return list(set(data_elements))
-
-    def analyze_requirements_batch(self, requirements: List[Dict]) -> List[Dict]:
-        """
-        Analyze a batch of requirements and find relationships between them.
-
-        Args:
-            requirements: List of requirement dictionaries with 'text' field
-
-        Returns:
-            List of analyzed requirements
-        """
-        # Process each requirement
-        processed_requirements = []
-        for req in requirements:
-            req_text = req.get('text', '')
-            source = req.get('source', 'batch')
-            processed = self.analyze_text_requirement(req_text, source)
-            processed_requirements.append(processed)
-
-        # Find relationships between requirements
-        self._find_requirement_relationships(processed_requirements)
-
-        return processed_requirements
-
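A sketch of driving the batch analysis removed here. The class name `RequirementsAnalyzer` is an assumption (its definition was lost along with this hunk's earlier lines); the requirement texts are invented for illustration.

# Hypothetical usage; RequirementsAnalyzer is an assumed class name.
analyzer = RequirementsAnalyzer()
reqs = analyzer.analyze_requirements_batch([
    {"text": "The clerk must extract invoice data from the SAP system daily.",
     "source": "workshop"},
    {"text": "Payments should be validated in the SAP system before approval.",
     "source": "workshop"},
])
for r in reqs:
    print(r["id"], r["extracted"]["actions"], r["extracted"]["frequency"])
    print("  related:", [(x["id"], x["relationship_type"]) for x in r["related_requirements"]])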
-    def _find_requirement_relationships(self, requirements: List[Dict]) -> None:
-        """
-        Find and add relationships between requirements.
-
-        Args:
-            requirements: List of processed requirements
-        """
-        if len(requirements) < 2:
-            return
-
-        # Extract text from requirements
-        texts = [req["text"] for req in requirements]
-
-        # Create TF-IDF matrix
-        vectorizer = TfidfVectorizer(stop_words='english')
-        tfidf_matrix = vectorizer.fit_transform(texts)
-
-        # Calculate similarity matrix
-        similarity_matrix = cosine_similarity(tfidf_matrix)
-
-        # Add relationships to requirements
-        for i, req in enumerate(requirements):
-            related = []
-
-            for j, similarity in enumerate(similarity_matrix[i]):
-                if i != j and similarity > 0.3:  # Threshold for relationship
-                    related.append({
-                        "id": requirements[j]["id"],
-                        "similarity": float(similarity),
-                        "relationship_type": self._determine_relationship_type(req, requirements[j])
-                    })
-
-            # Sort by similarity
-            related.sort(key=lambda x: x["similarity"], reverse=True)
-
-            # Add to requirement
-            req["related_requirements"] = related[:5]  # Top 5 related requirements
-
-    def _determine_relationship_type(self, req1: Dict, req2: Dict) -> str:
-        """
-        Determine the type of relationship between two requirements.
-
-        Args:
-            req1: First requirement
-            req2: Second requirement
-
-        Returns:
-            Relationship type string
-        """
-        # Check for system relationships
-        systems1 = set(req1["extracted"]["systems"])
-        systems2 = set(req2["extracted"]["systems"])
-
-        if systems1.intersection(systems2):
-            return "same_system"
-
-        # Check for business domain relationships
-        domains1 = [d[0] for d in req1["extracted"]["business_domain"]]
-        domains2 = [d[0] for d in req2["extracted"]["business_domain"]]
-
-        if set(domains1).intersection(set(domains2)):
-            return "same_domain"
-
-        # Check for action relationships
-        actions1 = set(req1["extracted"]["actions"])
-        actions2 = set(req2["extracted"]["actions"])
-
-        if actions1.intersection(actions2):
-            return "similar_action"
-
-        # Default relationship type
-        return "related"
-
-    def map_requirements_to_processes(self, requirements: List[Dict], process_models: List[Dict]) -> Dict:
-        """
-        Map requirements to process models based on content matching.
-
-        Args:
-            requirements: List of analyzed requirements
-            process_models: List of process model dictionaries
-
-        Returns:
-            Dictionary mapping process IDs to requirement IDs
-        """
-        process_to_reqs = {}
-        req_to_process = {}
-
-        for process in process_models:
-            process_id = process.get("id", "unknown")
-            process_text = process.get("description", "") + " " + process.get("name", "")
-            process_doc = self.nlp(process_text)
-
-            # Find matching requirements
-            matching_reqs = []
-
-            for req in requirements:
-                req_text = req["text"]
-                req_doc = self.nlp(req_text)
-
-                # Calculate similarity
-                similarity = process_doc.similarity(req_doc)
-
-                if similarity > 0.6:  # Threshold for matching
-                    matching_reqs.append({
-                        "req_id": req["id"],
-                        "similarity": float(similarity)
-                    })
-                    req_to_process[req["id"]] = process_id
-
-            # Sort by similarity
-            matching_reqs.sort(key=lambda x: x["similarity"], reverse=True)
-            process_to_reqs[process_id] = matching_reqs
-
-        return {
-            "process_to_requirements": process_to_reqs,
-            "requirement_to_process": req_to_process
-        }
-
-    def evaluate_automation_potential(self, requirement: Dict) -> Dict:
-        """
-        Evaluate the automation potential of a requirement.
-
-        Args:
-            requirement: Analyzed requirement
-
-        Returns:
-            Automation potential assessment
-        """
-        # Basic score starts at 5 out of 10
-        score = 5
-
-        # Complexity factor (high complexity decreases score)
-        complexity = requirement["extracted"]["complexity"]
-        if complexity == "high":
-            score -= 2
-        elif complexity == "low":
-            score += 2
-
-        # Action factor (certain actions are more automatable)
-        automatable_actions = ["extract", "transfer", "copy", "move", "calculate",
-                               "update", "generate", "validate", "verify", "send",
-                               "notify", "schedule", "retrieve", "check"]
-
-        for action in requirement["extracted"]["actions"]:
-            if action in automatable_actions:
-                score += 0.5
-
-        # System factor (presence of systems increases score)
-        if requirement["extracted"]["systems"]:
-            score += len(requirement["extracted"]["systems"]) * 0.5
-
-        # Data elements factor (more data elements suggests more structure)
-        data_elements = requirement["extracted"]["data_elements"]
-        if data_elements:
-            score += min(len(data_elements) * 0.3, 2)  # Cap at +2
-
-        # Cap score between 1-10
-        score = max(1, min(10, score))
-
-        # Determine category
-        category = "high" if score >= 7.5 else "medium" if score >= 5 else "low"
-
-        # Identify automation technology
-        tech = self._recommend_automation_technology(requirement, score)
-
-        return {
-            "automation_score": round(score, 1),
-            "automation_category": category,
-            "recommended_technology": tech,
-            "rationale": self._generate_automation_rationale(requirement, score, category)
-        }
-
-    def _recommend_automation_technology(self, requirement: Dict, score: float) -> str:
-        """
-        Recommend suitable automation technology.
-
-        Args:
-            requirement: Analyzed requirement
-            score: Automation score
-
-        Returns:
-            Recommended technology
-        """
-        complexity = requirement["extracted"]["complexity"]
-        actions = requirement["extracted"]["actions"]
-
-        # Decision tree for technology recommendation
-        if score >= 8:
-            if any(a in actions for a in ["extract", "scrape", "read"]):
-                return "RPA with OCR/Document Understanding"
-            else:
-                return "Traditional RPA"
-        elif score >= 5:
-            if complexity == "high":
-                return "RPA with Human-in-the-Loop"
-            elif any(a in actions for a in ["decide", "evaluate", "assess"]):
-                return "RPA with Decision Automation"
-            else:
-                return "Traditional RPA"
-        else:
-            if any(a in actions for a in ["review", "approve"]):
-                return "Workflow Automation"
-            else:
-                return "Partial Automation with Human Tasks"
-
-    def _generate_automation_rationale(self, requirement: Dict, score: float, category: str) -> str:
-        """
-        Generate explanation for automation assessment.
-
-        Args:
-            requirement: Analyzed requirement
-            score: Automation score
-            category: Automation category
-
-        Returns:
-            Rationale text
-        """
-        complexity = requirement["extracted"]["complexity"]
-
-        if category == "high":
-            return (f"This requirement has {complexity} complexity but shows strong automation "
-                    f"potential due to clear structure and defined data elements. "
-                    f"Score of {score}/10 indicates this is a prime automation candidate.")
-        elif category == "medium":
-            return (f"This {complexity} complexity requirement has moderate automation potential. "
-                    f"Score of {score}/10 suggests partial automation with some human oversight.")
-        else:
-            return (f"The {complexity} complexity and ambiguous nature of this requirement "
-                    f"limits automation potential. Score of {score}/10 indicates this may "
-                    f"require significant human involvement or process redesign.")
-
-    def assess_requirements_automation_potential(self, requirements: List[Dict]) -> List[Dict]:
-        """
-        Assess automation potential for a batch of requirements.
-
-        Args:
-            requirements: List of analyzed requirements
-
-        Returns:
-            Requirements with automation assessment added
-        """
-        for req in requirements:
-            req["automation_potential"] = self.evaluate_automation_potential(req)
-
-        return requirements
-
-    def generate_requirements_report(self, requirements: List[Dict]) -> Dict:
-        """
-        Generate a summary report of requirements analysis.
-
-        Args:
-            requirements: List of analyzed requirements
-
-        Returns:
-            Report dictionary
-        """
-        # Count by complexity
-        complexity_counts = {"high": 0, "medium": 0, "low": 0}
-        for req in requirements:
-            complexity = req["extracted"]["complexity"]
-            complexity_counts[complexity] += 1
-
-        # Count by automation potential
-        if all("automation_potential" in req for req in requirements):
-            automation_counts = {"high": 0, "medium": 0, "low": 0}
-            for req in requirements:
-                category = req["automation_potential"]["automation_category"]
-                automation_counts[category] += 1
-        else:
-            automation_counts = None
-
-        # Find common systems
-        all_systems = []
-        for req in requirements:
-            all_systems.extend(req["extracted"]["systems"])
-
-        system_counts = {}
-        for system in all_systems:
-            if system in system_counts:
-                system_counts[system] += 1
-            else:
-                system_counts[system] = 1
-
-        # Sort systems by frequency
-        top_systems = sorted(system_counts.items(), key=lambda x: x[1], reverse=True)[:5]
-
-        # Generate report
-        report = {
-            "total_requirements": len(requirements),
-            "complexity_distribution": complexity_counts,
-            "automation_potential": automation_counts,
-            "top_systems": top_systems,
-            "recommendations": self._generate_overall_recommendations(requirements)
-        }
-
-        return report
-
-    def _generate_overall_recommendations(self, requirements: List[Dict]) -> List[str]:
-        """
-        Generate overall recommendations based on requirements analysis.
-
-        Args:
-            requirements: List of analyzed requirements
-
-        Returns:
-            List of recommendation strings
-        """
-        recommendations = []
-
-        # Check if automation assessment is available
-        automation_available = all("automation_potential" in req for req in requirements)
-
-        if automation_available:
-            # Count high automation potential requirements
-            high_potential = [r for r in requirements
-                              if r["automation_potential"]["automation_category"] == "high"]
-
-            if len(high_potential) >= len(requirements) * 0.7:
-                recommendations.append(
-                    "High automation potential across most requirements. "
-                    "Consider an end-to-end automation solution."
-                )
-            elif len(high_potential) >= len(requirements) * 0.3:
-                recommendations.append(
-                    "Significant automation potential in a subset of requirements. "
-                    "Consider a phased automation approach starting with high-potential areas."
-                )
-            else:
-                recommendations.append(
-                    "Limited automation potential in current requirements. "
-                    "Consider process redesign to increase automation potential."
-                )
-
-            # Recommend technologies
-            tech_counts = {}
-            for req in requirements:
-                tech = req["automation_potential"]["recommended_technology"]
-                tech_counts[tech] = tech_counts.get(tech, 0) + 1
-
-            top_tech = max(tech_counts.items(), key=lambda x: x[1])[0]
-            recommendations.append(f"Primary recommended technology: {top_tech}")
-
-        # Requirements quality recommendations
-        completeness_issues = False
-        for req in requirements:
-            if (not req["extracted"]["actions"] or
-                    not req["extracted"]["systems"] or
-                    not req["extracted"]["data_elements"]):
-                completeness_issues = True
-                break
-
-        if completeness_issues:
-            recommendations.append(
-                "Some requirements lack necessary details. "
-                "Consider refining requirements to specify actions, systems, and data elements."
-            )
-
-        return recommendations

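The relationship heuristic removed above keys on a 0.3 cosine-similarity threshold over TF-IDF vectors; here it is in isolation, with invented requirement texts. The first pair shares terms, the third sentence shares none.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

texts = ["extract invoice data from SAP",
         "validate invoice data in SAP",
         "schedule quarterly sales meetings"]
m = TfidfVectorizer(stop_words='english').fit_transform(texts)
sim = cosine_similarity(m)
print(sim[0][1] > 0.3, sim[0][2] > 0.3)  # related pair vs. unrelated pair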
|
|
|
+import nltk
+from spacy.lang.en import English
+
+# Example input: process description
+process_description = """
+The accounts payable team receives invoices via email.
+They verify the invoice details, check for duplicates, and approve payment.
+"""
+
+# Preprocess the text
+def preprocess_text(text):
+    tokenizer = English()
+    tokens = tokenizer(text)
+    # the bare English() pipeline has no lemmatizer, so fall back to the surface form
+    processed_text = [token.lemma_ or token.text for token in tokens if not token.is_stop]
+    return ' '.join(processed_text)
+
+processed_desc = preprocess_text(process_description)
+print(processed_desc)
+

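A hedged extension (not part of the commit): splitting the same description into candidate process steps with spaCy's rule-based sentencizer, which needs no trained model. It reuses `process_description` from the snippet above.

from spacy.lang.en import English

nlp_sent = English()
nlp_sent.add_pipe("sentencizer")
steps = [s.text.strip() for s in nlp_sent(process_description).sents if s.text.strip()]
print(steps)  # one candidate step per sentence of the sample description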
 import spacy

+nlp = spacy.load('en_core_web_sm')
+
+def extract_entities(text):
+    doc = nlp(text)
+    entities = [(ent.text, ent.label_) for ent in doc.ents]
+    return entities

+entities = extract_entities(process_description)
+print("Extracted Entities:", entities)
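A sketch (an assumption, not in the commit) of narrowing the NER output to labels that plausibly name systems or organizations, echoing the label filter the removed requirements module used. It continues from `entities` above.

system_like = [(text, label) for text, label in entities
               if label in ("ORG", "PRODUCT", "GPE")]
print("Candidate systems:", system_like)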

+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.svm import SVC
+
+# Sample training data (simplified)
+X = [
+    "receive invoices via email",   # Automatable
+    "verify invoice details",       # Automatable
+    "approve payment manually"      # Non-automatable
+]
+y = [1, 1, 0]
+
+# Feature extraction
+vectorizer = TfidfVectorizer()
+X_vec = vectorizer.fit_transform(X)
+
+# Train a simple SVM
+model = SVC()
+model.fit(X_vec, y)

+# Predict automation feasibility
+def predict_automation_feasibility(text):
+    text_vec = vectorizer.transform([text])
+    return model.predict(text_vec)[0]
+
+print(predict_automation_feasibility("check for duplicates"))  # Output: 1 (Automatable)
+
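With only three training sentences the SVM is illustrative at best; a sketch (an assumption, not in the commit) of running the same classifier over each sentence of the description, reusing `predict_automation_feasibility` from above:

for sentence in ["receive invoices via email",
                 "verify the invoice details",
                 "check for duplicates",
                 "approve payment manually"]:
    label = predict_automation_feasibility(sentence)
    print(f"{sentence!r} -> {'automatable' if label == 1 else 'manual'}")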
+
+# Example workflow for UiPath
+def generate_uipath_workflow(tasks):
+    workflow = f"""
+<Workflow ContentUIVersion='1.0.0.0' TargetPlatform='.NETFramework,Version=v6.0' TargetRuntime='V6_0'>
+    <Variable Type='Object' Name='invoiceDetails' />
+    {''.join([f"<Variable Type='Object' Name='task_{task}' />" for task in tasks])}
+    <Sequence>
+        {''.join([f"<Activity Code='GeneratedActivity_{task}' />" for task in tasks])}
+    </Sequence>
+</Workflow>
 """
+    return workflow
+
+tasks = ["receive_invoices", "verify_details", "approve_payment"]
+workflow = generate_uipath_workflow(tasks)
+print(workflow)
+
+
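The template above is a simplified stand-in, not schema-valid UiPath XAML. A sketch of building the same structure with xml.etree.ElementTree instead of string interpolation, so nesting and escaping are handled by the library (element and attribute names mirror the simplified template, not UiPath's schema):

import xml.etree.ElementTree as ET

def build_workflow_xml(tasks):
    # mirrors the string template above, but builds real XML nodes
    root = ET.Element("Workflow", ContentUIVersion="1.0.0.0")
    for task in tasks:
        ET.SubElement(root, "Variable", Type="Object", Name=f"task_{task}")
    seq = ET.SubElement(root, "Sequence")
    for task in tasks:
        ET.SubElement(seq, "Activity", Code=f"GeneratedActivity_{task}")
    return ET.tostring(root, encoding="unicode")

print(build_workflow_xml(["receive_invoices", "verify_details", "approve_payment"]))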
+
+
+# Example: Connect to UiPath Orchestrator API
+import requests
+
+def execute_workflow(workflow, uipath_uri, api_key):
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/xml"
+    }
+    response = requests.post(f"{uipath_uri}/api/workflows", headers=headers, data=workflow)
+    return response.json()
+
+# Example API call
+uipath_uri = "https://your-uipath-orchestrator-url"
+api_key = "your-api-key"
+
+response = execute_workflow(workflow, uipath_uri, api_key)
+print("Workflow Execution Response:", response)
+