Final_Agent_HF_Course

Sleeping

App Files Files Community

selim-ba committed on Jun 28, 2025

Commit

5cae2c0

verified ·

1 Parent(s): 2abd52d

Update app.py

Browse files

Files changed (1) hide show

app.py +455 -119

app.py CHANGED Viewed

@@ -35,6 +35,7 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 class SuperSmartAgent:
     def __init__(self):
         self.graph = self._build_graph()
@@ -122,14 +123,17 @@ class SuperSmartAgent:
         def check_wikipedia_suitability(state):
             q = state["question"].lower()
-            triggers = ["wikipedia","Wikipedia","who is", "what is", "when did", "where is", "tell me about", "how many"]
             state["is_wiki"] = any(trigger in q for trigger in triggers)
             return state
         def search_wikipedia(state):
             question = state["question"]
             try:
-                # Use wikipedia library's search instead of wikipediaapi
                 page_titles = wikipedia.search(question)
                 if not page_titles:
                     state["response"] = "No relevant Wikipedia article found."
@@ -141,8 +145,59 @@ class SuperSmartAgent:
                 state["response"] = f"Error fetching Wikipedia content: {e}"
             return state
         def preprocess_context(context):
-            context = re.sub(r'\[\d+\]', '', context)
             context = re.sub(r'\s+', ' ', context).strip()
             context = re.sub(r'\{\|.*?\|\}', '', context, flags=re.DOTALL)
             return context
@@ -162,105 +217,424 @@ class SuperSmartAgent:
         def general_reasoning_qa(state):
             question = state["question"]
-            # Step 1: Search Wikipedia for relevant pages
             try:
-                # Use wikipedia library for search functionality
                 search_results = wikipedia.search(question, results=3)
-                context = ""
-                # Use wikipediaapi to get full content for each result
-                for title in search_results:
-                    try:
-                        page = self.wiki_wiki.page(title)
-                        if page.exists():
-                            context += f"\n\n=== Content from: {title} ===\n\n"
-                            context += page.text
-                    except Exception as e:
-                        print(f"Error processing page {title}: {e}")
-                        continue
                 if not context:
                     state["response"] = "Sorry, I couldn't find relevant information."
                     return state
                 # Preprocess the context
-                context = preprocess_context(context)
-                # Step 2: Extract key phrases from the question
-                key_phrases = extract_key_phrases(question)
-                # Step 3: Find relevant sections in the context
-                relevant_sections = []
-                sections = re.split(r'\n\s*\n', context)
-                for section in sections:
-                    if any(phrase.lower() in section.lower() for phrase in key_phrases):
-                        relevant_sections.append(section)
-                if not relevant_sections:
-                    state["response"] = "I found information but couldn't identify the most relevant parts."
-                    return state
-                # Combine relevant sections
-                relevant_context = "\n\n".join(relevant_sections)
-                # Step 4: Simple answer extraction based on patterns
-                answer = self.extract_answer(question, relevant_context)
                 if answer:
                     state["response"] = answer
                 else:
                     try:
-                        if search_results:
-                            first_page = self.wiki_wiki.page(search_results[0])
-                            if first_page.exists():
-                                summary = first_page.summary[:500] + "..."  # Limit summary length
-                                state["response"] = f"I couldn't find a specific answer, but here's some relevant information: {summary}"
-                            else:
-                                state["response"] = "No relevant information found."
                     except Exception as e:
                         state["response"] = f"I couldn't find a specific answer in the available information."
             except Exception as e:
                 state["response"] = f"An error occurred while searching for information: {str(e)}"
             return state
-        def extract_answer(question, context):
-            """Simple heuristic-based answer extraction"""
-            if re.search(r'\bhow many\b', question.lower()):
-                numbers = re.findall(r'\d+', context)
-                if numbers:
-                    return f"The answer is {numbers[0]}."
-            elif re.search(r'\bwhen (did|was|were)\b', question.lower()):
                 years = re.findall(r'\b(19|20)\d{2}\b', context)
-                if years:
-                    return f"The answer is {years[0]}."
-            elif re.search(r'\bwho (is|was)\b', question.lower()):
-                names = re.findall(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', context)
-                if names:
-                    return f"The answer is {names[0]}."
-            elif re.search(r'\bwhat (is|are|was|were)\b', question.lower()):
-                first_sentence = re.search(r'^[^.!?]*[.!?]', context)
-                if first_sentence:
-                    return first_sentence.group(0)
-            elif re.search(r'\blist of\b', question.lower()):
-                items = re.findall(r'^\s*[•*-]\s*.*', context, re.MULTILINE)
-                if items:
-                    return "Some relevant items: " + ", ".join([item.strip()[2:] for item in items[:3]]) + "..."
-            key_phrases = extract_key_phrases(question)
-            if key_phrases:
                 sentences = re.split(r'[.!?]', context)
                 for sentence in sentences:
-                    if any(phrase.lower() in sentence.lower() for phrase in key_phrases):
-                        return sentence.strip() + "."
             return None
         class AgentState(TypedDict, total=False):
             question: str
             is_reversed: bool
             is_python: bool
             is_riddle: bool
-            use_tool: str
             response: str
         builder = StateGraph(AgentState)
         # --- Nodes ---
         builder.add_node("check_reversed", check_reversed)
@@ -274,87 +648,49 @@ class SuperSmartAgent:
         builder.add_node("check_python_suitability", check_python_suitability)
         builder.add_node("generate_code", generate_code)
         builder.add_node("fallback", fallback)
-        # Entry
         builder.set_entry_point("check_reversed")
-        # Edges
         builder.add_edge("check_reversed", "fix_question")
         builder.add_edge("fix_question", "check_riddle_or_trick")
         builder.add_conditional_edges(
             "check_riddle_or_trick",
             lambda s: "solve_riddle" if s.get("is_riddle") else "check_wikipedia_suitability"
         )
         builder.add_conditional_edges(
             "check_wikipedia_suitability",
             lambda s: "search_wikipedia" if s.get("is_wiki") else "check_reasoning_needed"
         )
         builder.add_conditional_edges(
             "check_reasoning_needed",
             lambda s: "general_reasoning_qa" if s.get("needs_reasoning") else "check_python_suitability"
         )
         builder.add_conditional_edges(
             "check_python_suitability",
             lambda s: "generate_code" if s.get("is_python") else "fallback"
         )
-        # Ends
         builder.add_edge("solve_riddle", END)
         builder.add_edge("search_wikipedia", END)
         builder.add_edge("general_reasoning_qa", END)
         builder.add_edge("generate_code", END)
         builder.add_edge("fallback", END)
         graph = builder.compile()
         return graph
-    def extract_answer(self, question, context):
-        """Simple heuristic-based answer extraction"""
-        # If question asks for a count (e.g., "how many")
-        if re.search(r'\bhow many\b', question.lower()):
-            numbers = re.findall(r'\d+', context)
-            if numbers:
-                return f"The answer is {numbers[0]}."
-        # If question asks for a date/year (e.g., "when did")
-        elif re.search(r'\bwhen (did|was|were)\b', question.lower()):
-            years = re.findall(r'\b(19|20)\d{2}\b', context)
-            if years:
-                return f"The answer is {years[0]}."
-        # If question asks for a name/person (e.g., "who is")
-        elif re.search(r'\bwho (is|was)\b', question.lower()):
-            names = re.findall(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', context)
-            if names:
-                return f"The answer is {names[0]}."
-        # If question asks for a definition/explanation (e.g., "what is")
-        elif re.search(r'\bwhat (is|are|was|were)\b', question.lower()):
-            first_sentence = re.search(r'^[^.!?]*[.!?]', context)
-            if first_sentence:
-                return first_sentence.group(0)
-        # If question asks for a list (e.g., "list of")
-        elif re.search(r'\blist of\b', question.lower()):
-            items = re.findall(r'^\s*[•*-]\s*.*', context, re.MULTILINE)
-            if items:
-                return "Some relevant items: " + ", ".join([item.strip()[2:] for item in items[:3]]) + "..."
-        # Default case - return a relevant sentence containing question keywords
-        key_phrases = extract_key_phrases(question)
-        if key_phrases:
-            sentences = re.split(r'[.!?]', context)
-            for sentence in sentences:
-                if any(phrase.lower() in sentence.lower() for phrase in key_phrases):
-                    return sentence.strip() + "."
-        return None
     def __call__(self, question: str) -> str:
         state = {"question": question}
         result = self.graph.invoke(state)
         return result.get("response", "No answer generated.")
 ########################################
 def run_and_submit_all( profile: gr.OAuthProfile | None):

 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 class SuperSmartAgent:
     def __init__(self):
         self.graph = self._build_graph()
         def check_wikipedia_suitability(state):
             """Set ``state["is_wiki"]`` when the question contains a lookup-style phrase.

             The question is lowercased and each trigger is tested as a plain
             substring. The same (mutated) state dict is returned.
             """
             wiki_triggers = (
                 "wikipedia", "who is", "what is", "when did", "where is",
                 "tell me about", "how many", "how much", "what was the",
                 "describe", "explain", "information about", "details about",
             )
             lowered_question = state["question"].lower()
             matched = False
             for phrase in wiki_triggers:
                 if phrase in lowered_question:
                     matched = True
                     break
             state["is_wiki"] = matched
             return state
         def search_wikipedia(state):
             question = state["question"]
             try:
                 page_titles = wikipedia.search(question)
                 if not page_titles:
                     state["response"] = "No relevant Wikipedia article found."
                 state["response"] = f"Error fetching Wikipedia content: {e}"
             return state
+        def get_relevant_context(self, question, search_results):
+            """
+            Get more relevant context by focusing on the most relevant page and sections.
+            """
+            if not search_results:
+                return ""
+            try:
+                title = search_results[0]
+                page = self.wiki_wiki.page(title)
+                if page.exists():
+                    full_content = page.text
+                    # Try to identify the most relevant sections based on question keywords
+                    key_phrases = self.extract_key_phrases(question)
+                    # Split content into sections (simplified approach)
+                    sections = re.split(r'\n\s*\n', full_content)
+                    relevant_sections = []
+                    for section in sections:
+                        # Check if section contains any of the key phrases
+                        section_lower = section.lower()
+                        if any(phrase.lower() in section_lower for phrase in key_phrases):
+                            # Also check if section looks like it contains statistics or tables
+                            if self.section_contains_statistics(section):
+                                relevant_sections.insert(0, section)  # Put more likely sections first
+                            else:
+                                relevant_sections.append(section)
+                    if relevant_sections:
+                        return "\n\n".join(relevant_sections)
+                    return full_content[:10000]  # Limit context size
+            except Exception as e:
+                print(f"Error processing page: {e}")
+                return ""
+            return ""
+        def section_contains_statistics(self, section):
+            """Determine if a section likely contains statistics."""
+            indicators = [
+                'statistics', 'stats', 'season', 'player',
+                'year', 'at bat', 'walk', 'home run', 'rbi',
+                'era', '| Year', '| Player', '| AB', '| W'
+            ]
+            section_lower = section.lower()
+            return any(indicator.lower() in section_lower for indicator in indicators)
         def preprocess_context(context):
             """Strip citation markers and wiki tables from ``context`` and normalise whitespace.

             Removes ``[12]``-style citation markers and ``{| ... |}`` wiki table
             blocks, then collapses every whitespace run to a single space and
             trims the ends.
             """
             # The citation pattern had been garbled into r'$$\d+$$', which can
             # never match; restore the bracketed-number form.
             context = re.sub(r'\[\d+\]', '', context)
             context = re.sub(r'\{\|.*?\|\}', '', context, flags=re.DOTALL)
             # Collapse whitespace last so removing a table leaves no double spaces.
             return re.sub(r'\s+', ' ', context).strip()
         def general_reasoning_qa(state):
             """Answer ``state["question"]`` from Wikipedia and store it in ``state["response"]``.

             Searches Wikipedia (up to 3 results), builds a context from the top
             result, extracts tables and an answer from it, and falls back to the
             first 500 characters of the top page's summary when no answer is
             extracted. Any exception is converted into an error message in
             ``state["response"]``; the state dict is always returned.
             """
             question = state["question"]
             try:
                 search_results = wikipedia.search(question, results=3)
                 if not search_results:
                     state["response"] = "Sorry, I couldn't find relevant information."
                     return state
                 # The helpers below are sibling local functions, not attributes of
                 # self, so they are called directly (self is passed where their
                 # signature asks for it). Calling them through self raised
                 # AttributeError and always produced the generic error response.
                 raw_context = get_relevant_context(self, question, search_results)
                 if not raw_context:
                     state["response"] = "Sorry, I couldn't find relevant information."
                     return state
                 # Extract tables from the raw text first: preprocess_context
                 # removes {| ... |} blocks, so nothing would be left to find.
                 tables = extract_tables_from_wikipedia(self, raw_context)
                 # Preprocess the context
                 context = preprocess_context(raw_context)
                 # Use enhanced answer extraction
                 answer = extract_answer(self, question, context, tables)
                 if answer:
                     state["response"] = answer
                     return state
                 try:
                     first_page = self.wiki_wiki.page(search_results[0])
                     if first_page.exists():
                         summary = first_page.summary[:500] + "..."
                         state["response"] = f"I couldn't find a specific answer, but here's some relevant information: {summary}"
                     else:
                         state["response"] = "No relevant information found."
                 except Exception:
                     state["response"] = "I couldn't find a specific answer in the available information."
             except Exception as e:
                 state["response"] = f"An error occurred while searching for information: {str(e)}"
             return state
+        def extract_tables_from_wikipedia(self, content):
+            """
+            Extract tables from Wikipedia content.
+            """
+            tables = []
+            # Look for wiki markup tables
+            table_pattern = r'\{\|(.*?)\|\}', re.DOTALL
+            table_matches = re.findall(table_pattern, content)
+            for table_match in table_matches:
+                rows = re.split(r'\|\-', table_match)
+                clean_rows = []
+                for row in rows:
+                    cells = re.split(r'\|\|', row)
+                    clean_cells = []
+                    for cell in cells:
+                        cell = re.sub(r'\[\[([^|\]]+)(?:|[^\]]+)?\]\]', r'\1', cell)
+                        cell = re.sub(r'<[^>]+>', '', cell)
+                        cell = re.sub(r'{{\s*[^{}]+\s*}}', '', cell)
+                        cell = re.sub(r'\s+', ' ', cell).strip()
+                        clean_cells.append(cell)
+                    if clean_cells:
+                        clean_rows.append(clean_cells)
+                if clean_rows:
+                    tables.append(clean_rows)
+            # Look for HTML tables
+            html_table_pattern = r'<table.*?</table>', re.DOTALL|re.IGNORECASE
+            html_table_matches = re.findall(html_table_pattern, content)
+            for table_match in html_table_matches:
+                rows = re.findall(r'<tr.*?</tr>', table_match, re.DOTALL|re.IGNORECASE)
+                clean_rows = []
+                for row in rows:
+                    cells = re.findall(r'<t[dh].*?</t[dh]>', row, re.DOTALL|re.IGNORECASE)
+                    clean_cells = []
+                    for cell in cells:
+                        cell = re.sub(r'<.*?>', '', cell)
+                        cell = re.sub(r'\s+', ' ', cell).strip()
+                        clean_cells.append(cell)
+                    if clean_cells:
+                        clean_rows.append(clean_cells)
+                if clean_rows:
+                    tables.append(clean_rows)
+            return tables
+        def extract_answer(self, question, context, tables=None):
+            """
+            Enhanced general purpose answer extraction from text context.
+            """
+            if tables is None:
+                tables = []
+            question_lower = question.lower()
+            context_lower = context.lower()
+            # First try to detect what type of question it is
+            question_type = self.detect_question_type(question_lower)
+            # Extract all numbers from context with their surrounding text
+            number_contexts = []
+            for match in re.finditer(r'(\d[\d,]*\d*)', context):
+                start_pos = max(0, match.start() - 50)
+                end_pos = min(len(context), match.end() + 50)
+                surrounding_text = context[start_pos:end_pos]
+                number_contexts.append((match.group(1).replace(',', ''), surrounding_text))
+            # Extract all named entities
+            named_entities = self.extract_named_entities(context)
+            # Try to answer based on question type
+            if question_type in ["count", "how many"]:
+                # Look for numbers with relevant context
+                best_match = self.find_best_number_match(question_lower, number_contexts)
+                if best_match:
+                    number, _ = best_match
+                    return f"The answer is {number}."
+                # If no specific pattern matches, check tables for numeric answers
+                if tables:
+                    table_answer = self.find_answer_in_tables(question, tables)
+                    if table_answer:
+                        return table_answer
+            elif question_type == "person":
+                if named_entities:
+                    # Find the first person name that appears near relevant context
+                    relevant_name = self.find_relevant_person(question_lower, context_lower, named_entities)
+                    if relevant_name:
+                        return f"The answer is {relevant_name}."
+            elif question_type == "date":
+                # Look for dates/years
                 years = re.findall(r'\b(19|20)\d{2}\b', context)
+                date_patterns = [
+                    r'\b\d{1,2}\s+(January|February|March|April|May|June|July|August|September|October|November|December)[\s,]\s*\d{4}\b',
+                    r'\b\d{1,2}/\d{1,2}/\d{4}\b',
+                    r'\b\d{1,2}-\d{1,2}-\d{4}\b',
+                    r'\b\d{4}\b'
+                ]
+                for pattern in date_patterns:
+                    matches = re.findall(pattern, context)
+                    if matches:
+                        if isinstance(matches[0], tuple):
+                            return f"The answer is {matches[0][0]} {matches[0][1]}."
+                        else:
+                            return f"The answer is {matches[0]}."
+            # For other question types, try to find the most relevant sentence
+            if question_keywords := self.extract_key_phrases(question):
                 sentences = re.split(r'[.!?]', context)
+                scored_sentences = []
                 for sentence in sentences:
+                    sentence = sentence.strip()
+                    if not sentence:
+                        continue
+                    # Score based on question keyword matches
+                    score = sum(1 for keyword in question_keywords if keyword.lower() in sentence.lower())
+                    if score > 0:
+                        scored_sentences.append((score, sentence))
+                if scored_sentences:
+                    # Sort by score descending, then by length descending
+                    scored_sentences.sort(key=lambda x: (-x[0], -len(x[1])))
+                    best_sentence = scored_sentences[0][1]
+                    # Try to extract a more concise answer
+                    number_match = re.search(r'(\d[\d,]*\d*)', best_sentence)
+                    if number_match and "how many" in question_type:
+                        start_idx = max(0, number_match.start() - 30)
+                        end_idx = min(len(best_sentence), number_match.end() + 30)
+                        relevant_part = best_sentence[start_idx:end_idx].strip()
+                        if relevant_part.endswith('.'):
+                            return relevant_part
+                        return relevant_part + "."
+                    # Fall back to full sentence
+                    if best_sentence.endswith('.'):
+                        return best_sentence
+                    return best_sentence + "."
             return None
+        def detect_question_type(self, question):
+            """Classify the type of question for general processing."""
+            if re.search(r'\bhow many\b|\bhow much\b|\bwhat was the\s+\w+\s+of\b', question):
+                return "count"
+            elif re.search(r'\bwho is\b|\bwho was\b|\bwhich person\b|\bwhich player\b', question):
+                return "person"
+            elif re.search(r'\bwhen did\b|\bwhen was\b|\bwhat year\b|\bwhat date\b', question):
+                return "date"
+            elif re.search(r'\bwhat is\b|\bwhat was\b|\bwhat are\b|\bwhat were\b', question):
+                return "definition"
+            elif re.search(r'\bwhere is\b|\bwhere was\b|\bwhat location\b', question):
+                return "location"
+            elif re.search(r'\blist of\b|\blist the\b|\bgive me a list of\b', question):
+                return "list"
+            else:
+                return "general"
+        def find_best_number_match(self, question, number_contexts):
+            """Find the number from context that best matches the question."""
+            if not number_contexts:
+                return None
+            question_keywords = self.extract_key_phrases(question)
+            scored_numbers = []
+            for number, context in number_contexts:
+                context_lower = context.lower()
+                score = 0
+                # Score based on question keyword presence in context
+                for keyword in question_keywords:
+                    if keyword.lower() in context_lower:
+                        score += 1
+                # Score based on proximity of keywords to the number
+                number_pos = context_lower.find(number.lower())
+                if number_pos != -1:
+                    for keyword in question_keywords:
+                        keyword_positions = [m.start() for m in re.finditer(re.escape(keyword.lower()), context_lower)]
+                        for pos in keyword_positions:
+                            distance = abs(number_pos - pos)
+                            score += max(0, 10 - distance/10)  # Higher score for closer keywords
+                # Small boost for numbers appearing earlier in the document
+                score += (10000 - len(context)) / 10000  # Earlier numbers get slightly higher scores
+                scored_numbers.append((score, number, context))
+            if not scored_numbers:
+                return None
+            # Return the highest scoring number and its context
+            scored_numbers.sort(reverse=True, key=lambda x: x[0])
+            return (scored_numbers[0][1], scored_numbers[0][2])
+        def extract_named_entities(self, text):
+            """Extract named entities (people, places, etc.) from text."""
+            sentences = re.split(r'[.!?]', text)
+            entities = set()
+            for sentence in sentences:
+                tokens = re.findall(r'\b\w+\b', sentence)
+                # Skip first word if capitalized (likely start of sentence)
+                if len(tokens) > 0 and tokens[0][0].isupper():
+                    tokens = tokens[1:]
+                # Find sequences of capitalized words (likely proper nouns)
+                i = 0
+                while i < len(tokens):
+                    if tokens[i][0].isupper():
+                        start = i
+                        while i < len(tokens) and tokens[i][0].isupper():
+                            i += 1
+                        entity = ' '.join(tokens[start:i])
+                        if len(entity.split()) >= 2 or len(entity) > 10:
+                            entities.add(entity)
+                    else:
+                        i += 1
+            # Look for titles like Dr., Mr., etc.
+            title_pattern = r'\b(Dr|Mr|Ms|Mrs|Prof|Sr|Jr|Rev|Gen|Col|Maj|Lt|Sgt|Capt)\.\s+[A-Z][a-z]+'
+            for match in re.finditer(title_pattern, text, re.IGNORECASE):
+                full_match = match.group(0)
+                # Try to get the full name by including following capitalized words
+                remaining_text = text[match.end():]
+                remaining_words = re.findall(r'\b\w+\b', remaining_text)
+                full_entity = full_match
+                j = 0
+                while j < len(remaining_words) and remaining_words[j][0].isupper():
+                    full_entity += ' ' + remaining_words[j]
+                    j += 1
+                if full_entity:
+                    entities.add(full_entity.replace('. ', ' ').strip())
+            return list(entities)
+        def find_relevant_person(self, question, context, entities):
+            """Find the most relevant person entity based on question context."""
+            if not entities:
+                return None
+            question_keywords = self.extract_key_phrases(question)
+            best_score = -1
+            best_entity = None
+            for entity in entities:
+                score = 0
+                entity_lower = entity.lower()
+                # Check if entity appears in context near question keywords
+                entity_positions = [m.start() for m in re.finditer(re.escape(entity), context, re.IGNORECASE)]
+                for pos in entity_positions:
+                    # Check surrounding context for question keywords
+                    window_start = max(0, pos - 50)
+                    window_end = min(len(context), pos + len(entity) + 50)
+                    window_text = context[window_start:window_end]
+                    # Count keyword matches in window
+                    keyword_matches = sum(1 for keyword in question_keywords
+                                         if keyword.lower() in window_text.lower())
+                    score += keyword_matches
+                # If this entity has a higher score, select it
+                if score > best_score:
+                    best_score = score
+                    best_entity = entity
+            return best_entity
+        def find_answer_in_tables(self, question, tables):
+            """
+            Search through extracted tables to find an answer to the question.
+            """
+            if not tables:
+                return None
+            key_phrases = self.extract_key_phrases(question)
+            question_lower = question.lower()
+            for table in tables:
+                # Check if table is relevant to the question
+                table_is_relevant = False
+                # Check headers and body for keywords
+                all_text = []
+                if len(table) > 0:
+                    headers = table[0]
+                    all_text.extend(headers)
+                if len(table) > 1:
+                    body_text = ' '.join([' '.join(row) for row in table[1:]])
+                    all_text.extend(body_text.split())
+                all_text_lower = ' '.join(all_text).lower()
+                table_is_relevant = any(phrase.lower() in all_text_lower for phrase in key_phrases)
+                if not table_is_relevant:
+                    continue
+                # Determine column types
+                column_types = self.detect_column_types(table)
+                # Handle different question types based on column types
+                if "how many" in question_lower or "what was the" in question_lower:
+                    numeric_columns = [i for i, col_type in enumerate(column_types)
+                                      if col_type == 'number']
+                    if numeric_columns and len(table) > 1:
+                        # Find rows that match question keywords
+                        relevant_rows = []
+                        for row in table[1:]:  # Skip header row
+                            row_text = ' '.join(row).lower()
+                            if any(phrase.lower() in row_text for phrase in key_phrases):
+                                relevant_rows.append(row)
+                        if relevant_rows:
+                            # For each numeric column, collect the numbers from relevant rows
+                            number_candidates = []
+                            for row in relevant_rows:
+                                for col_idx in numeric_columns:
+                                    if col_idx < len(row):
+                                        cell = row[col_idx]
+                                        numbers = re.findall(r'\d[\d,]*\d*', cell)
+                                        for num in numbers:
+                                            num_clean = num.replace(',', '')
+                                            if num_clean.isdigit():
+                                                number_candidates.append((int(num_clean), row))
+                            if number_candidates:
+                                # Return the first number found in relevant rows
+                                first_num = number_candidates[0][0]
+                                return f"The answer is {first_num}."
+                elif "who" in question_lower or "which person" in question_lower:
+                    # Try to identify name columns
+                    name_columns = []
+                    for i, col_type in enumerate(column_types):
+                        if col_type == 'name' and len(table) > 1:
+                            # Check if this column looks like names
+                            sample_values = [row[i] for row in table[1:min(5, len(table))]]
+                            if self.column_looks_like_names(sample_values):
+                                name_columns.append(i)
+                    if name_columns:
+                        relevant_rows = []
+                        for row in table[1:]:
+                            row_text = ' '.join(row).lower()
+                            if any(phrase.lower() in row_text for phrase in key_phrases):
+                                relevant_rows.append(row
         class AgentState(TypedDict, total=False):
             """State dict passed to the graph; every key is optional (``total=False``)."""
             # Input and output text
             question: str
             response: str
             # Boolean flags
             is_reversed: bool
             is_riddle: bool
             is_wiki: bool  # read by the check_wikipedia_suitability conditional edge
             needs_reasoning: bool  # read by the check_reasoning_needed conditional edge
             is_python: bool
             # Not referenced in the visible part of the file; kept for compatibility
             use_tool: str
         builder = StateGraph(AgentState)
         # --- Nodes ---
         builder.add_node("check_reversed", check_reversed)
         builder.add_node("check_python_suitability", check_python_suitability)
         builder.add_node("generate_code", generate_code)
         builder.add_node("fallback", fallback)
+        # Entry point remains the same
         builder.set_entry_point("check_reversed")
+        # Edges - updated to match your current workflow
         builder.add_edge("check_reversed", "fix_question")
         builder.add_edge("fix_question", "check_riddle_or_trick")
         builder.add_conditional_edges(
             "check_riddle_or_trick",
             lambda s: "solve_riddle" if s.get("is_riddle") else "check_wikipedia_suitability"
         )
         builder.add_conditional_edges(
             "check_wikipedia_suitability",
             lambda s: "search_wikipedia" if s.get("is_wiki") else "check_reasoning_needed"
         )
         builder.add_conditional_edges(
             "check_reasoning_needed",
             lambda s: "general_reasoning_qa" if s.get("needs_reasoning") else "check_python_suitability"
         )
         builder.add_conditional_edges(
             "check_python_suitability",
             lambda s: "generate_code" if s.get("is_python") else "fallback"
         )
+        # Ending edges
         builder.add_edge("solve_riddle", END)
         builder.add_edge("search_wikipedia", END)
         builder.add_edge("general_reasoning_qa", END)
         builder.add_edge("generate_code", END)
         builder.add_edge("fallback", END)
         graph = builder.compile()
         return graph
     def __call__(self, question: str) -> str:
         """Run the compiled graph on ``question`` and return its ``"response"`` entry.

         Returns "No answer generated." when the final state has no
         ``"response"`` key.
         """
         final_state = self.graph.invoke({"question": question})
         return final_state.get("response", "No answer generated.")
 ########################################
 def run_and_submit_all( profile: gr.OAuthProfile | None):