gabejavitt committed
Commit 0d9d65c · verified · 1 parent: 5c0d9d7

Update app.py

Files changed (1)
  1. app.py +500 -140
app.py CHANGED
@@ -87,7 +87,95 @@ def initialize_rag_components():
     print("✅ Text splitter initialized.")
 
     return True
+# =============================================================================
+# ANSWER SHEET VALIDATION FUNCTIONS
+# =============================================================================
+
+def load_answer_sheet(filepath: str = "answer_sheet.json") -> Dict[str, str]:
+    """Load the answer sheet from a JSON file"""
+    try:
+        if os.path.exists(filepath):
+            with open(filepath, 'r', encoding='utf-8') as f:
+                answers = json.load(f)
+            print(f"✅ Loaded answer sheet with {len(answers)} answers from {filepath}")
+            return answers
+        else:
+            print(f"⚠️ Answer sheet not found at {filepath}")
+            return {}
+    except Exception as e:
+        print(f"❌ Error loading answer sheet: {e}")
+        return {}
+
+
+def check_answer_correctness(submitted: str, correct: str) -> Tuple[bool, str]:
+    """
+    Check if submitted answer matches correct answer with fuzzy matching
+    Returns: (is_correct, feedback_message)
+    """
+    # Normalize both answers
+    submitted_norm = submitted.strip().lower()
+    correct_norm = correct.strip().lower()
+
+    # Exact match
+    if submitted_norm == correct_norm:
+        return True, "✅ EXACT MATCH"
+
+    # Remove common punctuation and check again
+    import string
+    submitted_clean = submitted_norm.translate(str.maketrans('', '', string.punctuation))
+    correct_clean = correct_norm.translate(str.maketrans('', '', string.punctuation))
+
+    if submitted_clean == correct_clean:
+        return True, "✅ MATCH (punctuation difference)"
+
+    # Check if it's a number formatting issue
+    try:
+        # Try to parse as numbers
+        submitted_num = float(submitted_clean.replace(',', '').replace('$', ''))
+        correct_num = float(correct_clean.replace(',', '').replace('$', ''))
+        if abs(submitted_num - correct_num) < 0.01:  # Allow small floating point differences
+            return True, "✅ MATCH (numeric equivalence)"
+    except (ValueError, AttributeError):
+        pass
+
+    # Check if submitted answer contains correct answer (for list-type answers)
+    if ',' in correct_norm:
+        correct_items = set([item.strip() for item in correct_norm.split(',')])
+        submitted_items = set([item.strip() for item in submitted_norm.split(',')])
+
+        if correct_items == submitted_items:
+            return True, "✅ MATCH (item order difference)"
+
+        missing_items = correct_items - submitted_items
+        extra_items = submitted_items - correct_items
+
+        if missing_items and not extra_items:
+            return False, f"❌ MISSING: {', '.join(missing_items)}"
+        elif extra_items and not missing_items:
+            return False, f"❌ EXTRA: {', '.join(extra_items)}"
+        elif missing_items and extra_items:
+            return False, f"❌ MISSING: {', '.join(missing_items)} | EXTRA: {', '.join(extra_items)}"
+
+    # Check case-insensitive substring match
+    if submitted_norm in correct_norm or correct_norm in submitted_norm:
+        return False, f"❌ PARTIAL MATCH (submitted: '{submitted}' | correct: '{correct}')"
+
+    return False, f"❌ WRONG (submitted: '{submitted}' | correct: '{correct}')"
 
+
+def create_answer_sheet_template(questions: List[Dict], filepath: str = "answer_sheet.json"):
+    """Create an answer sheet template from questions"""
+    answer_template = {}
+    for q in questions:
+        answer_template[q['task_id']] = ""
+
+    with open(filepath, 'w', encoding='utf-8') as f:
+        json.dump(answer_template, f, indent=2)
+
+    print(f"✅ Created answer sheet template at {filepath}")
+    print(f"   Please fill in the correct answers for {len(answer_template)} questions")
+
+
 # =============================================================================
 # ASR INITIALIZATION
 # =============================================================================
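Note (illustration, not part of the diff): given the branches above, check_answer_correctness should behave roughly as follows on invented sample answers:

    check_answer_correctness("Paris ", "paris")  # (True, "✅ EXACT MATCH") after strip/lower
    check_answer_correctness("0050", "50")       # (True, "✅ MATCH (numeric equivalence)")
    check_answer_correctness("b, a", "a, b")     # (True, "✅ MATCH (item order difference)")
    check_answer_correctness("a", "a, b")        # (False, "❌ MISSING: b")

One edge case worth knowing: the punctuation pass strips decimal points before the numeric pass parses, so "3.5" vs "3.50" is compared as 35 vs 350 and falls through to the substring branch as a PARTIAL MATCH rather than a numeric match.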
@@ -270,17 +358,41 @@ class SearchInput(BaseModel):
 
 @tool(args_schema=SearchInput)
 def search_tool(query: str) -> str:
-    """Searches web via DuckDuckGo. Use for facts, recent info."""
+    """
+    Search the web for information. Returns snippets.
+
+    IMPORTANT: Search results are SNIPPETS only. For complete information:
+    1. Use search_tool to find URLs
+    2. Use scrape_and_retrieve to get FULL page content
+
+    Example workflow:
+    - search_tool("Mercedes Sosa Wikipedia") → get URL
+    - scrape_and_retrieve(url=..., query="studio albums 2000-2009")
+    """
     if not isinstance(query, str) or not query.strip():
         return "Error: Invalid query."
 
+    # Auto-add Wikipedia site filter if mentioned
+    if 'wikipedia' in query.lower() and 'site:' not in query:
+        query = f"{query} site:wikipedia.org"
+
     print(f"🔍 Searching: {query}")
-    try:
-        search = DuckDuckGoSearchRun()
-        result = search.run(query)
-        return truncate_if_needed(result)
-    except Exception as e:
-        return f"Search error: {str(e)}"
+
+    max_retries = 3
+    for attempt in range(max_retries):
+        try:
+            search = DuckDuckGoSearchRun()
+            result = search.run(query)
+
+            if not result or len(result) < 50:
+                return "No relevant results found. Try different search terms or check if the information exists."
+
+            return truncate_if_needed(result)
+        except Exception as e:
+            if attempt < max_retries - 1:
+                time.sleep(2 ** attempt)
+                continue
+            return f"Search error after {max_retries} attempts: {str(e)}"
 
 
 class CalcInput(BaseModel):
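Note: the same retry shape (three attempts, time.sleep(2 ** attempt) between failures, i.e. 1 s then 2 s) is inlined in search_tool, get_youtube_transcript, and scrape_and_retrieve below. Purely as a sketch of that shared pattern, it could be factored into a helper like the hypothetical with_backoff here; the commit itself does not do this:

    import time

    def with_backoff(fetch, max_retries: int = 3):
        # Hypothetical helper, not in the commit: retry fetch() with
        # exponential backoff, re-raising on the final failed attempt.
        for attempt in range(max_retries):
            try:
                return fetch()
            except Exception:
                if attempt < max_retries - 1:
                    time.sleep(2 ** attempt)  # waits 1 s, then 2 s
                    continue
                raise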
@@ -318,8 +430,7 @@ class CodeInput(BaseModel):
 @tool(args_schema=CodeInput)
 def code_interpreter(code: str) -> str:
     """
-    Executes Python code. Use for data processing, complex logic.
-    Available: pandas, numpy, json, re, datetime
+    Executes Python code with timeout protection.
     CRITICAL: Always use print() to output results!
     """
     if not isinstance(code, str):
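This hunk only rewrites the docstring; the timeout mechanism it mentions is not shown in the diff. For context, one common way to get that behavior, stated here purely as an assumption about the body, is to run the snippet in a subprocess with a deadline:

    import subprocess
    import sys

    def run_snippet_with_timeout(code: str, seconds: int = 15) -> str:
        # Assumed pattern only, not the body from this commit: execute
        # the code in a fresh interpreter; subprocess.run raises
        # TimeoutExpired if the deadline is exceeded.
        proc = subprocess.run([sys.executable, "-c", code],
                              capture_output=True, text=True, timeout=seconds)
        return proc.stdout if proc.returncode == 0 else proc.stderr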
@@ -560,164 +671,413 @@ class YoutubeInput(BaseModel):
 
 @tool(args_schema=YoutubeInput)
 def get_youtube_transcript(video_url: str) -> str:
-    """Fetches YouTube video transcript using yt-dlp."""
+    """
+    Fetches YouTube video transcript with retry logic.
+    Returns N/A if video is inaccessible.
+    """
     if not video_url:
         return "Error: Invalid URL."
 
     print(f"📺 YouTube transcript: {video_url}")
 
-    try:
-        # Extract video ID
-        video_id = None
-        if "watch?v=" in video_url:
-            video_id = video_url.split("v=")[1].split("&")[0]
-        elif "youtu.be/" in video_url:
-            video_id = video_url.split("youtu.be/")[1].split("?")[0]
-
-        if not video_id:
-            return f"Error: Could not extract video ID."
-
-        # Use yt-dlp to get subtitles
-        subtitle_file = f'{video_id}.en.vtt'
-
-        cmd = [
-            'yt-dlp',
-            '--skip-download',
-            '--write-auto-subs',
-            '--write-subs',
-            '--sub-lang', 'en',
-            '--sub-format', 'vtt',
-            '--output', video_id,
-            video_url
-        ]
-
-        print(f"🔧 Running: {' '.join(cmd)}")
-        result = subprocess.run(cmd, capture_output=True, text=True, timeout=45)
-
-        if result.returncode != 0:
-            print(f"⚠️ yt-dlp stderr: {result.stderr}")
-            return f"Error: Could not fetch subtitles - {result.stderr[:200]}"
-
-        # Try to find the subtitle file (might have different naming)
-        import glob
-        vtt_files = glob.glob(f"{video_id}*.vtt")
-
-        if not vtt_files:
-            return "Error: No English subtitles found for this video."
-
-        subtitle_file = vtt_files[0]
-        print(f"✓ Found subtitle file: {subtitle_file}")
-
-        # Read and parse VTT file
-        with open(subtitle_file, 'r', encoding='utf-8') as f:
-            content = f.read()
-
-        # Remove VTT headers and timestamps
-        lines = content.split('\n')
-        transcript_parts = []
-
-        for line in lines:
-            line = line.strip()
-            # Skip WEBVTT header, timestamps, and empty lines
-            if (line and
-                not line.startswith('WEBVTT') and
-                not '-->' in line and
-                not line.isdigit() and
-                not line.startswith('Kind:') and
-                not line.startswith('Language:')):
-                transcript_parts.append(line)
-
-        full_transcript = " ".join(transcript_parts)
-
-        # Cleanup subtitle files
-        for vtt_file in vtt_files:
-            try:
-                os.remove(vtt_file)
-            except:
-                pass
-
-        if not full_transcript:
-            return "Error: Transcript was empty."
-
-        print(f"✓ Transcript extracted: {len(full_transcript)} chars")
-        return f"Transcript:\n{truncate_if_needed(full_transcript)}"
-
-    except subprocess.TimeoutExpired:
-        return "Error: yt-dlp timed out after 45 seconds."
-    except FileNotFoundError:
-        return "Error: yt-dlp not installed. Add 'yt-dlp' to requirements.txt"
-    except Exception as e:
-        print(f"❌ Error: {str(e)}")
-        print(traceback.format_exc())
-        return f"Transcript error: {str(e)}"
+    max_retries = 3
+    for attempt in range(max_retries):
+        try:
+            # Extract video ID
+            video_id = None
+            if "watch?v=" in video_url:
+                video_id = video_url.split("v=")[1].split("&")[0]
+            elif "youtu.be/" in video_url:
+                video_id = video_url.split("youtu.be/")[1].split("?")[0]
+
+            if not video_id:
+                return f"Error: Could not extract video ID from URL."
+
+            cmd = [
+                'yt-dlp',
+                '--skip-download',
+                '--write-auto-subs',
+                '--write-subs',
+                '--sub-lang', 'en',
+                '--sub-format', 'vtt',
+                '--output', video_id,
+                video_url
+            ]
+
+            print(f"🔧 Running yt-dlp (attempt {attempt + 1}/{max_retries})...")
+            result = subprocess.run(cmd, capture_output=True, text=True, timeout=45)
+
+            if result.returncode != 0:
+                stderr = result.stderr
+
+                # Check for network errors
+                if 'Failed to resolve' in stderr or 'No address associated' in stderr:
+                    if attempt < max_retries - 1:
+                        print(f"⚠️ Network error, retrying...")
+                        time.sleep(2 ** attempt)
+                        continue
+                    return "N/A - YouTube is inaccessible due to network issues."
+
+                return f"Error: Could not fetch subtitles - {stderr[:200]}"
+
+            # Find subtitle file
+            import glob
+            vtt_files = glob.glob(f"{video_id}*.vtt")
+
+            if not vtt_files:
+                return "N/A - No English subtitles found for this video."
+
+            subtitle_file = vtt_files[0]
+            print(f"✓ Found subtitle file: {subtitle_file}")
+
+            # Parse VTT
+            with open(subtitle_file, 'r', encoding='utf-8') as f:
+                content = f.read()
+
+            lines = content.split('\n')
+            transcript_parts = []
+
+            for line in lines:
+                line = line.strip()
+                if (line and
+                    not line.startswith('WEBVTT') and
+                    not '-->' in line and
+                    not line.isdigit() and
+                    not line.startswith('Kind:') and
+                    not line.startswith('Language:')):
+                    transcript_parts.append(line)
+
+            full_transcript = " ".join(transcript_parts)
+
+            # Cleanup
+            for vtt_file in vtt_files:
+                try:
+                    os.remove(vtt_file)
+                except:
+                    pass
+
+            if not full_transcript:
+                return "Error: Transcript was empty."
+
+            print(f"✓ Transcript extracted: {len(full_transcript)} chars")
+            return f"Transcript:\n{truncate_if_needed(full_transcript)}"
+
+        except subprocess.TimeoutExpired:
+            if attempt < max_retries - 1:
+                continue
+            return "N/A - YouTube request timed out."
+        except FileNotFoundError:
+            return "Error: yt-dlp not installed."
+        except Exception as e:
+            if attempt < max_retries - 1:
+                time.sleep(2 ** attempt)
+                continue
+            print(f"❌ Error: {str(e)}")
+            return f"Error: {str(e)}"
+
+    return "N/A - YouTube transcript unavailable after multiple attempts."
 
 
 class ScrapeInput(BaseModel):
     url: str = Field(description="URL (must start with http:// or https://)")
-    query: str = Field(description="What to find on the page")
+    query: str = Field(description="Specific information to find on the page")
 
 @tool(args_schema=ScrapeInput)
 def scrape_and_retrieve(url: str, query: str) -> str:
     """
-    Scrapes webpage and uses RAG to find relevant info.
-    Use when you need specific info from a known URL.
+    Fetch and search FULL webpage content using RAG (not just snippets like search_tool).
+
+    CRITICAL: Use this after search_tool gives you a URL. This gets the COMPLETE page.
+
+    Workflow Example:
+    1. search_tool('Mercedes Sosa Wikipedia') → get URL
+    2. scrape_and_retrieve(
+           url='https://en.wikipedia.org/wiki/Mercedes_Sosa',
+           query='studio albums released between 2000 and 2009'
+       ) → Returns FULL discography section
+
+    Use when:
+    - Counting items (albums, people, events, etc.)
+    - Finding specific names, dates, or numbers
+    - Need complete tables or lists
+    - Wikipedia articles, documentation, papers
+    - Search snippets weren't enough
     """
     if not url.startswith(('http://', 'https://')):
-        return f"Error: Invalid URL format."
+        return f"Error: Invalid URL format. Must start with http:// or https://"
     if not query:
-        return "Error: Query required."
+        return "Error: Query required to search the page content."
 
     if global_embeddings is None or global_text_splitter is None:
         if not initialize_rag_components():
-            return "Error: RAG not initialized."
+            return "Error: RAG components not initialized."
 
     print(f"🌐 Scraping: {url}")
+    print(f"   Looking for: {query[:100]}...")
 
+    max_retries = 3
+    for attempt in range(max_retries):
+        try:
+            headers = {
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+            }
+            response = requests.get(url, headers=headers, timeout=20)
+            response.raise_for_status()
+
+            soup = BeautifulSoup(response.text, 'html.parser')
+
+            # Remove noise
+            for tag in soup(["script", "style", "nav", "footer", "aside", "header", "iframe"]):
+                tag.extract()
+
+            # Extract main content
+            main = soup.find('main') or soup.find('article') or soup.find('div', class_='mw-parser-output') or soup.body
+
+            if not main:
+                return "Error: Could not find main content on page."
+
+            text = main.get_text(separator='\n', strip=True)
+            lines = [l.strip() for l in text.splitlines() if l.strip()]
+            text = '\n'.join(lines)
+
+            if len(text) < 50:
+                return f"Error: Page content too short ({len(text)} chars). May be blocked or empty."
+
+            print(f"✓ Extracted {len(text)} characters from page")
+
+            # Chunk and search
+            chunks = global_text_splitter.split_text(text)
+
+            if not chunks:
+                return "Error: Could not process page content."
+
+            print(f"✓ Created {len(chunks)} chunks")
+
+            docs = [Document(page_content=c, metadata={"source": url}) for c in chunks]
+
+            db = FAISS.from_documents(docs, global_embeddings)
+            retriever = db.as_retriever(search_kwargs={"k": 5})
+            retrieved = retriever.invoke(query)
+
+            if not retrieved:
+                return f"No information found matching: '{query}'\nTry a different query or the information may not be on this page."
+
+            print(f"✓ Found {len(retrieved)} relevant chunks")
+
+            context = "\n\n---\n\n".join([f"[Section {i+1}]\n{d.page_content}" for i, d in enumerate(retrieved)])
+
+            return truncate_if_needed(f"From {url}:\n\n{context}")
+
+        except requests.Timeout:
+            if attempt < max_retries - 1:
+                print(f"⚠️ Timeout, retrying... (attempt {attempt + 1}/{max_retries})")
+                time.sleep(2 ** attempt)
+                continue
+            return f"Error: Page request timed out after {max_retries} attempts."
+        except requests.RequestException as e:
+            if attempt < max_retries - 1:
+                time.sleep(2 ** attempt)
+                continue
+            return f"Error fetching page: {str(e)}"
+        except Exception as e:
+            return f"Error processing page: {str(e)}\n{traceback.format_exc()}"
+
+def analyze_chess_position(args: str, state: AgentState) -> str:
+    """
+    Analyze chess position using Stockfish engine via lichess API or python-chess
+    Input format: "image_path|description" or just FEN notation
+    """
     try:
-        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
-        response = requests.get(url, headers=headers, timeout=20)
-        response.raise_for_status()
-
-        soup = BeautifulSoup(response.text, 'html.parser')
-
-        for tag in soup(["script", "style", "nav", "footer", "aside", "header", "iframe"]):
-            tag.extract()
-
-        main = soup.find('main') or soup.find('article') or soup.body
-
-        if not main:
-            return "Error: No main content found."
-
-        text = main.get_text(separator='\n', strip=True)
-        lines = [l.strip() for l in text.splitlines() if l.strip()]
-        text = '\n'.join(lines)
-
-        if len(text) < 50:
-            return f"Error: Content too short ({len(text)} chars)."
-
-        chunks = global_text_splitter.split_text(text)
-
-        if not chunks:
-            return "Error: Could not chunk text."
-
-        docs = [Document(page_content=c, metadata={"source": url}) for c in chunks]
-
-        db = FAISS.from_documents(docs, global_embeddings)
-        retriever = db.as_retriever(search_kwargs={"k": 5})
-        retrieved = retriever.invoke(query)
-
-        if not retrieved:
-            return f"No relevant info found for: '{query}'"
-
-        context = "\n\n---\n\n".join([f"[Chunk {i+1}]\n{d.page_content}" for i, d in enumerate(retrieved)])
-
-        return truncate_if_needed(f"From {url}:\n\n{context}")
-
-    except requests.RequestException as e:
-        return f"Fetch error: {str(e)}"
+        # Try to use python-chess with Stockfish
+        try:
+            import chess
+            import chess.engine
+
+            # Check if we have an image to analyze first
+            if '|' in args and os.path.exists(args.split('|')[0]):
+                image_path = args.split('|')[0]
+
+                # Use Gemini to extract FEN from image
+                print("📸 Extracting chess position from image...")
+                img = Image.open(image_path)
+                model = genai.GenerativeModel('gemini-2.0-flash-exp')
+
+                fen_prompt = """Analyze this chess board image and provide the position in FEN notation.
+
+Important instructions:
+1. Carefully identify each piece and its position
+2. Determine whose turn it is (look for indicators in the image)
+3. Return ONLY the FEN string, nothing else
+4. Format: piece_placement active_color castling en_passant halfmove fullmove
+
+Example: rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1
+
+If it says "Black to move" or "Black's turn", use 'b' for active color.
+If it says "White to move" or "White's turn", use 'w' for active color."""
+
+                response = model.generate_content([fen_prompt, img])
+                fen = response.text.strip()
+
+                # Clean up the FEN (remove markdown, explanations, etc.)
+                fen_lines = fen.split('\n')
+                for line in fen_lines:
+                    line = line.strip()
+                    # FEN should have spaces and slashes
+                    if '/' in line and ' ' in line and not line.startswith('#'):
+                        fen = line
+                        break
+
+                print(f"📊 Extracted FEN: {fen}")
+            else:
+                # Direct FEN input
+                fen = args.strip()
+
+            # Parse the position
+            try:
+                board = chess.Board(fen)
+            except Exception as e:
+                return f"N/A_REQUIRED: Invalid FEN notation - {str(e)}"
+
+            # Try to use Stockfish engine
+            stockfish_paths = [
+                "/usr/games/stockfish",
+                "/usr/local/bin/stockfish",
+                "/opt/homebrew/bin/stockfish",
+                "stockfish",
+                "./stockfish"
+            ]
+
+            engine_path = None
+            for path in stockfish_paths:
+                if os.path.exists(path) or path == "stockfish":
+                    engine_path = path
+                    break
+
+            if not engine_path:
+                # Fallback to lichess API
+                print("⚠️ Stockfish not found locally, using Lichess API...")
+                return analyze_chess_via_lichess(board.fen(), state)
+
+            # Use local Stockfish
+            with chess.engine.SimpleEngine.popen_uci(engine_path) as engine:
+                # Analyze position
+                info = engine.analyse(board, chess.engine.Limit(depth=20))
+                best_move = info.get("pv")[0] if "pv" in info else None
+
+                if best_move:
+                    # Convert to algebraic notation
+                    san_move = board.san(best_move)
+
+                    # Get evaluation score
+                    score = info.get("score")
+                    score_str = ""
+                    if score:
+                        if score.is_mate():
+                            mate_in = score.relative.moves
+                            score_str = f" (Mate in {abs(mate_in)})"
+                        else:
+                            cp = score.relative.score()
+                            score_str = f" (Eval: {cp/100:.2f})"
+
+                    # Check if this move leads to checkmate
+                    board_copy = board.copy()
+                    board_copy.push(best_move)
+
+                    result = f"{san_move}{score_str}"
+
+                    if board_copy.is_checkmate():
+                        result += " - Checkmate!"
+                    elif board_copy.is_check():
+                        result += " - Check"
+
+                    print(f"♟️ Best move: {result}")
+                    return result
+                else:
+                    return "N/A_REQUIRED: Could not determine best move"
+
+        except ImportError:
+            print("⚠️ python-chess not installed, using Lichess API...")
+            # Extract FEN from image if needed
+            if '|' in args and os.path.exists(args.split('|')[0]):
+                image_path = args.split('|')[0]
+                img = Image.open(image_path)
+                model = genai.GenerativeModel('gemini-2.0-flash-exp')
+
+                response = model.generate_content([
+                    "Extract the chess position in FEN notation. Return ONLY the FEN string.",
+                    img
+                ])
+                fen = response.text.strip()
+            else:
+                fen = args.strip()
+
+            return analyze_chess_via_lichess(fen, state)
+
+    except Exception as e:
+        state.add_failure('chess', str(e))
+        return f"N/A_REQUIRED: Chess analysis failed - {str(e)}"
+
+
+def analyze_chess_via_lichess(fen: str, state: AgentState) -> str:
+    """
+    Analyze chess position using Lichess cloud API
+    """
+    try:
+        # Lichess cloud evaluation API
+        url = "https://lichess.org/api/cloud-eval"
+
+        # Clean FEN
+        fen = fen.strip().replace('```', '').replace('fen', '').strip()
+
+        params = {
+            "fen": fen,
+            "multiPv": 1  # Get best move only
+        }
+
+        response = requests.get(url, params=params, timeout=10)
+
+        if response.status_code == 200:
+            data = response.json()
+
+            if "pvs" in data and len(data["pvs"]) > 0:
+                best_pv = data["pvs"][0]
+
+                # Get the moves in UCI notation
+                moves = best_pv.get("moves", "").split()
+                if moves:
+                    # Convert UCI to SAN using python-chess if available
+                    try:
+                        import chess
+                        board = chess.Board(fen)
+                        uci_move = chess.Move.from_uci(moves[0])
+                        san_move = board.san(uci_move)
+
+                        # Get evaluation
+                        cp = best_pv.get("cp")
+                        mate = best_pv.get("mate")
+
+                        if mate is not None:
+                            eval_str = f" (Mate in {abs(mate)})"
+                        elif cp is not None:
+                            eval_str = f" (Eval: {cp/100:.2f})"
+                        else:
+                            eval_str = ""
+
+                        return f"{san_move}{eval_str}"
+                    except:
+                        # Return UCI move if can't convert
+                        return moves[0]
+                else:
+                    return "N/A_REQUIRED: No moves found in analysis"
+            else:
+                return "N/A_REQUIRED: Position not in Lichess cloud database"
+        else:
+            state.add_failure('lichess', f'HTTP {response.status_code}')
+            return f"N/A_REQUIRED: Lichess API error {response.status_code}"
+
     except Exception as e:
-        return f"Scrape error: {str(e)}\n{traceback.format_exc()}"
-
+        state.add_failure('lichess', str(e))
+        return f"N/A_REQUIRED: Lichess analysis failed - {str(e)}"
 
 class FinalAnswerInput(BaseModel):
     answer: str = Field(description="Final answer - EXACTLY what was asked, nothing more")
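The Lichess fallback above relies on python-chess to convert the cloud-eval API's UCI moves into SAN via board.san(). A minimal standalone check of that conversion (requires the python-chess package; the position and move are just examples):

    import chess

    board = chess.Board()               # standard starting position
    move = chess.Move.from_uci("g1f3")  # UCI string, as in cloud-eval's "moves" field
    print(board.san(move))              # prints "Nf3"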
@@ -761,7 +1121,7 @@ defined_tools = [
 
     # Specialized
     audio_transcription_tool,
-    analyze_image,  # NEW: Image analysis tool
+    analyze_image,
     get_youtube_transcript,
     scrape_and_retrieve,
 
1127