WeByT3 committed on
Commit
2d81d4a
·
verified ·
1 Parent(s): d3ce528

Update tools.py

Browse files
Files changed (1) hide show
  1. tools.py +2 -136
tools.py CHANGED
@@ -1,22 +1,6 @@
1
  from langchain_core.tools import tool
2
- from youtube_transcript_api import YouTubeTranscriptApi
3
- from SPARQLWrapper import SPARQLWrapper, JSON
4
- import json
5
  import pandas as pd
6
- import whisper
7
  import os
8
- import re
9
- import tempfile
10
- from transformers import Blip2Processor, Blip2ForConditionalGeneration
11
- from PIL import Image
12
- import torch
13
-
14
- whisper_model = whisper.load_model("base")
15
- device = "cuda" if torch.cuda.is_available() else "cpu"
16
-
17
- # Load model and processor once
18
- processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
19
- model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", device_map="auto").to(device)
20
 
21
 
22
  @tool
@@ -31,7 +15,7 @@ def add(a: int, b: int) -> int:
31
  return a + b
32
 
33
  @tool
34
- def substract(a: int, b: int) -> int:
35
  """
36
  Subtracts one value from another and returns the difference
37
 
@@ -63,122 +47,4 @@ def divide(a: int, b: int) -> int:
63
  """
64
  if b == 0:
65
  raise ValueError("Cannot divide by zero.")
66
- return a / b
67
-
68
- def match_referenced_files(question: str, uploaded_files: list) -> list:
69
- """
70
- Given a question string and a list of uploaded files with metadata,
71
- return the list of files that are referenced in the question.
72
-
73
- Parameters:
74
- - question: str — The user's question.
75
- - uploaded_files: list of dicts with keys:
76
- - name (e.g., "data.xlsx")
77
- - type (e.g., "excel", "image", "audio")
78
- - path (e.g., "/tmp/data.xlsx")
79
-
80
- Returns:
81
- A list of file dicts that were referenced in the question.
82
- """
83
- # Find all mentions of possible filenames in the question (e.g., .xlsx, .png, .mp3)
84
- referenced_names = set(re.findall(r'[\w\-\s]+\.(xlsx|xls|csv|png|jpg|jpeg|mp3|wav)', question, flags=re.IGNORECASE))
85
-
86
- matched = []
87
- for file in uploaded_files:
88
- base_name = os.path.basename(file["name"]).lower()
89
- if any(base_name.endswith(name.lower()) for name in referenced_names):
90
- matched.append(file)
91
-
92
- return matched
93
-
94
-
95
- @tool
96
- def analyze_image(image_path: str, question: str = "What’s in this image?") -> str:
97
- """
98
- Analyzes an image and answers a question about it using BLIP-2.
99
-
100
- Args:
101
- image_path (str): Path to the image file.
102
- question (str): A natural language question about the image.
103
- """
104
- try:
105
- image = Image.open(image_path).convert("RGB")
106
- inputs = processor(images=image, text=question, return_tensors="pt").to(device)
107
- generated_ids = model.generate(**inputs, max_new_tokens=100)
108
- response = processor.decode(generated_ids[0], skip_special_tokens=True)
109
- return response
110
- except Exception as e:
111
- return f"Error analyzing image: {str(e)}"
112
-
113
- @tool
114
- def read_excel_summary(file_path: str) -> str:
115
- """
116
- Reads an Excel file and returns basic summary statistics, column names, and row count.
117
-
118
- Args:
119
- file_path: The path to the Excel file
120
- """
121
- try:
122
- df = pd.read_excel(file_path, engine="openpyxl")
123
- info = {
124
- "columns": df.columns.tolist(),
125
- "num_rows": len(df),
126
- "summary": df.describe(include='all').to_dict()
127
- }
128
- return str(info)
129
- except Exception as e:
130
- return f"Error reading Excel file: {str(e)}"
131
-
132
- @tool
133
- def wiki_search(query: str) -> str:
134
- """Query semantic knowledge bases using SPARQL
135
-
136
- Args:
137
- query: The search query for SPARQL."""
138
- try:
139
- endpoint = SPARQLWrapper("https://query.wikidata.org/sparql")
140
- endpoint.setQuery(query)
141
- endpoint.setReturnFormat(JSON)
142
-
143
- results = endpoint.query().convert()
144
- return json.dumps(results, indent=2)
145
- except Exception as e:
146
- return f"Error executing SPARQL query: {str(e)}"
147
-
148
- @tool
149
- def transcribe_audio(file_path: str) -> str:
150
- """
151
- Transcribes an audio file (MP3, WAV, etc.) to text using Whisper.
152
- Returns the transcribed text.
153
-
154
- Args:
155
- file_path: the path to the audio file
156
- """
157
- try:
158
- result = whisper_model.transcribe(file_path)
159
- return result["text"]
160
- except Exception as e:
161
- return f"Error during transcription: {str(e)}"
162
-
163
- @tool
164
- def fetch_youtube_transcript(video_url: str) -> str:
165
- """
166
- Fetch transcript of a YouTube video by URL.
167
- Returns plain text if transcript is available.
168
-
169
- Args:
170
- video_url: The YouTube url link to the video
171
- """
172
- import re
173
- video_id_match = re.search(r"(?:v=|youtu\.be/)([\w-]+)", video_url)
174
- if not video_id_match:
175
- return "Invalid YouTube URL."
176
-
177
- video_id = video_id_match.group(1)
178
-
179
- try:
180
- transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
181
- transcript_text = " ".join([entry['text'] for entry in transcript_list])
182
- return transcript_text[:4000] # Limit to fit into context window
183
- except Exception as e:
184
- return f"Transcript not available or error: {str(e)}"
 
1
  from langchain_core.tools import tool
 
 
 
2
  import pandas as pd
 
3
  import os
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
 
6
  @tool
 
15
  return a + b
16
 
17
  @tool
18
+ def subtract(a: int, b: int) -> int:
19
  """
20
  Subtracts one value from another and returns the difference
21
 
 
47
  """
48
  if b == 0:
49
  raise ValueError("Cannot divide by zero.")
50
+ return a / b