giulia-fontanella committed on
Commit
9b2bab8
·
verified ·
1 Parent(s): 7eb753f

Create tools.py

Browse files
Files changed (1) hide show
  1. tools.py +61 -0
tools.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ from langchain_core.messages import AnyMessage, HumanMessage, AIMessage
3
+
4
+
5
def extract_text(img_path: str) -> str:
    """Extract text from an image file using a multimodal model.

    The image is read from disk, base64-encoded, and sent as a data URL to
    ``vision_llm`` together with an instruction to return only the text.

    Args:
        img_path: Path to the image file on disk.

    Returns:
        The model's reply with surrounding whitespace stripped, or an empty
        string if any step fails (the error is printed, not raised).

    Note:
        ``vision_llm`` is not defined or imported in the visible part of
        this file; it must exist at module scope and expose
        ``invoke(messages)``. If it is missing, the resulting ``NameError``
        is caught below and an empty string is returned.
    """
    import mimetypes

    try:
        # Read the image and encode it as base64 for the data URL.
        with open(img_path, "rb") as image_file:
            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

        # Label the payload with the file's real media type (JPEG, WebP, ...)
        # instead of always claiming PNG; fall back to the previous
        # hard-coded value when the extension is unknown or not an image.
        mime_type = mimetypes.guess_type(img_path)[0]
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"

        # Prepare the prompt including the base64 image data.
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Extract all the text from this image. "
                            "Return only the extracted text, no explanations."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        # Call the vision-capable model.
        response = vision_llm.invoke(message)

        content = response.content
        if not isinstance(content, str):
            # LangChain message content may be a list of strings and/or
            # content-block dicts rather than a plain string; keep the
            # textual parts instead of failing on string concatenation.
            content = "".join(
                part if isinstance(part, str) else part.get("text", "")
                for part in content
            )

        return content.strip()
    except Exception as e:
        # Deliberate best-effort boundary: callers rely on getting "" back
        # rather than an exception when extraction fails for any reason.
        error_msg = f"Error extracting text: {str(e)}"
        print(error_msg)
        return ""
49
+
50
+
51
def web_search(query: str) -> list:
    """Perform a web search using SerpAPI and return the organic results.

    Args:
        query: The search query string.

    Returns:
        The value of the ``"organic_results"`` entry of the SerpAPI
        response (requested with ``num=5``), or an empty list when the
        response contains no such entry.

    Note:
        ``GoogleSearch`` is not imported in the visible part of this file;
        it must be available at module scope (presumably the SerpAPI client
        class -- confirm against the project's dependencies).
    """
    import os

    search = GoogleSearch({
        "q": query,
        "num": 5,
        # Prefer a real key from the environment over the placeholder
        # literal; the placeholder is kept only as a backward-compatible
        # fallback and will not authenticate.
        "api_key": os.environ.get("SERPAPI_API_KEY", "your_serpapi_key"),
    })
    # The response may lack "organic_results" (e.g. an error payload or no
    # hits); return an empty list instead of raising KeyError.
    return search.get_dict().get("organic_results", [])
60
+
61
+