| import base64 |
| from langchain_core.messages import AnyMessage, HumanMessage, AIMessage |
|
|
|
|
def extract_text(img_path: str) -> str:
    """Extract text from an image file using a multimodal model.

    The image is read from disk, base64-encoded into a ``data:`` URL and sent,
    together with a short instruction, to ``vision_llm``. ``vision_llm`` is
    not defined in this block and must be available at module scope.

    Args:
        img_path: Path to the image file to read.

    Returns:
        The text returned by the model with surrounding whitespace stripped.
        On any failure (unreadable file, model error, ...) the error is
        printed and an empty string is returned; this function is
        deliberately best-effort and never raises.
    """
    # Function-scope import so the block does not depend on the file header.
    import mimetypes

    try:
        with open(img_path, "rb") as image_file:
            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

        # Label the payload with the file's real image type instead of always
        # claiming PNG; fall back to PNG when the extension is unknown or does
        # not map to an image type.
        mime_type, _ = mimetypes.guess_type(img_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"

        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Extract all the text from this image. "
                            "Return only the extracted text, no explanations."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        response = vision_llm.invoke(message)

        content = response.content
        if isinstance(content, str):
            return content.strip()

        # Message content may also be a list of blocks (plain strings or
        # dicts); keep only the textual parts rather than failing on
        # str + list concatenation.
        text_parts = []
        for block in content:
            if isinstance(block, str):
                text_parts.append(block)
            elif isinstance(block, dict) and block.get("type") == "text":
                text_parts.append(block.get("text", ""))
        return "".join(text_parts).strip()
    except Exception as e:
        # Best-effort boundary: report the problem and hand back an empty
        # result so callers never have to handle an exception.
        error_msg = f"Error extracting text: {e}"
        print(error_msg)
        return ""
|
|
|
|
def web_search(query: str) -> list:
    """Perform a web search using SerpAPI and return the organic results.

    ``GoogleSearch`` is not defined in this block and must be available at
    module scope.

    Args:
        query: The search query string.

    Returns:
        The value of the ``"organic_results"`` entry of the SerpAPI response,
        or an empty list when the response has no such entry (for example
        when the search produced no organic results or an error payload).
    """
    # Function-scope import so the block does not depend on the file header.
    import os

    # Prefer a key supplied via the environment; fall back to the original
    # placeholder so behavior is unchanged when the variable is not set.
    api_key = os.environ.get("SERPAPI_API_KEY", "your_serpapi_key")

    search = GoogleSearch({
        "q": query,
        "num": 5,
        "api_key": api_key,
    })
    # Use .get so a response without organic results yields [] instead of
    # raising KeyError.
    return search.get_dict().get("organic_results", [])
| |
| |