jawadsaghir12's picture
new
0887862
import io
import logging
import os
import zipfile
import xml.etree.ElementTree as ET
import ssl
import asyncio
import functools
from typing import List, Optional
from googleapiclient.errors import HttpError
from .api_enablement import get_api_enablement_message
from auth.google_auth import GoogleAuthenticationError
logger = logging.getLogger(__name__)
class TransientNetworkError(Exception):
    """Signals a transient network failure that persisted after retrying."""
class UserInputError(Exception):
    """User-facing input or validation failure; retrying will not help."""
def _probe_directory_writable(directory: str) -> None:
    """Prove that *directory* is writable by creating and deleting a marker file.

    Raises:
        OSError: (including PermissionError) if the marker file cannot be
            written or removed.
    """
    test_file = os.path.join(directory, ".permission_test")
    try:
        with open(test_file, "w") as f:
            f.write("test")
    except OSError:
        # A failed write may still have created the marker; best-effort cleanup
        # so a failed check does not leave stray files behind.
        try:
            os.remove(test_file)
        except OSError:
            pass
        raise
    os.remove(test_file)


def check_credentials_directory_permissions(credentials_dir: Optional[str] = None) -> None:
    """
    Check if the service has appropriate permissions to create and write to the .credentials directory.

    If the directory already exists, a marker file is written and removed to
    prove write access. If it does not exist, it is created (with parents) and
    then probed the same way; when that fails, the just-created leaf directory
    is removed again on a best-effort basis.

    Args:
        credentials_dir: Path to the credentials directory (default: uses get_default_credentials_dir())

    Raises:
        PermissionError: If the directory cannot be created or written to. The
            underlying OS error is attached as ``__cause__``.
        OSError: If any other, non-filesystem error occurs during the check.
    """
    if credentials_dir is None:
        # Imported lazily, only when the default location is actually needed.
        from auth.google_auth import get_default_credentials_dir

        credentials_dir = get_default_credentials_dir()

    try:
        if os.path.exists(credentials_dir):
            # Directory exists: only need to confirm we can write into it.
            try:
                _probe_directory_writable(credentials_dir)
            except OSError as e:
                raise PermissionError(
                    f"Cannot write to existing credentials directory '{os.path.abspath(credentials_dir)}': {e}"
                ) from e
            else:
                logger.info(
                    f"Credentials directory permissions check passed: {os.path.abspath(credentials_dir)}"
                )
        else:
            # Directory is missing: create it (and parents), then confirm write access.
            try:
                os.makedirs(credentials_dir, exist_ok=True)
                _probe_directory_writable(credentials_dir)
            except OSError as e:
                # Clean up if we created the directory but can't write to it.
                try:
                    if os.path.exists(credentials_dir):
                        os.rmdir(credentials_dir)
                except OSError:
                    pass
                raise PermissionError(
                    f"Cannot create or write to credentials directory '{os.path.abspath(credentials_dir)}': {e}"
                ) from e
            else:
                logger.info(
                    f"Created credentials directory with proper permissions: {os.path.abspath(credentials_dir)}"
                )
    except PermissionError:
        raise
    except Exception as e:
        raise OSError(
            f"Unexpected error checking credentials directory permissions: {e}"
        ) from e
_OOXML_SPREADSHEET_NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
_OOXML_DOCX_MIME = (
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
)
_OOXML_PPTX_MIME = (
    "application/vnd.openxmlformats-officedocument.presentationml.presentation"
)
_OOXML_XLSX_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"


def _load_shared_strings(zf: zipfile.ZipFile) -> List[str]:
    """Read xl/sharedStrings.xml from *zf*; return whatever entries could be read (possibly none)."""
    shared_strings: List[str] = []
    try:
        shared_strings_root = ET.fromstring(zf.read("xl/sharedStrings.xml"))
        for si_element in shared_strings_root.findall(
            f"{{{_OOXML_SPREADSHEET_NS}}}si"
        ):
            # An <si> holds a plain <t> or several rich-text <r> runs;
            # concatenate every <t> found beneath it.
            shared_strings.append(
                "".join(
                    t_element.text
                    for t_element in si_element.findall(
                        f".//{{{_OOXML_SPREADSHEET_NS}}}t"
                    )
                    if t_element.text
                )
            )
    except KeyError:
        # ZipFile.read raises KeyError when the member does not exist.
        logger.info("No sharedStrings.xml found in Excel file (this is optional).")
    except ET.ParseError as e:
        logger.error(f"Error parsing sharedStrings.xml: {e}")
    except Exception as e:  # Any other unexpected error during sharedStrings parsing
        logger.error(
            f"Unexpected error processing sharedStrings.xml: {e}",
            exc_info=True,
        )
    return shared_strings


def _sheet_cell_texts(
    xml_root: ET.Element, shared_strings: List[str], member: str
) -> List[str]:
    """Collect the textual value of every <c> cell in one worksheet, in document order."""
    texts: List[str] = []
    for cell_element in xml_root.findall(f".//{{{_OOXML_SPREADSHEET_NS}}}c"):
        cell_type = cell_element.get("t")
        value_element = cell_element.find(f"{{{_OOXML_SPREADSHEET_NS}}}v")

        if value_element is None or value_element.text is None:
            # Inline strings keep their text in <is><t>…</t></is> rather than <v>.
            if cell_type == "inlineStr":
                inline_element = cell_element.find(f"{{{_OOXML_SPREADSHEET_NS}}}is")
                if inline_element is not None:
                    inline_text = "".join(
                        t_element.text
                        for t_element in inline_element.iter(
                            f"{{{_OOXML_SPREADSHEET_NS}}}t"
                        )
                        if t_element.text
                    )
                    if inline_text:
                        texts.append(inline_text)
            # Nothing (more) to extract from a cell without a usable <v>.
            continue

        if cell_type == "s":  # <v> is an index into the shared-string table
            try:
                ss_idx = int(value_element.text)
            except ValueError:
                logger.warning(
                    f"Non-integer shared string index: '{value_element.text}' in {member}."
                )
                continue
            if 0 <= ss_idx < len(shared_strings):
                texts.append(shared_strings[ss_idx])
            else:
                logger.warning(
                    f"Invalid shared string index {ss_idx} in {member}. Max index: {len(shared_strings) - 1}"
                )
        else:  # Direct value (number, boolean, formula result, ...)
            texts.append(value_element.text)
    return texts


def _text_run_texts(xml_root: ET.Element) -> List[str]:
    """Collect the stripped, non-empty text of every namespaced <t> element (Word <w:t>, PowerPoint <a:t>)."""
    texts: List[str] = []
    for elem in xml_root.iter():
        if isinstance(elem.tag, str) and elem.tag.endswith("}t") and elem.text:
            cleaned_text = elem.text.strip()
            if cleaned_text:
                texts.append(cleaned_text)
    return texts


def extract_office_xml_text(file_bytes: bytes, mime_type: str) -> Optional[str]:
    """
    Very light-weight XML scraper for Word, Excel, PowerPoint files.
    Returns plain-text if something readable is found, else None.
    No external deps – just std-lib zipfile + ElementTree.

    Texts found in one archive member (document, slide or worksheet) are joined
    with single spaces; members are joined with blank lines. For spreadsheets,
    shared-string cells, inline-string cells and direct values are all included.

    Args:
        file_bytes: Raw bytes of the Office file (a ZIP archive).
        mime_type: OOXML MIME type for .docx, .pptx or .xlsx; any other value
            yields None.

    Returns:
        The extracted text, or None when the MIME type is unsupported, the data
        is not a valid ZIP archive, or no text could be extracted. Errors are
        logged rather than raised.
    """
    try:
        with zipfile.ZipFile(io.BytesIO(file_bytes)) as zf:
            shared_strings: List[str] = []

            # Map MIME → archive members to inspect
            if mime_type == _OOXML_DOCX_MIME:
                targets = ["word/document.xml"]
            elif mime_type == _OOXML_PPTX_MIME:
                targets = [n for n in zf.namelist() if n.startswith("ppt/slides/slide")]
            elif mime_type == _OOXML_XLSX_MIME:
                targets = [
                    n
                    for n in zf.namelist()
                    if n.startswith("xl/worksheets/sheet") and "drawing" not in n
                ]
                shared_strings = _load_shared_strings(zf)
            else:
                return None

            is_spreadsheet = mime_type == _OOXML_XLSX_MIME
            pieces: List[str] = []
            for member in targets:
                try:
                    xml_root = ET.fromstring(zf.read(member))
                    if is_spreadsheet:
                        member_texts = _sheet_cell_texts(
                            xml_root, shared_strings, member
                        )
                    else:  # Word or PowerPoint
                        member_texts = _text_run_texts(xml_root)
                    if member_texts:
                        # Join texts from one member with spaces
                        pieces.append(" ".join(member_texts))
                except ET.ParseError as e:
                    logger.warning(
                        f"Could not parse XML in member '{member}' for {mime_type} file: {e}"
                    )
                except Exception as e:
                    # Log and keep going so one bad member does not hide the others.
                    logger.error(
                        f"Error processing member '{member}' for {mime_type}: {e}",
                        exc_info=True,
                    )

            if not pieces:  # No text was extracted at all
                return None

            # Join content from different members (sheets/slides) with double newlines
            text = "\n\n".join(pieces).strip()
            return text or None  # None if nothing is left after stripping
    except zipfile.BadZipFile:
        logger.warning(f"File is not a valid ZIP archive (mime_type: {mime_type}).")
        return None
    except ET.ParseError as e:
        logger.error(f"XML parsing error at a high level for {mime_type}: {e}")
        return None
    except Exception as e:
        logger.error(
            f"Failed to extract office XML text for {mime_type}: {e}", exc_info=True
        )
        return None
def handle_http_errors(
    tool_name: str, is_read_only: bool = False, service_type: Optional[str] = None
):
    """
    A decorator to handle Google API HttpErrors and transient SSL errors in a standardized way.

    It wraps an async tool function (the wrapper awaits it), catches HttpError,
    logs a detailed error message, and raises a generic Exception with a
    user-friendly message.

    ssl.SSLError is always converted into a TransientNetworkError. If
    is_read_only is True, the call is first retried with exponential backoff
    (up to 3 attempts in total); otherwise it fails on the first SSL error.

    UserInputError, TransientNetworkError and GoogleAuthenticationError raised
    by the tool are re-raised unchanged; any other exception is logged and
    wrapped in a generic Exception.

    Args:
        tool_name (str): The name of the tool being decorated (e.g., 'list_calendars').
        is_read_only (bool): If True, the operation is considered safe to retry on
                             transient network errors. Defaults to False.
        service_type (str): Optional. The Google service type (e.g., 'calendar', 'gmail').

    Returns:
        A decorator that turns an async function into an error-handling async wrapper.
    """

    def decorator(func):
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            max_retries = 3
            base_delay = 1

            for attempt in range(max_retries):
                try:
                    return await func(*args, **kwargs)
                except ssl.SSLError as e:
                    if is_read_only and attempt < max_retries - 1:
                        # Exponential backoff: 1s, 2s, ... before the next attempt.
                        delay = base_delay * (2**attempt)
                        logger.warning(
                            f"SSL error in {tool_name} on attempt {attempt + 1}: {e}. Retrying in {delay} seconds..."
                        )
                        await asyncio.sleep(delay)
                    else:
                        # Report how many attempts were really made: non-read-only
                        # tools are never retried, so this can be 1.
                        attempts_made = attempt + 1
                        plural = "" if attempts_made == 1 else "s"
                        logger.error(
                            f"SSL error in {tool_name} on final attempt: {e}. Raising exception."
                        )
                        raise TransientNetworkError(
                            f"A transient SSL error occurred in '{tool_name}' after {attempts_made} attempt{plural}. "
                            "This is likely a temporary network or certificate issue. Please try again shortly."
                        ) from e
                except UserInputError as e:
                    # User-facing validation problem: log it and surface it unchanged.
                    message = f"Input error in {tool_name}: {e}"
                    logger.warning(message)
                    raise
                except HttpError as error:
                    # Only a keyword-passed user_google_email is visible here.
                    user_google_email = kwargs.get("user_google_email", "N/A")
                    error_details = str(error)

                    # Check if this is an API not enabled error
                    if error.resp.status == 403 and (
                        "accessNotConfigured" in error_details
                        or "SERVICE_DISABLED" in error_details
                    ):
                        enablement_msg = get_api_enablement_message(
                            error_details, service_type
                        )
                        if enablement_msg:
                            message = (
                                f"API error in {tool_name}: {enablement_msg}\n\n"
                                f"User: {user_google_email}"
                            )
                        else:
                            message = (
                                f"API error in {tool_name}: {error}. "
                                f"The required API is not enabled for your project. "
                                f"Please check the Google Cloud Console to enable it."
                            )
                    elif error.resp.status in [401, 403]:
                        # Authentication/authorization errors
                        message = (
                            f"API error in {tool_name}: {error}. "
                            f"You might need to re-authenticate for user '{user_google_email}'. "
                            f"LLM: Try 'start_google_auth' with the user's email and the appropriate service_name."
                        )
                    else:
                        # Other HTTP errors (400 Bad Request, etc.) - don't suggest re-auth
                        message = f"API error in {tool_name}: {error}"

                    logger.error(f"API error in {tool_name}: {error}", exc_info=True)
                    raise Exception(message) from error
                except TransientNetworkError:
                    # Re-raise without wrapping to preserve the specific error type
                    raise
                except GoogleAuthenticationError:
                    # Re-raise authentication errors without wrapping
                    raise
                except Exception as e:
                    message = f"An unexpected error occurred in {tool_name}: {e}"
                    logger.exception(message)
                    raise Exception(message) from e

        return wrapper

    return decorator