# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: https://opensource.org/license/bsd-3-clause/
from gitdb.db.base import (
    FileDBBase,
    ObjectDBR,
    ObjectDBW
)

from gitdb.exc import (
    BadObject,
    AmbiguousObjectName
)

from gitdb.stream import (
    DecompressMemMapReader,
    FDCompressedSha1Writer,
    FDStream,
    Sha1Writer
)

from gitdb.base import (
    OStream,
    OInfo
)

from gitdb.util import (
    file_contents_ro_filepath,
    ENOENT,
    hex_to_bin,
    bin_to_hex,
    exists,
    chmod,
    isfile,
    remove,
    rename,
    dirname,
    basename,
    join
)

from gitdb.fun import (
    chunk_size,
    loose_object_header_info,
    write_object,
    stream_copy
)

from gitdb.utils.encoding import force_bytes

import tempfile
import os
import sys

__all__ = ('LooseObjectDB', )
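
# A minimal usage sketch (the path is hypothetical; LooseObjectDB expects the
# objects directory of a git repository, e.g. '<repo>/.git/objects'):
#
#   ldb = LooseObjectDB('/path/to/repo/.git/objects')
#   print(ldb.size())   # number of loose objects currently stored
#
# Read and write sketches follow the respective methods below.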


class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):

    """A database which operates on loose object files"""

    # CONFIGURATION
    # chunks in which data will be copied between streams
    stream_chunk_size = chunk_size

    # Loose objects are written read-only; on Windows they must stay writable,
    # as a read-only file cannot be removed there
    new_objects_mode = 0o444
    if os.name == 'nt':
        new_objects_mode = 0o644

    def __init__(self, root_path):
        super().__init__(root_path)
        self._hexsha_to_file = dict()
        # Additional flags - might be set to 0 after the first failure.
        # Depending on the root, this might work for some mounts, for others not,
        # which is why it is kept per instance
        self._fd_open_flags = getattr(os, 'O_NOATIME', 0)

    #{ Interface
    def object_path(self, hexsha):
        """
        :return: path at which the object with the given hexsha would be stored,
            relative to the database root"""
        return join(hexsha[:2], hexsha[2:])
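
    # For illustration (a sketch with a hypothetical sha): the two leading hex
    # digits become the fan-out directory, the remaining 38 the file name, so
    # 'ab' followed by 38 more digits maps to 'ab/<remaining 38 digits>' -
    # git's standard loose object layout.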

    def readable_db_object_path(self, hexsha):
        """
        :return: readable object path to the object identified by hexsha
        :raise BadObject: If the object file does not exist"""
        try:
            return self._hexsha_to_file[hexsha]
        except KeyError:
            pass
        # END ignore cache misses

        # try filesystem
        path = self.db_path(self.object_path(hexsha))
        if exists(path):
            self._hexsha_to_file[hexsha] = path
            return path
        # END handle cache
        raise BadObject(hexsha)

    def partial_to_complete_sha_hex(self, partial_hexsha):
        """
        :return: 20 byte binary sha1 string which uniquely matches the given name
        :param partial_hexsha: hexadecimal partial name (bytes or ascii string)
        :raise AmbiguousObjectName: if more than one object matches the prefix
        :raise BadObject: if no object matches the prefix"""
        candidate = None
        for binsha in self.sha_iter():
            if bin_to_hex(binsha).startswith(force_bytes(partial_hexsha)):
                # each object is yielded only once, so a second match must be a
                # different object sharing the prefix
                if candidate is not None:
                    raise AmbiguousObjectName(partial_hexsha)
                candidate = binsha
        # END for each object
        if candidate is None:
            raise BadObject(partial_hexsha)
        return candidate

    #} END interface
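
    # Prefix lookup sketch (hypothetical prefix):
    #
    #   binsha = ldb.partial_to_complete_sha_hex('ab12ef')
    #
    # returns the full 20-byte binary sha if exactly one object starts with the
    # prefix, raises AmbiguousObjectName if several do, BadObject if none does.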

    def _map_loose_object(self, sha):
        """
        :return: memory map of that file to allow random read access
        :raise BadObject: if object could not be located"""
        db_path = self.db_path(self.object_path(bin_to_hex(sha)))
        try:
            return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
        except OSError as e:
            if e.errno != ENOENT:
                # try again without noatime
                try:
                    contents = file_contents_ro_filepath(db_path)
                except OSError as new_e:
                    raise BadObject(sha) from new_e
                # the open only failed because of our extra flag - drop it for
                # all future attempts and return the mapped contents
                self._fd_open_flags = 0
                return contents
            else:
                raise BadObject(sha) from e
            # END handle error
        # END exception handling

    def set_ostream(self, stream):
        """:raise TypeError: if the stream does not support the Sha1Writer interface"""
        if stream is not None and not isinstance(stream, Sha1Writer):
            raise TypeError("Output stream must support the %s interface" % Sha1Writer.__name__)
        return super().set_ostream(stream)

    def info(self, sha):
        m = self._map_loose_object(sha)
        try:
            typ, size = loose_object_header_info(m)
            return OInfo(sha, typ, size)
        finally:
            if hasattr(m, 'close'):
                m.close()
        # END assure release of system resources

    def stream(self, sha):
        m = self._map_loose_object(sha)
        typ, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True)
        return OStream(sha, typ, size, stream)
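
    # Read access sketch (binsha is a 20-byte binary sha, e.g. obtained via
    # hex_to_bin from a known 40 character hex sha):
    #
    #   info = ldb.info(binsha)       # OInfo tuple: sha, type and size
    #   ostream = ldb.stream(binsha)  # same info plus a decompressing stream
    #   data = ostream.read()         # the raw, header-less object data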

    def has_object(self, sha):
        try:
            self.readable_db_object_path(bin_to_hex(sha))
            return True
        except BadObject:
            return False
        # END check existence

    def store(self, istream):
        """note: The sha we produce will be hex by nature"""
        tmp_path = None
        writer = self.ostream()
        if writer is None:
            # open a tmp file to write the data to
            fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)

            if istream.binsha is None:
                writer = FDCompressedSha1Writer(fd)
            else:
                writer = FDStream(fd)
            # END handle direct stream copies
        # END handle custom writer

        try:
            try:
                if istream.binsha is not None:
                    # copy as much as possible, the actual uncompressed item size might
                    # be smaller than the compressed version
                    stream_copy(istream.read, writer.write, sys.maxsize, self.stream_chunk_size)
                else:
                    # write object with header, we have to make a new one
                    write_object(istream.type, istream.size, istream.read, writer.write,
                                 chunk_size=self.stream_chunk_size)
                # END handle direct stream copies
            finally:
                if tmp_path:
                    writer.close()
            # END assure target stream is closed
        except:
            if tmp_path:
                os.remove(tmp_path)
            raise
        # END assure tmpfile removal on error

        hexsha = None
        if istream.binsha:
            hexsha = istream.hexsha
        else:
            hexsha = writer.sha(as_hex=True)
        # END handle sha

        if tmp_path:
            obj_path = self.db_path(self.object_path(hexsha))
            obj_dir = dirname(obj_path)
            os.makedirs(obj_dir, exist_ok=True)
            # END handle destination directory
            # rename onto existing doesn't work on NTFS
            if isfile(obj_path):
                remove(tmp_path)
            else:
                rename(tmp_path, obj_path)
            # end rename only if needed

            # make sure it's readable for all! The tmp file started out as
            # rw------- but the final object needs to be r--r--r--
            chmod(obj_path, self.new_objects_mode)
        # END handle dry_run

        istream.binsha = hex_to_bin(hexsha)
        return istream
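
    # Write access sketch, using gitdb's IStream (assuming the usual gitdb
    # exports; the object type is a byte string such as b'blob'):
    #
    #   from io import BytesIO
    #   from gitdb import IStream
    #   data = b'hello world'
    #   istream = ldb.store(IStream(b'blob', len(data), BytesIO(data)))
    #   hexsha = bin_to_hex(istream.binsha)   # the sha is set during the write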

    def sha_iter(self):
        # find all files which look like an object, extract sha from there
        for root, dirs, files in os.walk(self.root_path()):
            root_base = basename(root)
            if len(root_base) != 2:
                continue
            for f in files:
                if len(f) != 38:
                    continue
                yield hex_to_bin(root_base + f)
            # END for each file
        # END for each walk iteration

    def size(self):
        return len(tuple(self.sha_iter()))
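
# Enumeration sketch: sha_iter() walks the two-character fan-out directories
# and yields binary shas; size() simply exhausts that iterator, so it is
# linear in the number of loose objects:
#
#   for binsha in ldb.sha_iter():
#       print(bin_to_hex(binsha).decode('ascii'))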