Spaces:

ashishbangwal
/

Investor-API

Build error

App Files Files Community

Investor-API / utils /HelperFunctions.py

ashishbangwal

latest updates to investor agent [UI + backend-logic change]

206ef5f over 1 year ago

raw

history blame contribute delete

2.74 kB

	import tiktoken
	from typing import List
	import hashlib
	import re
	import sqlite3

	import sqlite3
	import json


	def save_to_database(_id, data, db_path="utils/information.db"):
	    """Store ``data`` as JSON text under ``_id`` in the ``json_data`` table.

	    The table is created on first use, and an existing row with the same id
	    is overwritten (``INSERT OR REPLACE``).

	    Args:
	        _id: Primary-key value identifying the row.
	        data: Any object accepted by ``json.dumps``.
	        db_path: SQLite database file to open. Defaults to the path this
	            function has always used, so existing callers are unaffected.

	    Raises:
	        TypeError, ValueError: If ``data`` cannot be serialized to JSON.
	        sqlite3.Error: If the database cannot be opened or written.
	    """
	    # Serialize first so unserializable input fails before the database is
	    # opened or modified.
	    payload = json.dumps(data)

	    conn = sqlite3.connect(db_path)
	    try:
	        # Used as a context manager, the connection commits on success and
	        # rolls back on error. It does NOT close itself, hence the finally.
	        with conn:
	            conn.execute(
	                """
	                CREATE TABLE IF NOT EXISTS json_data (
	                    id TEXT PRIMARY KEY,
	                    data TEXT
	                )
	                """
	            )
	            conn.execute(
	                """
	                INSERT OR REPLACE INTO json_data (id, data)
	                VALUES (?, ?)
	                """,
	                (_id, payload),
	            )
	    finally:
	        conn.close()


	def retrieve_from_database(_id, db_path="utils/information.db"):
	    """Return the decoded JSON stored under ``_id``, or ``None`` if absent.

	    Args:
	        _id: Primary-key value of the row to look up in ``json_data``.
	        db_path: SQLite database file to open. Defaults to the path this
	            function has always used, so existing callers are unaffected.

	    Returns:
	        The value produced by ``json.loads`` on the stored text, or ``None``
	        when no row has that id or the ``json_data`` table does not exist
	        yet.

	    Raises:
	        sqlite3.Error: For database failures other than a missing table.
	    """
	    conn = sqlite3.connect(db_path)
	    try:
	        row = conn.execute(
	            "SELECT data FROM json_data WHERE id = ?", (_id,)
	        ).fetchone()
	    except sqlite3.OperationalError as exc:
	        # A database that has never been written to has no json_data table;
	        # treat that as "not found" instead of crashing the first lookup.
	        if "no such table" not in str(exc):
	            raise
	        row = None
	    finally:
	        # Always release the connection, even when the query fails.
	        conn.close()

	    if row is None:
	        return None
	    return json.loads(row[0])


	def generate_file_id(file_bytes: bytes) -> str:
	    """Derive a 63-character hex identifier from the start of a file.

	    Only the first 4096 bytes are fed to SHA-256, and the hex digest is cut
	    to 63 characters, so inputs sharing that 4096-byte prefix get the same
	    id.
	    """
	    leading_chunk = file_bytes[:4096]
	    hex_digest = hashlib.sha256(leading_chunk).hexdigest()
	    return hex_digest[:63]


	def extract_content(text):
	    """Return the text inside the first ``<report-chart>`` element of ``text``.

	    Args:
	        text: String to search. The enclosed content may span several lines.

	    Returns:
	        The characters between the first ``<report-chart>`` tag and the
	        nearest following ``</report-chart>`` tag (possibly empty).

	    Raises:
	        IndexError: If ``text`` contains no such tag pair. The exception
	            type matches what indexing an empty match list raised before.
	    """
	    # search() stops at the first hit instead of collecting every match;
	    # DOTALL lets "." cross newlines and the lazy ".*?" ends at the nearest
	    # closing tag.
	    found = re.search(r"<report-chart>(.*?)</report-chart>", text, re.DOTALL)
	    if found is None:
	        raise IndexError(
	            "no <report-chart>...</report-chart> block found in text"
	        )
	    return found.group(1)


	def CountTokens(texts: List[str]) -> List[int]:
	    """Return the token count of each string in ``texts``, in input order.

	    The strings are encoded as one batch with the tiktoken encoding
	    registered for the "gpt-3.5-turbo" model.
	    """
	    encoder = tiktoken.encoding_for_model("gpt-3.5-turbo")
	    token_batches = encoder.encode_batch(texts)
	    return list(map(len, token_batches))


	def web_search_result_processor(output):
	    """Return the content of the first ``<report-chart>`` block in ``output``.

	    Delegates to ``extract_content``.

	    Args:
	        output: Text expected to contain a
	            ``<report-chart>...</report-chart>`` section.

	    Returns:
	        Whatever ``extract_content`` returns for ``output``.

	    Raises:
	        IndexError: Propagated from ``extract_content`` when ``output`` has
	            no ``<report-chart>`` block.
	    """
	    return extract_content(output)