# Investor-API / utils / HelperFunctions.py
# Last commit 206ef5f by ashishbangwal:
# "latest updates to investor agent [UI + backend-logic change]"
import tiktoken
from typing import List
import hashlib
import re
import sqlite3
import sqlite3
import json
def save_to_database(_id, data):
    """Persist ``data`` as JSON under ``_id`` in ``utils/information.db``.

    The ``json_data`` table is created on first use. A row that already
    exists for ``_id`` is overwritten (``INSERT OR REPLACE``).

    Args:
        _id: Primary-key value for the row (stored in a TEXT column).
        data: Any object accepted by ``json.dumps``.

    Raises:
        TypeError, ValueError: If ``json.dumps`` cannot serialize ``data``.
        sqlite3.Error: If the database cannot be opened or written.
    """
    # Serialize first so that unserializable input fails before a database
    # connection is ever opened.
    payload = json.dumps(data)

    conn = sqlite3.connect("utils/information.db")
    try:
        cursor = conn.cursor()
        # Create the table if it doesn't exist
        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS json_data (
                id TEXT PRIMARY KEY,
                data TEXT
            )
            """
        )
        # Insert or replace the data
        cursor.execute(
            """
            INSERT OR REPLACE INTO json_data (id, data)
            VALUES (?, ?)
            """,
            (_id, payload),
        )
        conn.commit()
    finally:
        # Close the connection even when a statement above raised, so the
        # database file handle is never leaked.
        conn.close()
def retrieve_from_database(_id):
    """Load the JSON value stored under ``_id`` in ``utils/information.db``.

    Args:
        _id: Primary-key value to look up in the ``json_data`` table.

    Returns:
        The decoded JSON value, or ``None`` when no row matches ``_id`` or
        when the ``json_data`` table has not been created yet.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried for a
            reason other than the table being absent.
    """
    conn = sqlite3.connect("utils/information.db")
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("SELECT data FROM json_data WHERE id = ?", (_id,))
        except sqlite3.OperationalError:
            # A database that nothing has been saved to yet has no json_data
            # table; treat that as "not found". Any other operational error
            # (locked file, corruption, ...) is re-raised unchanged.
            table = cursor.execute(
                "SELECT 1 FROM sqlite_master "
                "WHERE type = 'table' AND name = 'json_data'"
            ).fetchone()
            if table is None:
                return None
            raise
        row = cursor.fetchone()
    finally:
        # Release the connection on every path, including errors.
        conn.close()

    if row is None:
        return None
    return json.loads(row[0])
def generate_file_id(file_bytes: bytes) -> str:
    """Derive a 63-character hexadecimal ID from the start of a file.

    Only the first 4096 bytes of ``file_bytes`` are fed to SHA-256, and the
    64-character hex digest is cut to its first 63 characters. Inputs that
    share their leading 4096 bytes therefore receive the same ID.
    """
    head = file_bytes[:4096]
    digest = hashlib.sha256(head).hexdigest()
    return digest[:63]
def extract_content(text):
    """Return the text inside the first ``<report-chart>`` element of ``text``.

    Matching is non-greedy and spans newlines (``re.DOTALL``), so the result
    is everything between the first ``<report-chart>`` and the nearest
    following ``</report-chart>``.

    Args:
        text: String to search.

    Returns:
        The captured content of the first match, without the tags.

    Raises:
        IndexError: If ``text`` contains no complete
            ``<report-chart>...</report-chart>`` pair.
    """
    # re.search stops at the first hit instead of collecting every match.
    match = re.search(r"<report-chart>(.*?)</report-chart>", text, re.DOTALL)
    if match is None:
        # Keep the IndexError type callers may already handle, but say why.
        raise IndexError("no <report-chart>...</report-chart> block found in text")
    return match.group(1)
def CountTokens(texts: List[str]) -> List[int]:
    """
    Return the token count of every string in ``texts``, in input order.

    The strings are encoded as one batch with the tiktoken encoding that
    ``tiktoken.encoding_for_model`` returns for "gpt-3.5-turbo".
    """
    encoder = tiktoken.encoding_for_model("gpt-3.5-turbo")
    token_lists = encoder.encode_batch(texts)
    return list(map(len, token_lists))
def web_search_result_processor(output):
    """Return the ``<report-chart>`` payload embedded in ``output``.

    This is a thin wrapper around ``extract_content``. An earlier
    HTML-to-Markdown implementation that was kept here as a disabled string
    has been removed; it relied on an ``html2text`` module that is not among
    the imports visible at the top of this file.

    Args:
        output: Text handed unchanged to ``extract_content``.

    Returns:
        Whatever ``extract_content(output)`` returns.

    Raises:
        Any exception raised by ``extract_content`` propagates unchanged.
    """
    return extract_content(output)