Spaces:
Build error
Build error
| import tiktoken | |
| from typing import List | |
| import hashlib | |
| import re | |
| import sqlite3 | |
| import sqlite3 | |
| import json | |
def save_to_database(_id, data, db_path: str = "utils/information.db"):
    """Store ``data`` as JSON text under ``_id`` in the ``json_data`` table.

    The table is created on first use. An existing row with the same
    ``_id`` is overwritten (``INSERT OR REPLACE``).

    Args:
        _id: Primary-key value of the row.
        data: Any object accepted by ``json.dumps``.
        db_path: SQLite database file to write to; SQLite creates the file
            if it does not exist. Defaults to ``utils/information.db``.

    Raises:
        TypeError, ValueError: If ``data`` cannot be serialized to JSON.
        sqlite3.Error: If the database cannot be opened or written.
    """
    # Serialize before opening the database so unserializable data fails
    # fast without leaving a connection open.
    payload = json.dumps(data)

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        # Create the table if it doesn't exist.
        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS json_data (
                id TEXT PRIMARY KEY,
                data TEXT
            )
            """
        )
        # Insert or replace the data.
        cursor.execute(
            """
            INSERT OR REPLACE INTO json_data (id, data)
            VALUES (?, ?)
            """,
            (_id, payload),
        )
        conn.commit()
    finally:
        # Always release the handle, even when a statement above fails.
        conn.close()
def retrieve_from_database(_id, db_path: str = "utils/information.db"):
    """Return the decoded JSON value stored under ``_id``, or ``None``.

    Args:
        _id: Primary-key value to look up in the ``json_data`` table.
        db_path: SQLite database file to read from. Defaults to
            ``utils/information.db``.

    Returns:
        The result of ``json.loads`` on the stored text, or ``None`` when
        no row has that id or the ``json_data`` table does not exist yet.

    Raises:
        sqlite3.Error: For database failures other than a missing table.
    """
    conn = sqlite3.connect(db_path)
    try:
        try:
            row = conn.execute(
                "SELECT data FROM json_data WHERE id = ?", (_id,)
            ).fetchone()
        except sqlite3.OperationalError as exc:
            # A database that has never been written to has no json_data
            # table; treat that as "not found" rather than a failure.
            if "no such table" not in str(exc):
                raise
            row = None
    finally:
        # Release the handle on every path, including errors.
        conn.close()

    if row is None:
        return None
    return json.loads(row[0])
def generate_file_id(file_bytes: bytes) -> str:
    """Return a 63-character hex identifier for the given file contents.

    Only the first 4096 bytes of ``file_bytes`` are fed to SHA-256, so two
    inputs that share that prefix receive the same identifier. The 64-char
    hex digest is truncated to its first 63 characters.
    """
    head = file_bytes[:4096]
    digest_hex = hashlib.sha256(head).hexdigest()
    return digest_hex[:63]
def extract_content(text):
    """Return the text inside the first ``<report-chart>`` element of ``text``.

    Matching is non-greedy and spans newlines (``re.DOTALL``), so the result
    is everything between the first opening tag and the nearest closing tag.

    Args:
        text: String to search.

    Returns:
        The inner text of the first ``<report-chart>...</report-chart>`` pair.

    Raises:
        IndexError: If ``text`` contains no complete pair of tags. The
            exception type matches what indexing an empty match list raised.
    """
    # re.search stops at the first hit instead of collecting every match.
    found = re.search(r"<report-chart>(.*?)</report-chart>", text, re.DOTALL)
    if found is None:
        raise IndexError("no <report-chart>...</report-chart> block found in text")
    return found.group(1)
def CountTokens(texts: List[str]) -> List[int]:
    """Return the token count of each string in ``texts``.

    The strings are encoded in one batch with the tiktoken encoding that
    ``tiktoken.encoding_for_model`` selects for ``"gpt-3.5-turbo"``. The
    result holds one length per input string, in the same order.
    """
    encoder = tiktoken.encoding_for_model("gpt-3.5-turbo")
    token_batches = encoder.encode_batch(texts)
    return list(map(len, token_batches))
def web_search_result_processor(output):
    """Return the content of the first ``<report-chart>`` element in ``output``.

    This is a thin wrapper that delegates to ``extract_content``.

    Args:
        output: Text passed to ``extract_content`` for searching.

    Returns:
        Whatever ``extract_content`` returns for ``output``.

    Raises:
        IndexError: Propagated from ``extract_content`` when ``output``
            contains no ``<report-chart>...</report-chart>`` pair.
    """
    return extract_content(output)