Spaces:

Soumik555
/

FastApi

Running

FastApi / util_service.py

Soumik Bose

reverted

16ffb15 4 months ago

4.55 kB

	from langchain_core.prompts import ChatPromptTemplate
	import numpy as np

	# Substrings that process_answer() searches for (case-insensitively, via
	# contains_keywords()) in the "answer" / "error" values of a response dict.
	keywords = [
	    "i encountered",
	    "429",
	    "unfortunately",
	    "unexpected character",
	    "unsupported",
	    "error",
	    "sorry",
	    "response",
	    "unable",
	    "because",
	    "too many",
	]

	def contains_keywords(text, keywords):
	    """Return True if any keyword occurs in ``text``, ignoring case.

	    Args:
	        text: Value to search. ``None`` is treated as containing nothing;
	            any other non-string value is searched through its ``str()``
	            form instead of raising ``AttributeError``.
	        keywords: Iterable of substrings to look for.

	    Returns:
	        bool: True when at least one keyword is a case-insensitive
	        substring of ``text``, otherwise False.
	    """
	    if text is None:
	        return False
	    # Lower-case the searched text once rather than once per keyword.
	    haystack = (text if isinstance(text, str) else str(text)).lower()
	    return any(keyword.lower() in haystack for keyword in keywords)

	def process_answer(answer):
	    """Check a response for the module-level ``keywords``.

	    Returns:
	        The string "Empty response received." when ``answer`` is an empty
	        list; True when ``answer`` is a dict whose "answer" or "error"
	        value contains one of ``keywords``; False in every other case.
	    """
	    # An empty list is reported with a message rather than a boolean.
	    is_empty_list = isinstance(answer, list) and not answer
	    if is_empty_list:
	        return "Empty response received."

	    # Only dicts are inspected; "answer" is checked before "error".
	    if isinstance(answer, dict):
	        for field in ("answer", "error"):
	            if field in answer and contains_keywords(answer[field], keywords):
	                return True

	    # Nothing matched, or the value is of a type that is not inspected.
	    return False


	def _prompt_generator(question: str, chart_required: bool, csv_url: str):
	    """Build the data-analyst prompt for a question about a CSV file.

	    Args:
	        question: The user's query, embedded after "Query:" in the prompt.
	        chart_required: When true the chart-generation prompt is used,
	            otherwise the plain question-answering prompt.
	        csv_url: Location of the CSV. It is embedded as a quoted string
	            literal inside the ``pd.read_csv(...)`` instruction.

	    Returns:
	        The result of ``ChatPromptTemplate.from_template`` applied to the
	        selected prompt text.
	    """
	    # NOTE(review): the text is fully interpolated here and then handed to
	    # from_template(), which presumably parses any remaining braces (the
	    # rendered "{unique_id}" markers, or braces typed in `question`) as
	    # template variables - confirm against how callers format the result.

	    # `{csv_url!r}` renders the URL with quotes so the instruction shows a
	    # valid call such as pd.read_csv('https://host/data.csv').
	    if not chart_required:
	        chat_prompt = f"""You are a senior data analyst working with CSV data. Adhere strictly to the following guidelines:

	1. Data Verification: Always inspect the data with `.sample(5).to_dict()` before performing any analysis.
	2. Data Integrity: Ensure proper handling of null values to maintain accuracy and reliability.
	3. Communication: Provide concise, professional, and well-structured responses.
	4. Avoid including any internal processing details or references to the methods used to generate your response (ex: based on the tool call, using the function -> These types of phrases.)
	5. Always use pd.read_csv({csv_url!r}) to read the CSV file.
	6. Full Dataset Interpretation Rule: When asked about the entire dataset, provide a concise summary of the dataset's structure, such as the number of rows, columns, and data types.
	(Example: “give me total number of columns”, “show all statistics”, “full summary”, etc.)

	Query: {question}

	"""
	        return ChatPromptTemplate.from_template(chat_prompt)

	    chart_prompt = f"""You are a senior data analyst working with CSV data. Follow these rules STRICTLY:

	1. Generate ONE unique identifier FIRST using: unique_id = uuid.uuid4().hex
	2. Visualization requirements:
	- Adjust font sizes, rotate labels (45° if needed), truncate for readability
	- Figure size: (12, 6)
	- Descriptive titles (fontsize=14)
	- Colorblind-friendly palettes
	- Do not use any visualization library other than matplotlib or seaborn
	3. File handling rules:
	- Create MAXIMUM 2 charts if absolutely necessary
	- For multiple charts:
	* Arrange in grid format (2x1 vertical layout preferred)
	* Use SAME unique_id with suffixes:
	- f"{{unique_id}}_1.png"
	- f"{{unique_id}}_2.png"
	- Save EXCLUSIVELY to "generated_charts" folder
	- File naming: f"chart_{{unique_id}}.png" (for single chart)
	4. FINAL OUTPUT MUST BE:
	- For single chart: f"generated_charts/chart_{{unique_id}}.png"
	- For multiple charts: f"generated_charts/chart_{{unique_id}}.png" (combined grid image)
	- ONLY return this full path string, nothing else

	Query: {question}

	IMPORTANT:
	- Generate the unique_id FIRST before any operations
	- Use THE SAME unique_id throughout entire process
	- NEVER generate new UUIDs after initial creation
	- Return EXACT filepath string of the final saved chart
	- Always use pd.read_csv({csv_url!r}) to read the CSV file
	"""
	    return ChatPromptTemplate.from_template(chart_prompt)



	def handle_out_of_range_float(value):
	    """Replace non-finite floats (NaN, +/-inf) with plain substitute values.

	    Args:
	        value: Any value. Only Python floats and NumPy floating scalars
	            are inspected; everything else is returned untouched.

	    Returns:
	        None for NaN, "Infinity" for positive infinity, "-Infinity" for
	        negative infinity, and ``value`` unchanged in every other case.
	    """
	    # np.floating also covers scalars such as np.float32, which are not
	    # subclasses of the built-in float.
	    if not isinstance(value, (float, np.floating)):
	        return value
	    if np.isnan(value):
	        return None
	    if np.isinf(value):
	        # Keep the sign so that -inf is not reported as "Infinity".
	        return "Infinity" if value > 0 else "-Infinity"
	    return value