|
|
import os
import re

import streamlit as st

from langchain_groq import ChatGroq
|
|
|
|
|
|
|
|
# --- Page setup and sidebar inputs -------------------------------------------
st.title("Code Analyzer & Migrator for SQL, PL-SQL, and BTEQ Files")

st.sidebar.header("Upload your Document")

# Only SQL-family sources are accepted; ".bteq" covers Teradata BTEQ scripts.
uploaded_file = st.sidebar.file_uploader("Choose a file", type=["sql", "bteq"])

language_choice = st.sidebar.selectbox("Convert code to:", ["Select a language", "pyspark"])

submit_button = st.sidebar.button("Submit")

# SECURITY FIX: the Groq API key was previously hard-coded in this file,
# leaking the credential to anyone with read access to the repository.
# Read it from the environment instead (export GROQ_API_KEY before launch).
groq_api_key = os.environ.get("GROQ_API_KEY", "")
if not groq_api_key:
    st.sidebar.error("GROQ_API_KEY environment variable is not set.")

# Shared chat model used by both the discovery and conversion helpers below.
llm = ChatGroq(groq_api_key=groq_api_key, model_name="Gemma2-9b-It")
|
|
|
|
|
|
|
|
def llm_extract_sql_elements(content):
    """Ask the LLM for a migration-planning Code Discovery Report.

    Parameters
    ----------
    content : str
        Raw text of the uploaded BTEQ/SQL file.

    Returns
    -------
    str
        Free-form markdown report produced by the model.
    """
    # Fixed instruction fragments plus the user's code; joined with no
    # separator so the final prompt is identical to plain concatenation.
    fragments = (
        "Analyze the following BTEQ code and generate a comprehensive Code Discovery Report for migration planning. ",
        "Report should include:\n\n",
        "1. **Metric-Based Summary** - Use counts and examples:\n",
        "   - Number of tables (e.g., `5 tables: Users, Orders, Products, Inventory, Sales`)\n",
        "   - Number of views (e.g., `2 views: DailySales, MonthlyRevenue`)\n",
        "   - CREATE statements count (e.g., `3 CREATE TABLE statements`)\n",
        "   - SELECT statements count (e.g., `8 SELECT queries`)\n",
        "   - JOIN operations count (e.g., `4 JOIN operations`)\n",
        "   - Any syntax issues (e.g., `1 unmatched parenthesis`)\n\n",
        "2. **Discrepancy Checks** - Identify potential issues:\n",
        "   - Syntax issues, deprecated functions, or non-standard practices\n",
        "   - Examples: Unclosed quotes, missing semicolons\n\n",
        "3. **Key Observations & Learnings**:\n",
        "   - Highlight repetitive operations or inefficiencies (e.g., `Repetitive SELECT * usage`)\n",
        "   - Provide 2-3 short recommendations for the migration to PySpark (e.g., ",
        "`Consider using DataFrames for JOIN operations`)\n\n",
        f"Code:\n{content}",
    )
    discovery_prompt = "".join(fragments)
    return llm.predict(discovery_prompt)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def llm_convert_code(content, target_language):
    """Convert BTEQ source code to ``target_language`` using the LLM.

    Parameters
    ----------
    content : str
        Raw BTEQ/SQL text to translate.
    target_language : str
        Destination language name (the UI currently offers only "pyspark").

    Returns
    -------
    str
        The model's converted code; the prompt instructs the model to emit
        code only, with no surrounding prose.
    """
    prompt = (
        f"Convert the following BTEQ code to fully functional {target_language} code, ensuring that:\n"
        f"1. All BTEQ-specific commands are accurately translated to equivalent {target_language} constructs.\n"
        f"2. SQL operations (e.g., SELECT, JOIN, WHERE) are migrated using appropriate {target_language} libraries (e.g., PySpark SQL DataFrame API for PySpark).\n"
        f"3. All syntax and logic are correct and executable in {target_language} without modifications.\n\n"
        f"After the conversion, validate the {target_language} code to confirm its correctness.\n\n"
        f"Examples of required conversions:\n"
        f"- BTEQ SELECT statements should map to PySpark DataFrame `select()` functions.\n"
        f"- Error handling in BTEQ (e.g., `.IF ERRORCODE`) should be replaced with equivalent exception handling.\n"
        f"- Explicit casting, joins, and aggregations should follow {target_language} best practices.\n\n"
        f"Please ensure that all transformed code is structured, efficient, and ready for production use in {target_language}.\n\n"
        f"Code to convert:\n{content}"
        "example output code:"
        # BUG FIX: the exemplar below previously imported only `col` and
        # `countDistinct` while its aggregation used `avg`, so the model was
        # shown code that raises NameError. `avg` is now imported.
        '''from pyspark.sql import SparkSession
from pyspark.sql.functions import col, countDistinct, avg

# Initialize SparkSession
spark = SparkSession.builder.appName("SimplePySparkExample").getOrCreate()

# Sample data for two DataFrames
data1 = [(1, "Alice", "Sales", 5000), (2, "Bob", "HR", 4000), (3, "Charlie", "IT", 6000)]
data2 = [(1, "Sales", "New York"), (2, "HR", "Los Angeles"), (3, "IT", "San Francisco")]

# Create DataFrames
df1 = spark.createDataFrame(data1, ["id", "name", "department", "salary"])
df2 = spark.createDataFrame(data2, ["dept_id", "department", "location"])

# Selecting specific columns and filtering rows
selected_df = df1.select("name", "department", "salary").filter(col("salary") > 4500)

# Joining DataFrames on a common column
joined_df = selected_df.join(df2, df1.department == df2.department, "inner")

# Aggregation: Count distinct departments and get the average salary by department
agg_df = joined_df.groupBy("department").agg(countDistinct("name").alias("distinct_names"), avg("salary").alias("avg_salary"))

# Show the final result
agg_df.show()

# Stop SparkSession
spark.stop()
'''
        f"Note: output must contain only the converted pyspark (python format)code and no other pre or post text"
    )
    converted_code = llm.predict(prompt)
    return converted_code
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Main flow: analyze the upload and optionally convert it -----------------
if submit_button and uploaded_file is not None:
    # ROBUSTNESS FIX: a non-UTF-8 upload previously crashed with a raw
    # traceback in the UI; report it as a readable error instead.
    try:
        file_content = uploaded_file.read().decode("utf-8")
    except UnicodeDecodeError:
        st.error("The uploaded file is not valid UTF-8 text; please check its encoding.")
        st.stop()

    st.subheader("Uploaded Document Content:")
    st.text_area("Document Content", file_content, height=300)

    # Cheap local metrics before involving the LLM.
    st.subheader("Basic Analysis")
    line_count = len(file_content.splitlines())
    word_count = len(file_content.split())
    st.write(f"**Line Count**: {line_count}")
    # BUG FIX: word_count was computed but never displayed.
    st.write(f"**Word Count**: {word_count}")

    st.subheader("Code Discovery Report (LLM-Enhanced)")
    sql_analysis = llm_extract_sql_elements(file_content)
    st.write(sql_analysis)

    # Conversion only runs when the user picked a concrete target language.
    if language_choice != "Select a language":
        st.subheader(f"Code Conversion to {language_choice.capitalize()}")
        converted_code = llm_convert_code(file_content, language_choice)

        st.text_area("Converted Code", converted_code, height=300)

        # .py extension is correct for the single "pyspark" target offered.
        st.download_button(
            label="Download Converted Code",
            data=converted_code,
            file_name=f"converted_code_{language_choice}.py",
            mime="text/plain",
        )

elif submit_button and uploaded_file is None:
    st.warning("Please upload a file before submitting.")

else:
    st.info("Upload a document in the sidebar, select the target language, and click Submit to analyze.")
|
|
|