File size: 6,696 Bytes
30e077e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import os
import re
import streamlit as st
from langchain_groq import ChatGroq

# --- Page setup ---------------------------------------------------------
st.title("Code Analyzer & Migrator for SQL, PL-SQL, and BTEQ Files")

# --- Sidebar: file upload, target-language choice, submit ---------------
st.sidebar.header("Upload your Document")
uploaded_file = st.sidebar.file_uploader("Choose a file", type=["sql", "bteq"])
language_choice = st.sidebar.selectbox("Convert code to:", ["Select a language", "pyspark"])
submit_button = st.sidebar.button("Submit")

# --- LLM client ---------------------------------------------------------
# SECURITY: never hard-code API keys in source. The key that used to live
# here was committed in plain text and must be treated as leaked — rotate it
# in the Groq console. The app now reads GROQ_API_KEY from the environment
# (st.secrets would work equally well for Streamlit Cloud deployments).
groq_api_key = os.environ.get("GROQ_API_KEY", "")
llm = ChatGroq(groq_api_key=groq_api_key, model_name="Gemma2-9b-It")
def llm_extract_sql_elements(content):
    """Ask the LLM for a migration-planning Code Discovery Report on *content*.

    The report covers metric counts (tables, views, statement types),
    discrepancy checks, and PySpark-migration recommendations.
    Returns the raw LLM response text.
    """
    # Only the final segment interpolates anything, so the fixed instruction
    # lines are plain string literals; Python concatenates the adjacent
    # literals into one prompt at compile time.
    prompt = (
        "Analyze the following BTEQ code and generate a comprehensive Code Discovery Report for migration planning. "
        "Report should include:\n\n"
        "1. **Metric-Based Summary** - Use counts and examples:\n"
        "   - Number of tables (e.g., `5 tables: Users, Orders, Products, Inventory, Sales`)\n"
        "   - Number of views (e.g., `2 views: DailySales, MonthlyRevenue`)\n"
        "   - CREATE statements count (e.g., `3 CREATE TABLE statements`)\n"
        "   - SELECT statements count (e.g., `8 SELECT queries`)\n"
        "   - JOIN operations count (e.g., `4 JOIN operations`)\n"
        "   - Any syntax issues (e.g., `1 unmatched parenthesis`)\n\n"
        "2. **Discrepancy Checks** - Identify potential issues:\n"
        "   - Syntax issues, deprecated functions, or non-standard practices\n"
        "   - Examples: Unclosed quotes, missing semicolons\n\n"
        "3. **Key Observations & Learnings**:\n"
        "   - Highlight repetitive operations or inefficiencies (e.g., `Repetitive SELECT * usage`)\n"
        "   - Provide 2-3 short recommendations for the migration to PySpark (e.g., "
        "`Consider using DataFrames for JOIN operations`)\n\n"
        f"Code:\n{content}"
    )
    report = llm.predict(prompt)
    return report
def llm_convert_code(content, target_language):
    """Translate BTEQ *content* into *target_language* source via the LLM.

    Builds a conversion prompt containing instructions, the code to convert,
    and a worked PySpark example, then returns the model's output (which is
    instructed to be code only, with no surrounding prose).

    Fixes over the previous revision:
    - the example code used ``avg()`` without importing it (a NameError if
      the model copied the example verbatim) — ``avg`` is now imported;
    - the prompt segments ran together with no separators, fusing the user's
      code into the literal text ``example output code:`` — explicit
      newlines now delimit each section.
    """
    # Example kept at column 0 inside the literal so the model sees
    # top-level PySpark code, not indented fragments.
    example_code = '''from pyspark.sql import SparkSession
from pyspark.sql.functions import col, countDistinct, avg

# Initialize SparkSession
spark = SparkSession.builder.appName("SimplePySparkExample").getOrCreate()

# Sample data for two DataFrames
data1 = [(1, "Alice", "Sales", 5000), (2, "Bob", "HR", 4000), (3, "Charlie", "IT", 6000)]
data2 = [(1, "Sales", "New York"), (2, "HR", "Los Angeles"), (3, "IT", "San Francisco")]

# Create DataFrames
df1 = spark.createDataFrame(data1, ["id", "name", "department", "salary"])
df2 = spark.createDataFrame(data2, ["dept_id", "department", "location"])

# Selecting specific columns and filtering rows
selected_df = df1.select("name", "department", "salary").filter(col("salary") > 4500)

# Joining DataFrames on a common column
joined_df = selected_df.join(df2, df1.department == df2.department, "inner")

# Aggregation: Count distinct departments and get the average salary by department
agg_df = joined_df.groupBy("department").agg(countDistinct("name").alias("distinct_names"), avg("salary").alias("avg_salary"))

# Show the final result
agg_df.show()

# Stop SparkSession
spark.stop()
'''
    prompt = (
        f"Convert the following BTEQ code to fully functional {target_language} code, ensuring that:\n"
        f"1. All BTEQ-specific commands are accurately translated to equivalent {target_language} constructs.\n"
        f"2. SQL operations (e.g., SELECT, JOIN, WHERE) are migrated using appropriate {target_language} libraries (e.g., PySpark SQL DataFrame API for PySpark).\n"
        f"3. All syntax and logic are correct and executable in {target_language} without modifications.\n\n"
        f"After the conversion, validate the {target_language} code to confirm its correctness.\n\n"
        "Examples of required conversions:\n"
        "- BTEQ SELECT statements should map to PySpark DataFrame `select()` functions.\n"
        "- Error handling in BTEQ (e.g., `.IF ERRORCODE`) should be replaced with equivalent exception handling.\n"
        f"- Explicit casting, joins, and aggregations should follow {target_language} best practices.\n\n"
        f"Please ensure that all transformed code is structured, efficient, and ready for production use in {target_language}.\n\n"
        f"Code to convert:\n{content}\n\n"
        "example output code:\n"
        f"{example_code}\n"
        "Note: output must contain only the converted pyspark (python format) code and no other pre or post text"
    )
    converted_code = llm.predict(prompt)
    return converted_code
# --- Main flow: runs on every rerun after the user clicks Submit ---------
if submit_button and uploaded_file is not None:
    # Decode defensively: errors="replace" substitutes U+FFFD for invalid
    # bytes so a non-UTF-8 upload degrades gracefully instead of crashing
    # the whole app with UnicodeDecodeError.
    file_content = uploaded_file.read().decode("utf-8", errors="replace")

    # Echo the raw upload so the user can confirm the right file was read.
    st.subheader("Uploaded Document Content:")
    st.text_area("Document Content", file_content, height=300)

    # Basic local metrics (no LLM involved).
    st.subheader("Basic Analysis")
    line_count = len(file_content.splitlines())
    st.write(f"**Line Count**: {line_count}")

    # LLM-generated discovery report for migration planning.
    st.subheader("Code Discovery Report (LLM-Enhanced)")
    sql_analysis = llm_extract_sql_elements(file_content)
    st.write(sql_analysis)

    # Run the conversion only when a concrete target language was picked.
    if language_choice != "Select a language":
        st.subheader(f"Code Conversion to {language_choice.capitalize()}")
        converted_code = llm_convert_code(file_content, language_choice)
        st.text_area("Converted Code", converted_code, height=300)
        # Offer the result as a downloadable .py file.
        st.download_button(
            label="Download Converted Code",
            data=converted_code,
            file_name=f"converted_code_{language_choice}.py",
            mime="text/plain",
        )
elif submit_button and uploaded_file is None:
    st.warning("Please upload a file before submitting.")
else:
    st.info("Upload a document in the sidebar, select the target language, and click Submit to analyze.")
|