Spaces:

DrishtiSharma
/

sql-rag

Build error

App Files Files Community

DrishtiSharma committed on Jan 13

Commit

59727ea

verified ·

1 Parent(s): 10150e1

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -133

app.py CHANGED Viewed

@@ -22,7 +22,6 @@ from langchain_community.tools.sql_database.tool import (
 )
 from langchain_community.utilities.sql_database import SQLDatabase
 from datasets import load_dataset
-from difflib import get_close_matches
 import tempfile
 st.title("SQL-RAG Using CrewAI 🚀")
@@ -177,138 +176,6 @@ def escape_markdown(text):
     escape_chars = r"(\*|_|`|~)"
     return re.sub(escape_chars, r"\\\1", text)
-# Synonym mapping for flexible query understanding
-COLUMN_SYNONYMS = {
-    "job_title": ["job title", "job role", "role", "designation", "position", "job responsibility", "occupation"],
-    "experience_level": ["experience level", "seniority", "experience", "career stage", "years of experience"],
-    "employment_type": ["employment type", "job type", "contract type", "employment status", "type of employment"],
-    "salary_in_usd": ["salary", "income", "earnings", "pay", "wage", "compensation", "amount", "paid"],
-    "remote_ratio": ["remote work", "work from home", "remote ratio", "remote", "telecommute", "wfh"],
-    "company_size": ["company size", "organization size", "business size", "firm size", "big", "small"],
-    #"employee_residence": ["country", "residence", "location", "employee location"],
-    "company_location": ["company location", "office location", "company country", "headquarters", "location", "located", "area"],
-}
-# Fuzzy matcher for mapping query terms to dataset columns
-def fuzzy_match_columns(query):
-    query = query.lower()
-    all_synonyms = {synonym: col for col, synonyms in COLUMN_SYNONYMS.items() for synonym in synonyms}
-    words = query.replace("and", "").replace("vs", "").replace("by", "").split()
-    matched_columns = []
-    for word in words:
-        matches = get_close_matches(word, all_synonyms.keys(), n=1, cutoff=0.6)
-        matched_columns.extend([all_synonyms[match] for match in matches])
-    return list(dict.fromkeys(matched_columns))
-# Ask LLM to suggest relevant columns if fuzzy matching fails
-def ask_llm_for_columns(query, llm, df):
-    columns = ', '.join(df.columns)
-    prompt = f"""
-    Analyze this user query and suggest the most relevant dataset columns for visualization.
-    Query: "{query}"
-    Available Columns: {columns}
-    Respond in this JSON format:
-    {{
-      "x_axis": "column_name",
-      "y_axis": "column_name",
-      "group_by": "optional_column_name"
-    }}
-    """
-    response = llm.generate(prompt)
-    try:
-        suggestion = json.loads(response)
-        return suggestion
-    except json.JSONDecodeError:
-        st.error("⚠️ Failed to interpret AI response. Please refine your query.")
-        return None
-# Add min, max, and average salary annotations to the chart
-def add_stats_to_figure(fig, df, y_axis):
-    min_salary = df[y_axis].min()
-    max_salary = df[y_axis].max()
-    avg_salary = df[y_axis].mean()
-    fig.add_annotation(
-        text=f"Min: ${min_salary:,.2f} | Max: ${max_salary:,.2f} | Avg: ${avg_salary:,.2f}",
-        xref="paper", yref="paper",
-        x=0.5, y=1.1,
-        showarrow=False,
-        font=dict(size=12, color="black"),
-        bgcolor="rgba(255, 255, 255, 0.7)"
-    )
-    return fig
-# Unified Visualization Generator with Fuzzy Matching and LLM Fallback
-def generate_visual_from_query(query, df, llm=None):
-    try:
-        # Step 1: Attempt Fuzzy Matching
-        matched_columns = fuzzy_match_columns(query)
-        # Step 2: Fallback to LLM if no columns are matched
-        if not matched_columns and llm:
-            st.info("🤖 No match found. Asking AI for suggestions...")
-            suggestion = ask_llm_for_columns(query, llm, df)
-            if suggestion:
-                matched_columns = [suggestion.get("x_axis"), suggestion.get("group_by")]
-        # Step 3: Process Matched Columns
-        if len(matched_columns) >= 2:
-            x_axis, group_by = matched_columns[0], matched_columns[1]
-        elif len(matched_columns) == 1:
-            x_axis, group_by = matched_columns[0], None
-        else:
-            st.warning("❓ No matching columns found. Try rephrasing your query.")
-            return None
-        # Step 4: Visualization Generation
-        # Distribution Plot
-        if "distribution" in query:
-            fig = px.box(df, x=x_axis, y="salary_in_usd", color=group_by,
-                         title=f"Salary Distribution by {x_axis.replace('_', ' ').title()}"
-                               + (f" and {group_by.replace('_', ' ').title()}" if group_by else ""))
-            return add_stats_to_figure(fig, df, "salary_in_usd")
-        # Average Salary Plot
-        elif "average" in query or "mean" in query:
-            grouped_df = df.groupby([x_axis] + ([group_by] if group_by else []))["salary_in_usd"].mean().reset_index()
-            fig = px.bar(grouped_df, x=x_axis, y="salary_in_usd", color=group_by,
-                         title=f"Average Salary by {x_axis.replace('_', ' ').title()}"
-                               + (f" and {group_by.replace('_', ' ').title()}" if group_by else ""))
-            return add_stats_to_figure(fig, df, "salary_in_usd")
-        # Salary Trends Over Time
-        elif "trend" in query and "work_year" in df.columns:
-            grouped_df = df.groupby(["work_year", x_axis])["salary_in_usd"].mean().reset_index()
-            fig = px.line(grouped_df, x="work_year", y="salary_in_usd", color=x_axis,
-                          title=f"Salary Trend Over Years by {x_axis.replace('_', ' ').title()}")
-            return add_stats_to_figure(fig, df, "salary_in_usd")
-        # Remote Work Impact
-        elif "remote" in query:
-            grouped_df = df.groupby(["remote_ratio"] + ([group_by] if group_by else []))["salary_in_usd"].mean().reset_index()
-            fig = px.bar(grouped_df, x="remote_ratio", y="salary_in_usd", color=group_by,
-                         title="Remote Work Impact on Salary")
-            return add_stats_to_figure(fig, df, "salary_in_usd")
-        # No Specific Match
-        else:
-            st.warning("⚠️ No suitable visualization to display!")
-            return None
-    except Exception as e:
-        st.error(f"Error generating visualization: {e}")
-        return None
 # SQL-RAG Analysis
 if st.session_state.df is not None:
     temp_dir = tempfile.TemporaryDirectory()
@@ -396,6 +263,8 @@ if st.session_state.df is not None:
         context=[analyze_data],
     )
     # Separate Crews for report and conclusion
     crew_report = Crew(
         agents=[sql_dev, data_analyst, report_writer],
@@ -487,3 +356,4 @@ else:
 with st.sidebar:
     st.header("📚 Reference:")
     st.markdown("[SQL Agents w CrewAI & Llama 3 - Plaban Nayak](https://github.com/plaban1981/Agents/blob/main/SQL_Agents_with_CrewAI_and_Llama_3.ipynb)")

 )
 from langchain_community.utilities.sql_database import SQLDatabase
 from datasets import load_dataset
 import tempfile
 st.title("SQL-RAG Using CrewAI 🚀")
     escape_chars = r"(\*|_|`|~)"
     return re.sub(escape_chars, r"\\\1", text)
 # SQL-RAG Analysis
 if st.session_state.df is not None:
     temp_dir = tempfile.TemporaryDirectory()
         context=[analyze_data],
     )
     # Separate Crews for report and conclusion
     crew_report = Crew(
         agents=[sql_dev, data_analyst, report_writer],
 with st.sidebar:
     st.header("📚 Reference:")
     st.markdown("[SQL Agents w CrewAI & Llama 3 - Plaban Nayak](https://github.com/plaban1981/Agents/blob/main/SQL_Agents_with_CrewAI_and_Llama_3.ipynb)")