Spaces:

Starberry15
/

data_analysis

Sleeping

App Files Files Community

Starberry15 committed on Oct 22

Commit

50e1eaf

verified ·

1 Parent(s): 16ccce0

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +36 -45

src/streamlit_app.py CHANGED Viewed

@@ -44,7 +44,8 @@ with st.sidebar:
     ANALYST_MODEL = st.selectbox(
         "Select Analysis Model:",
-        [   "Qwen/Qwen2.5-14B-Instruct",
             "mistralai/Mistral-7B-Instruct-v0.3",
             "HuggingFaceH4/zephyr-7b-beta"
         ],
@@ -58,6 +59,33 @@ with st.sidebar:
 cleaner_client = InferenceClient(model=CLEANER_MODEL, token=HF_TOKEN)
 analyst_client = InferenceClient(model=ANALYST_MODEL, token=HF_TOKEN)
 # ======================================================
 # 🧩 SMART DATA CLEANING
 # ======================================================
@@ -79,9 +107,7 @@ def fallback_clean(df: pd.DataFrame) -> pd.DataFrame:
 def ai_clean_dataset(df: pd.DataFrame) -> pd.DataFrame:
-    """
-    Cleans the dataset using the selected AI model. Falls back gracefully if the model fails.
-    """
     raw_preview = df.head(5).to_csv(index=False)
     prompt = f"""
 You are a professional data cleaning assistant.
@@ -97,32 +123,11 @@ Return ONLY a valid CSV text (no markdown, no explanations).
 """
     try:
-        # Try text-generation task first
-        response = cleaner_client.text_generation(
-            prompt,
-            max_new_tokens=1024,
-            temperature=0.1,
-            return_full_text=False,
-        )
-        cleaned_str = response.strip()
     except Exception as e:
-        # Retry with chat completion if needed
-        if "Supported task: conversational" in str(e) or "not supported" in str(e):
-            try:
-                chat_resp = cleaner_client.chat_completion(
-                    messages=[{"role": "user", "content": prompt}],
-                    max_tokens=1024,
-                    temperature=0.1,
-                )
-                cleaned_str = chat_resp["choices"][0]["message"]["content"].strip()
-            except Exception as e2:
-                st.warning(f"⚠️ AI cleaning failed (chat mode): {e2}")
-                return fallback_clean(df)
-        else:
-            st.warning(f"⚠️ AI cleaning failed ({e})")
-            return fallback_clean(df)
-    # Remove possible markdown/code fences
     cleaned_str = (
         cleaned_str.replace("```csv", "")
         .replace("```", "")
@@ -131,12 +136,10 @@ Return ONLY a valid CSV text (no markdown, no explanations).
         .strip()
     )
-    # Keep only valid CSV-like lines
     lines = cleaned_str.splitlines()
     lines = [line for line in lines if "," in line and not line.lower().startswith(("note", "summary"))]
     cleaned_str = "\n".join(lines)
-    # Try parsing robustly
     try:
         cleaned_df = pd.read_csv(StringIO(cleaned_str), on_bad_lines="skip")
         cleaned_df = cleaned_df.dropna(axis=1, how="all")
@@ -186,25 +189,13 @@ Respond with:
 3. Notable relationships or anomalies
 4. Data-driven recommendations
 """
     try:
-        response = analyst_client.text_generation(
-            prompt, temperature=temperature, max_new_tokens=max_tokens, return_full_text=False
-        )
-        return response.strip()
     except Exception as e:
-        if "Supported task: conversational" in str(e) or "not supported" in str(e):
-            try:
-                chat_resp = analyst_client.chat_completion(
-                    messages=[{"role": "user", "content": prompt}],
-                    max_tokens=max_tokens,
-                    temperature=temperature,
-                )
-                return chat_resp["choices"][0]["message"]["content"].strip()
-            except Exception as e2:
-                return f"⚠️ Analysis failed (chat mode): {e2}"
         return f"⚠️ Analysis failed: {e}"
 # ======================================================
 # 🚀 MAIN APP LOGIC
 # ======================================================

     ANALYST_MODEL = st.selectbox(
         "Select Analysis Model:",
+        [
+            "Qwen/Qwen2.5-14B-Instruct",
             "mistralai/Mistral-7B-Instruct-v0.3",
             "HuggingFaceH4/zephyr-7b-beta"
         ],
 cleaner_client = InferenceClient(model=CLEANER_MODEL, token=HF_TOKEN)
 analyst_client = InferenceClient(model=ANALYST_MODEL, token=HF_TOKEN)
+# ======================================================
+# 🧩 SAFE GENERATION FUNCTION
+# ======================================================
+def safe_hf_generate(client, prompt, temperature=0.3, max_tokens=512):
+    """
+    Generate text for ``prompt``, trying text generation before chat completion.
+
+    Calls ``client.text_generation`` first. If that raises and the error message
+    contains "Supported task: conversational" or "not supported", the same prompt
+    is retried through ``client.chat_completion`` as a single user message.
+
+    Args:
+        client: Object exposing ``text_generation`` and ``chat_completion``
+            (callers in this file pass an ``InferenceClient``).
+        prompt: Prompt text sent to the model.
+        temperature: Sampling temperature forwarded to both calls.
+        max_tokens: Output token limit; passed as ``max_new_tokens`` to
+            ``text_generation`` and as ``max_tokens`` to ``chat_completion``.
+
+    Returns:
+        The generated text with leading and trailing whitespace stripped.
+
+    Raises:
+        Exception: The original ``text_generation`` error when its message does
+            not match the fallback markers. Errors raised by the
+            ``chat_completion`` fallback are not caught here and propagate.
+    """
+    try:
+        resp = client.text_generation(
+            prompt,
+            temperature=temperature,
+            max_new_tokens=max_tokens,
+            return_full_text=False,
+        )
+        return resp.strip()
+    except Exception as e:
+        if "Supported task: conversational" in str(e) or "not supported" in str(e):  # fallback is chosen by substring match on the error text
+            chat_resp = client.chat_completion(
+                messages=[{"role": "user", "content": prompt}],
+                max_tokens=max_tokens,
+                temperature=temperature,
+            )
+            return chat_resp["choices"][0]["message"]["content"].strip()  # only the first choice is used
+        else:
+            raise e  # error did not match the fallback markers: surface it to the caller
 # ======================================================
 # 🧩 SMART DATA CLEANING
 # ======================================================
 def ai_clean_dataset(df: pd.DataFrame) -> pd.DataFrame:
+    """Cleans the dataset using the selected AI model. Falls back gracefully if the model fails."""
     raw_preview = df.head(5).to_csv(index=False)
     prompt = f"""
 You are a professional data cleaning assistant.
 """
     try:
+        cleaned_str = safe_hf_generate(cleaner_client, prompt, temperature=0.1, max_tokens=1024)
     except Exception as e:
+        st.warning(f"⚠️ AI cleaning failed: {e}")
+        return fallback_clean(df)
     cleaned_str = (
         cleaned_str.replace("```csv", "")
         .replace("```", "")
         .strip()
     )
     lines = cleaned_str.splitlines()
     lines = [line for line in lines if "," in line and not line.lower().startswith(("note", "summary"))]
     cleaned_str = "\n".join(lines)
     try:
         cleaned_df = pd.read_csv(StringIO(cleaned_str), on_bad_lines="skip")
         cleaned_df = cleaned_df.dropna(axis=1, how="all")
 3. Notable relationships or anomalies
 4. Data-driven recommendations
 """
     try:
+        response = safe_hf_generate(analyst_client, prompt, temperature=temperature, max_tokens=max_tokens)
+        return response
     except Exception as e:
         return f"⚠️ Analysis failed: {e}"
 # ======================================================
 # 🚀 MAIN APP LOGIC
 # ======================================================