Starberry15 committed on
Commit
50e1eaf
·
verified ·
1 Parent(s): 16ccce0

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +36 -45
src/streamlit_app.py CHANGED
@@ -44,7 +44,8 @@ with st.sidebar:
44
 
45
  ANALYST_MODEL = st.selectbox(
46
  "Select Analysis Model:",
47
- [ "Qwen/Qwen2.5-14B-Instruct",
 
48
  "mistralai/Mistral-7B-Instruct-v0.3",
49
  "HuggingFaceH4/zephyr-7b-beta"
50
  ],
@@ -58,6 +59,33 @@ with st.sidebar:
58
  cleaner_client = InferenceClient(model=CLEANER_MODEL, token=HF_TOKEN)
59
  analyst_client = InferenceClient(model=ANALYST_MODEL, token=HF_TOKEN)
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  # ======================================================
62
  # 🧩 SMART DATA CLEANING
63
  # ======================================================
@@ -79,9 +107,7 @@ def fallback_clean(df: pd.DataFrame) -> pd.DataFrame:
79
 
80
 
81
  def ai_clean_dataset(df: pd.DataFrame) -> pd.DataFrame:
82
- """
83
- Cleans the dataset using the selected AI model. Falls back gracefully if the model fails.
84
- """
85
  raw_preview = df.head(5).to_csv(index=False)
86
  prompt = f"""
87
  You are a professional data cleaning assistant.
@@ -97,32 +123,11 @@ Return ONLY a valid CSV text (no markdown, no explanations).
97
  """
98
 
99
  try:
100
- # Try text-generation task first
101
- response = cleaner_client.text_generation(
102
- prompt,
103
- max_new_tokens=1024,
104
- temperature=0.1,
105
- return_full_text=False,
106
- )
107
- cleaned_str = response.strip()
108
  except Exception as e:
109
- # Retry with chat completion if needed
110
- if "Supported task: conversational" in str(e) or "not supported" in str(e):
111
- try:
112
- chat_resp = cleaner_client.chat_completion(
113
- messages=[{"role": "user", "content": prompt}],
114
- max_tokens=1024,
115
- temperature=0.1,
116
- )
117
- cleaned_str = chat_resp["choices"][0]["message"]["content"].strip()
118
- except Exception as e2:
119
- st.warning(f"⚠️ AI cleaning failed (chat mode): {e2}")
120
- return fallback_clean(df)
121
- else:
122
- st.warning(f"⚠️ AI cleaning failed ({e})")
123
- return fallback_clean(df)
124
 
125
- # Remove possible markdown/code fences
126
  cleaned_str = (
127
  cleaned_str.replace("```csv", "")
128
  .replace("```", "")
@@ -131,12 +136,10 @@ Return ONLY a valid CSV text (no markdown, no explanations).
131
  .strip()
132
  )
133
 
134
- # Keep only valid CSV-like lines
135
  lines = cleaned_str.splitlines()
136
  lines = [line for line in lines if "," in line and not line.lower().startswith(("note", "summary"))]
137
  cleaned_str = "\n".join(lines)
138
 
139
- # Try parsing robustly
140
  try:
141
  cleaned_df = pd.read_csv(StringIO(cleaned_str), on_bad_lines="skip")
142
  cleaned_df = cleaned_df.dropna(axis=1, how="all")
@@ -186,25 +189,13 @@ Respond with:
186
  3. Notable relationships or anomalies
187
  4. Data-driven recommendations
188
  """
 
189
  try:
190
- response = analyst_client.text_generation(
191
- prompt, temperature=temperature, max_new_tokens=max_tokens, return_full_text=False
192
- )
193
- return response.strip()
194
  except Exception as e:
195
- if "Supported task: conversational" in str(e) or "not supported" in str(e):
196
- try:
197
- chat_resp = analyst_client.chat_completion(
198
- messages=[{"role": "user", "content": prompt}],
199
- max_tokens=max_tokens,
200
- temperature=temperature,
201
- )
202
- return chat_resp["choices"][0]["message"]["content"].strip()
203
- except Exception as e2:
204
- return f"⚠️ Analysis failed (chat mode): {e2}"
205
  return f"⚠️ Analysis failed: {e}"
206
 
207
-
208
  # ======================================================
209
  # 🚀 MAIN APP LOGIC
210
  # ======================================================
 
44
 
45
  ANALYST_MODEL = st.selectbox(
46
  "Select Analysis Model:",
47
+ [
48
+ "Qwen/Qwen2.5-14B-Instruct",
49
  "mistralai/Mistral-7B-Instruct-v0.3",
50
  "HuggingFaceH4/zephyr-7b-beta"
51
  ],
 
59
  cleaner_client = InferenceClient(model=CLEANER_MODEL, token=HF_TOKEN)
60
  analyst_client = InferenceClient(model=ANALYST_MODEL, token=HF_TOKEN)
61
 
62
+ # ======================================================
63
+ # 🧩 SAFE GENERATION FUNCTION
64
+ # ======================================================
65
def safe_hf_generate(client, prompt: str, temperature: float = 0.3, max_tokens: int = 512) -> str:
    """Generate text with ``client``, falling back to chat completion if needed.

    ``client.text_generation`` is tried first. If it raises an error whose
    message indicates the task is unsupported for the model, the same prompt
    is re-sent as a single user message through ``client.chat_completion``.

    Args:
        client: Object exposing ``text_generation`` and ``chat_completion``
            (used here with ``InferenceClient`` instances).
        prompt: Prompt text sent to the model.
        temperature: Sampling temperature forwarded to the model call.
        max_tokens: Generation limit, passed as ``max_new_tokens`` for text
            generation and ``max_tokens`` for chat completion.

    Returns:
        The generated text with surrounding whitespace stripped. An empty
        string is returned when the chat response carries no content.

    Raises:
        Exception: Any error from ``text_generation`` that is not an
            "unsupported task" error is re-raised unchanged; errors from the
            chat fallback propagate to the caller.
    """
    try:
        generated = client.text_generation(
            prompt,
            temperature=temperature,
            max_new_tokens=max_tokens,
            return_full_text=False,
        )
        return generated.strip()
    except Exception as err:
        message = str(err)
        unsupported = (
            "Supported task: conversational" in message
            or "not supported" in message
        )
        if not unsupported:
            # Bare raise keeps the original exception and traceback intact.
            raise

        chat_resp = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
        )
        content = chat_resp["choices"][0]["message"]["content"]
        # Chat message content may be None; normalise so the function
        # always returns a string as documented.
        return (content or "").strip()
88
+
89
  # ======================================================
90
  # 🧩 SMART DATA CLEANING
91
  # ======================================================
 
107
 
108
 
109
  def ai_clean_dataset(df: pd.DataFrame) -> pd.DataFrame:
110
+ """Cleans the dataset using the selected AI model. Falls back gracefully if the model fails."""
 
 
111
  raw_preview = df.head(5).to_csv(index=False)
112
  prompt = f"""
113
  You are a professional data cleaning assistant.
 
123
  """
124
 
125
  try:
126
+ cleaned_str = safe_hf_generate(cleaner_client, prompt, temperature=0.1, max_tokens=1024)
 
 
 
 
 
 
 
127
  except Exception as e:
128
+ st.warning(f"⚠️ AI cleaning failed: {e}")
129
+ return fallback_clean(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
 
131
  cleaned_str = (
132
  cleaned_str.replace("```csv", "")
133
  .replace("```", "")
 
136
  .strip()
137
  )
138
 
 
139
  lines = cleaned_str.splitlines()
140
  lines = [line for line in lines if "," in line and not line.lower().startswith(("note", "summary"))]
141
  cleaned_str = "\n".join(lines)
142
 
 
143
  try:
144
  cleaned_df = pd.read_csv(StringIO(cleaned_str), on_bad_lines="skip")
145
  cleaned_df = cleaned_df.dropna(axis=1, how="all")
 
189
  3. Notable relationships or anomalies
190
  4. Data-driven recommendations
191
  """
192
+
193
  try:
194
+ response = safe_hf_generate(analyst_client, prompt, temperature=temperature, max_tokens=max_tokens)
195
+ return response
 
 
196
  except Exception as e:
 
 
 
 
 
 
 
 
 
 
197
  return f"⚠️ Analysis failed: {e}"
198
 
 
199
  # ======================================================
200
  # 🚀 MAIN APP LOGIC
201
  # ======================================================