Spaces:

yonkoyonks
/

csvBot

Sleeping

App Files Files Community

yonkoyonks committed on Oct 16, 2025

Commit

c23a150

verified ·

1 Parent(s): a3796ba

Update utils.py

Browse files

Files changed (1) hide show

utils.py +61 -61

utils.py CHANGED Viewed

@@ -1,62 +1,62 @@
-from langchain_community.llms import LlamaCpp
-from dotenv import load_dotenv
-import os
-import pandas as pd
-# Load environment variables
-load_dotenv()
-MODEL_PATH = os.getenv("MODEL_PATH", "./models/gemma-2b-it.Q2_K.gguf")
-def summarize_dataframe(df: pd.DataFrame, max_rows: int = 30) -> str:
-    """Summarize a dataframe to avoid overloading the context window."""
-    summary = f"Columns: {', '.join(df.columns)}\n\n"
-    if len(df) > max_rows:
-        sample = df.sample(max_rows, random_state=42)
-        summary += "Showing a random sample of rows:\n"
-    else:
-        sample = df
-        summary += "Showing all rows:\n"
-    summary += sample.to_string(index=False)
-    return summary
-def query_agent(df: pd.DataFrame, query: str) -> str:
-    """Query a CSV/DataFrame using your local Gemma model with context-aware limits."""
-    #  Attempt to handle simple analytical questions directly with pandas
-    query_lower = query.lower()
-    try:
-        if "most common" in query_lower or "most frequent" in query_lower:
-            for col in df.columns:
-                if col.lower() in query_lower:
-                    value = df[col].mode()[0]
-                    return f"The most common value in column '{col}' is '{value}'."
-    except Exception as e:
-        print("Direct analysis failed:", e)
-    #  Otherwise summarize dataset for LLM
-    data_text = summarize_dataframe(df)
-    prompt = f"""
-    You are a data analysis assistant with expertise in statistics and data interpretation.
-    Analyze the dataset sample below and answer the user's question in a **clear, detailed, and well-explained way**.
-    Include both the **direct answer** and a short **explanation or reasoning** behind it.
-    Dataset Summary:
-    {data_text}
-    Question:
-    {query}
-    Answer (with explanation):
-    """
-    llm = LlamaCpp(
-        model_path=MODEL_PATH,
-        temperature=0.7,
-        max_new_tokens=1024,
-        n_ctx=16384,
-        verbose=True,
-    )
-    answer = llm(prompt)
     return answer

+from langchain_community.llms import LlamaCpp
+from dotenv import load_dotenv
+import os
+import pandas as pd
+# Load environment variables
+load_dotenv()
+# Model file passed to LlamaCpp as ``model_path``; override it by setting the
+# MODEL_PATH environment variable.
+# NOTE(review): the default looks like a Hub-style "<owner>/<repo>/<file>"
+# identifier rather than a path known to exist locally -- confirm the GGUF
+# file is really present at this relative path before relying on the default.
+MODEL_PATH = os.getenv("MODEL_PATH", "TheBloke/gemma-2b-it-GGUF/gemma-2b-it.Q4_K_M.gguf")
+def summarize_dataframe(df: pd.DataFrame, max_rows: int = 30) -> str:
+    """Summarize a dataframe to avoid overloading the context window."""
+    summary = f"Columns: {', '.join(df.columns)}\n\n"
+    if len(df) > max_rows:
+        sample = df.sample(max_rows, random_state=42)
+        summary += "Showing a random sample of rows:\n"
+    else:
+        sample = df
+        summary += "Showing all rows:\n"
+    summary += sample.to_string(index=False)
+    return summary
+def query_agent(df: pd.DataFrame, query: str) -> str:
+    """Query a CSV/DataFrame using your local Gemma model with context-aware limits."""
+    #  Attempt to handle simple analytical questions directly with pandas
+    query_lower = query.lower()
+    try:
+        if "most common" in query_lower or "most frequent" in query_lower:
+            for col in df.columns:
+                if col.lower() in query_lower:
+                    value = df[col].mode()[0]
+                    return f"The most common value in column '{col}' is '{value}'."
+    except Exception as e:
+        print("Direct analysis failed:", e)
+    #  Otherwise summarize dataset for LLM
+    data_text = summarize_dataframe(df)
+    prompt = f"""
+    You are a data analysis assistant with expertise in statistics and data interpretation.
+    Analyze the dataset sample below and answer the user's question in a **clear, detailed, and well-explained way**.
+    Include both the **direct answer** and a short **explanation or reasoning** behind it.
+    Dataset Summary:
+    {data_text}
+    Question:
+    {query}
+    Answer (with explanation):
+    """
+    llm = LlamaCpp(
+        model_path=MODEL_PATH,
+        temperature=0.7,
+        max_new_tokens=1024,
+        n_ctx=16384,
+        verbose=True,
+    )
+    answer = llm(prompt)
     return answer