Spaces:

GloryIX
/

InsightAI

Sleeping

App Files Files Community

GloryIX committed on Feb 25, 2025

Commit

66211f5

verified ·

1 Parent(s): 16f1c26

Create app.py

Browse files

Files changed (1) hide show

app.py +158 -0

app.py ADDED Viewed

	@@ -0,0 +1,158 @@

+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import gradio as gr
+from sklearn.metrics.pairwise import cosine_similarity
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from langchain.memory import ConversationBufferMemory
+from langchain.llms.huggingface_pipeline import HuggingFacePipeline
+from langchain.schema.runnable import RunnableLambda
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.chains.retrieval_qa.base import RetrievalQA
+import io
+import contextlib
+from PIL import Image
+import unittest
+from unittest.mock import patch
+df = pd.read_csv('/content/global-super-store-dataset/Global_Superstore2.csv', encoding='ISO-8859-1')
+schema_info = "\n".join([f"- `{col}` ({dtype})" for col, dtype in df.dtypes.items()])
+history_df = pd.read_csv('/content/sample_requests_and_code_300plus.csv')
+embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+faiss_index = FAISS.from_texts(history_df['request'].tolist(), embeddings)
+retriever = faiss_index.as_retriever()
+# Load the model
+# The same checkpoint name is used for both the tokenizer and the causal-LM weights.
+model_name = "neuralmagic/Llama-2-7b-chat-quantized.w4a16"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
+# Create a text-generation pipeline
+# Sampling is enabled (do_sample=True) with temperature 0.2 and top_p 0.9; each call may
+# generate at most 250 new tokens, and the EOS token id doubles as the padding token id.
+# NOTE(review): trust_remote_code and device_map are passed alongside an already-loaded
+# model object — confirm they have the intended effect here.
+small_pipeline = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    trust_remote_code=True,
+    device_map="auto",
+    max_new_tokens=250,
+    temperature=0.2,
+    top_p=0.9,
+    do_sample=True,
+    repetition_penalty=1.1,
+    pad_token_id=tokenizer.eos_token_id
+)
+# LangChain wrapper around the pipeline; process_query calls it directly with the prompt text.
+llm = HuggingFacePipeline(pipeline=small_pipeline)
+# Conversation buffer; not referenced by any other code visible in this file.
+memory = ConversationBufferMemory()
+# "stuff"-type RetrievalQA chain over the historical-request retriever; generate_prompt
+# calls retrieval_qa.run(user_query) and embeds the answer in the prompt.
+retrieval_qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")
+def generate_prompt(user_query, schema_info):
+    retrieved_docs = retrieval_qa.run(user_query)
+    similar_doc = retriever.get_relevant_documents(user_query, k=1)
+    similar_code = ""
+    if similar_doc:
+        idx = similar_doc[0].metadata.get('index', None)
+        if idx is not None:
+            similar_code = history_df.iloc[idx]['code']
+    messages = [
+        {"role": "system", "content": f"""
+        You are an expert data analyst. Your response MUST:
+        - Return ONLY valid Python Pandas code (no text, no introductions, no explanations, no extra comments).
+        - ⚠️ Start IMMEDIATELY with the Python code block.
+        - ⚡ Use proper parentheses when using logical operators (&, |) in Pandas conditions.
+        - Always include necessary import statements.
+        - ⚡ Do NOT add ANY extra lines, comments, or explanations.
+        {f"- Reference similar code: {similar_code}" if similar_code else ""}
+        """},
+        {"role": "user", "content": f"""
+        Dataset Schema:
+        {retrieved_docs}
+        Query: {user_query}
+        """}
+    ]
+    prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
+    return prompt
+def execute_generated_code(code):
+    local_env = {}
+    output = io.StringIO()
+    plt.close('all')
+    with contextlib.redirect_stdout(output), contextlib.redirect_stderr(output):
+        try:
+            exec(code, globals(), local_env)
+            if plt.get_fignums():
+                buf = io.BytesIO()
+                plt.savefig(buf, format='png')
+                buf.seek(0)
+                img = Image.open(buf)
+                return img
+            return None
+        except Exception:
+            return None
+def process_query(user_query):
+    prompt = generate_prompt(user_query, schema_info)
+    llm_chain = RunnableLambda(lambda x: llm(x["user_query"]))
+    response = llm_chain.invoke({"user_query": prompt})
+    generated_code = response.strip()
+    if "```python" in generated_code:
+        generated_code = generated_code.split("```python")[1].split("```", 1)[0].strip()
+    elif "```" in generated_code:
+        generated_code = generated_code.split("```", 1)[1].split("```", 1)[0].strip()
+    return generated_code
+def gradio_chat_interface(history, query):
+    history.append((query, "⏳ **Processing...**"))
+    yield history, None, ""
+    generated_code = process_query(query)
+    with open('/content/generated_code.py', 'w') as f:
+        f.write(generated_code)
+    image = execute_generated_code(generated_code)
+    history[-1] = (query, f"```python\n{generated_code}\n```) ")
+    yield history, image, ""
+# Build the UI: a markdown header, then one row with a chat/query column (scale 3)
+# and a visualization column (scale 2).
+with gr.Blocks() as demo:
+    gr.Markdown("""
+    # **Interactive Pandas Chat with InsightAI** 💬
+    **Talk to your data, get instant answers!**
+    <div style="text-align: center;">
+        <table style="margin: 0 auto;">
+            <tr>
+                <td>🔍 <strong>Explore your dataset!</strong></td>
+                <td>💻 <strong>Instantly view generated Pandas code.</strong></td>
+            </tr>
+            <tr>
+                <td>📊 <strong>Get accurate responses with RAG-enhanced retrieval.</strong></td>
+                <td>📈 <strong>Live visualizations update on the right.</strong></td>
+            </tr>
+        </table>
+    </div>
+    """)
+    with gr.Row():
+        with gr.Column(scale=3):
+            chatbot = gr.Chatbot(label="Chat with RAG & Historical Context Expert")
+            query_input = gr.Textbox(placeholder="Type your query and press Enter...", label="Your Query")
+        with gr.Column(scale=2):
+            plot_output = gr.Image(label="📊 Visualization", height=500)
+    # Submitting the textbox runs gradio_chat_interface with the current chat history and
+    # query; each value it yields updates the chatbot, the image and the textbox, in that order.
+    query_input.submit(
+        fn=gradio_chat_interface,
+        inputs=[chatbot, query_input],
+        outputs=[chatbot, plot_output, query_input]
+    )
+# Launch the app at import time (module-level side effect).
+demo.launch()