import streamlit as st import pandas as pd import matplotlib.pyplot as plt import seaborn as sns from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline # Configure page st.set_page_config(page_title="Data Augmentation App", layout="wide") st.markdown( f""" """, unsafe_allow_html=True, ) st.title("Data Augmentation and Analysis App") st.sidebar.title("Upload Your File") st.sidebar.markdown("Supported formats: CSV, Excel") # Get the Hugging Face API key from secrets hf_api_key = st.secrets.get("HUGGINGFACE_KEY") if not hf_api_key: st.error("Hugging Face API key not found in secrets.") else: # Initialize the model and tokenizer using the API key try: model_name = "llama3-70b-8192" # Replace with the correct model name if needed model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=hf_api_key) tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=hf_api_key) llm_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer) st.success(f"Model {model_name} initialized successfully!") except Exception as e: st.error(f"Error initializing model: {e}") def load_file(uploaded_file): """Load the uploaded file.""" if uploaded_file.name.endswith('.csv'): return pd.read_csv(uploaded_file) elif uploaded_file.name.endswith('.xlsx'): return pd.read_excel(uploaded_file) else: st.error("Unsupported file format. Please upload a CSV or Excel file.") return None def generate_graph(data, query): """Generate a graph based on user query.""" try: fig, ax = plt.subplots(figsize=(10, 6)) if "correlation" in query.lower(): sns.heatmap(data.corr(), annot=True, cmap="coolwarm", ax=ax) st.pyplot(fig) elif "histogram" in query.lower(): column = st.selectbox("Select a column for the histogram", data.columns) sns.histplot(data[column], kde=True, ax=ax) st.pyplot(fig) else: st.error("Unsupported graph type. Try asking for a correlation matrix or histogram.") except Exception as e: st.error(f"Error generating graph: {e}") def handle_query(data, query): """Handle user query using the LLM.""" try: if not llm_pipeline: st.error("LLM pipeline is not initialized. Check for errors in setup.") return prompt = f"Given the dataset: {data.to_dict(orient='records')}, answer the following: {query}" response = llm_pipeline(prompt, max_length=200, num_return_sequences=1) st.write("Response:", response[0]['generated_text']) except Exception as e: st.error(f"Error in LLM processing: {e}") # Main App uploaded_file = st.sidebar.file_uploader("Upload your file here", type=["csv", "xlsx"]) if uploaded_file: data = load_file(uploaded_file) if data is not None: st.write("Dataset Preview") st.dataframe(data) query = st.text_area("Ask your question about the dataset") if query: if "table" in query.lower(): st.write("Table Preview") st.write(data) elif "graph" in query.lower(): generate_graph(data, query) elif "predict" in query.lower(): st.write("Prediction functionality is in progress.") else: handle_query(data, query) footer = """