import os import sys import gradio as gr import tempfile import pandas as pd import sqlite3 from langchain_core.prompts import ChatPromptTemplate #test # Add parent directory to path to import backend modules sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from backend.main import process_query, upload_document, process_voice, DocumentAssistant from backend.db import SQLiteDB from backend.vector_db import ChromaVectorDB from backend.query_engine import QueryEngine from backend.voice_assist import VoiceAssistant from backend.document_parser import DocumentParser from backend.agents import DocumentAgents # Initialize components sqlite_db = SQLiteDB() vector_db = ChromaVectorDB(os.getenv("CHROMA_DB_PATH", "./data/chroma_db")) query_engine = QueryEngine() voice_assistant = VoiceAssistant() # Initialize the document parser and agents document_parser = DocumentParser() document_agents = DocumentAgents() # Initialize DocumentAssistant document_assistant = DocumentAssistant() # Define the prompt with examples query_prompt = ChatPromptTemplate.from_messages( [ ("system", """ You are an SQL and data analysis expert. Generate an appropriate SQL query using SQLite syntax for the question provided, without any explanations or code comments. Follow SQLite-specific conventions, as shown in the examples below: Example 1: Question: "What is the average fare for trips over 10 miles?" SQL Query: SELECT AVG(fare_amount) FROM taxi_data WHERE trip_distance > 10; Example 2: Question: "How many trips were taken in each month?" SQL Query: SELECT strftime('%m', pickup_datetime) AS month, COUNT(*) AS trip_count FROM taxi_data GROUP BY month; Example 3: Question: "What is the total fare amount for each driver (medallion) per day?" SQL Query: SELECT DATE(pickup_datetime) AS date, medallion, SUM(fare_amount) AS total_fare FROM taxi_data GROUP BY date, medallion; SQLite-Specific Conventions: 1. Date and Time Extraction: - Instead of `EXTRACT(YEAR FROM column)`, use `strftime('%Y', column)` to extract the year. - Example: `SELECT strftime('%Y', pickup_datetime) FROM taxi_data;` 2. String Length: - Instead of `CHAR_LENGTH(column)`, use `LENGTH(column)`. - Example: `SELECT LENGTH(passenger_name) FROM taxi_data;` 3. Regular Expressions: - SQLite does not support `REGEXP`. Use `LIKE` for simple patterns or avoid regular expressions. - Example: `SELECT * FROM taxi_data WHERE passenger_name LIKE 'A%';` 4. Window Functions: - For row numbering, use `ROW_NUMBER()` if supported, or simulate with joins. - Example: `SELECT id, ROW_NUMBER() OVER (ORDER BY pickup_datetime) AS row_num FROM taxi_data;` 5. Data Type Casting: - Use `CAST(column AS TYPE)`, but note that SQLite supports limited types. - Example: `SELECT CAST(fare_amount AS INTEGER) FROM taxi_data;` 6. Full Outer Join Workaround: - SQLite doesn't support `FULL OUTER JOIN`. Combine `LEFT JOIN` and `UNION` for a similar effect. - Example: ``` SELECT a.*, b.* FROM table_a a LEFT JOIN table_b b ON a.id = b.id UNION SELECT a.*, b.* FROM table_a a RIGHT JOIN table_b b ON a.id = b.id; ``` Use these examples and guidelines to generate an SQL query compatible with SQLite syntax for the question provided. """), ("human", "{question}"), ] ) def process_text_query(query, history): """Process a text query and update chat history""" # Log query to database sqlite_db.log_query(query) # Use DocumentAssistant to process the query response = document_assistant.process_query(query) # Update database with response sqlite_db.log_query(query, response) # Update history history.append((query, response)) return "", history def process_file_upload(files): """Process uploaded files and index them""" file_info = [] for file in files: file_path = file.name file_name = os.path.basename(file_path) file_type = os.path.splitext(file_name)[1].lower() # Parse document text_chunks = document_parser.parse_document(file_path) # Add to SQLite DB doc_id = sqlite_db.add_document(file_name, file_path, file_type) # Add to vector DB vector_db.add_document(file_path, text_chunks, {"doc_id": doc_id}) file_info.append(f"Indexed: {file_name} ({len(text_chunks)} chunks)") return "\n".join(file_info) def process_voice_input(audio_path): """Process voice input and return transcribed text""" if audio_path is None: return "No audio recorded" # Transcribe audio text = voice_assistant.speech_to_text(audio_path) return text def text_to_speech_output(text): """Convert text to speech""" if not text: return None audio_path = voice_assistant.text_to_speech(text) return audio_path def load_csv_to_sqlite(file_path, conn): # Read the CSV in chunks chunksize = 1000 # Adjust based on your memory constraints for chunk in pd.read_csv(file_path, chunksize=chunksize): # Perform any necessary data cleaning on the chunk if 'pickup_datetime' in chunk.columns: chunk['pickup_datetime'] = pd.to_datetime(chunk['pickup_datetime'], errors='coerce') chunk = chunk.dropna(subset=['pickup_datetime']) # Load the chunk into the SQLite database chunk.to_sql('data_tab', conn, if_exists='append', index=False, method='multi') # Create Gradio interface with gr.Blocks(title="AI Document Analysis & Voice Assistant") as demo: gr.Markdown("# 🤖 AI Document Analysis & Voice Assistant") gr.Markdown("Upload documents, ask questions, and get voice responses!") with gr.Tab("Chat"): chatbot = gr.Chatbot(height=400) with gr.Row(): with gr.Column(scale=8): msg = gr.Textbox( placeholder="Ask a question about your documents...", show_label=False ) with gr.Column(scale=1): voice_btn = gr.Button("🎤") with gr.Row(): submit_btn = gr.Button("Submit") clear_btn = gr.Button("Clear") audio_output = gr.Audio(label="Voice Response", type="filepath") # Voice input voice_input = gr.Audio( label="Voice Input", type="filepath", visible=False ) # Event handlers submit_btn.click( process_text_query, inputs=[msg, chatbot], outputs=[msg, chatbot] ) msg.submit( process_text_query, inputs=[msg, chatbot], outputs=[msg, chatbot] ) clear_btn.click(lambda: None, None, chatbot, queue=False) voice_btn.click( lambda: gr.update(visible=True), None, voice_input ) voice_input.change( process_voice_input, inputs=[voice_input], outputs=[msg] ) # Add TTS functionality tts_btn = gr.Button("🔊 Speak Response") tts_btn.click( text_to_speech_output, inputs=[chatbot], outputs=[audio_output] ) with gr.Tab("Document Upload"): file_upload = gr.File( label="Upload Documents", file_types=[".pdf", ".txt", ".docx", ".csv", ".xlsx"], file_count="multiple" ) upload_button = gr.Button("Process & Index Documents") upload_output = gr.Textbox(label="Upload Status") upload_button.click( process_file_upload, inputs=[file_upload], outputs=[upload_output] ) with gr.Tab("Settings"): gr.Markdown("## System Settings") api_key = gr.Textbox( label="Groq API Key", placeholder="Enter your Groq API key", type="password" ) save_btn = gr.Button("Save Settings") def save_settings(key): os.environ["GROQ_API_KEY"] = key return "Settings saved!" save_btn.click( save_settings, inputs=[api_key], outputs=[gr.Textbox(label="Status")] ) with gr.Tab("Advanced Query"): gr.Markdown("# 🧠 Complex Query Processing") gr.Markdown("Use AI agents to process complex queries about your documents") complex_chatbot = gr.Chatbot(height=400) with gr.Row(): complex_msg = gr.Textbox( placeholder="Ask a complex question requiring analysis...", show_label=False ) with gr.Row(): complex_submit_btn = gr.Button("Process with Agents") complex_clear_btn = gr.Button("Clear") # Event handlers complex_submit_btn.click( process_complex_query, inputs=[complex_msg, complex_chatbot], outputs=[complex_msg, complex_chatbot] ) complex_msg.submit( process_complex_query, inputs=[complex_msg, complex_chatbot], outputs=[complex_msg, complex_chatbot] ) complex_clear_btn.click(lambda: None, None, complex_chatbot, queue=False) # Launch the app if __name__ == "__main__": demo.launch()