|
|
import os |
|
|
import sys |
|
|
import gradio as gr |
|
|
import tempfile |
|
|
import pandas as pd |
|
|
import sqlite3 |
|
|
from langchain_core.prompts import ChatPromptTemplate |
|
|
|
|
|
|
|
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
|
|
|
|
|
from backend.main import process_query, upload_document, process_voice, DocumentAssistant |
|
|
from backend.db import SQLiteDB |
|
|
from backend.vector_db import ChromaVectorDB |
|
|
from backend.query_engine import QueryEngine |
|
|
from backend.voice_assist import VoiceAssistant |
|
|
from backend.document_parser import DocumentParser |
|
|
from backend.agents import DocumentAgents |
|
|
|
|
|
|
|
|
sqlite_db = SQLiteDB() |
|
|
vector_db = ChromaVectorDB(os.getenv("CHROMA_DB_PATH", "./data/chroma_db")) |
|
|
query_engine = QueryEngine() |
|
|
voice_assistant = VoiceAssistant() |
|
|
|
|
|
|
|
|
document_parser = DocumentParser() |
|
|
document_agents = DocumentAgents() |
|
|
|
|
|
|
|
|
document_assistant = DocumentAssistant() |
|
|
|
|
|
|
|
|
query_prompt = ChatPromptTemplate.from_messages( |
|
|
[ |
|
|
("system", """ |
|
|
You are an SQL and data analysis expert. Generate an appropriate SQL query using SQLite syntax for the question provided, without any explanations or code comments. |
|
|
Follow SQLite-specific conventions, as shown in the examples below: |
|
|
|
|
|
Example 1: |
|
|
Question: "What is the average fare for trips over 10 miles?" |
|
|
SQL Query: SELECT AVG(fare_amount) FROM taxi_data WHERE trip_distance > 10; |
|
|
|
|
|
Example 2: |
|
|
Question: "How many trips were taken in each month?" |
|
|
SQL Query: SELECT strftime('%m', pickup_datetime) AS month, COUNT(*) AS trip_count FROM taxi_data GROUP BY month; |
|
|
|
|
|
Example 3: |
|
|
Question: "What is the total fare amount for each driver (medallion) per day?" |
|
|
SQL Query: SELECT DATE(pickup_datetime) AS date, medallion, SUM(fare_amount) AS total_fare FROM taxi_data GROUP BY date, medallion; |
|
|
|
|
|
SQLite-Specific Conventions: |
|
|
|
|
|
1. Date and Time Extraction: |
|
|
- Instead of `EXTRACT(YEAR FROM column)`, use `strftime('%Y', column)` to extract the year. |
|
|
- Example: `SELECT strftime('%Y', pickup_datetime) FROM taxi_data;` |
|
|
|
|
|
2. String Length: |
|
|
- Instead of `CHAR_LENGTH(column)`, use `LENGTH(column)`. |
|
|
- Example: `SELECT LENGTH(passenger_name) FROM taxi_data;` |
|
|
|
|
|
3. Regular Expressions: |
|
|
- SQLite does not support `REGEXP`. Use `LIKE` for simple patterns or avoid regular expressions. |
|
|
- Example: `SELECT * FROM taxi_data WHERE passenger_name LIKE 'A%';` |
|
|
|
|
|
4. Window Functions: |
|
|
- For row numbering, use `ROW_NUMBER()` if supported, or simulate with joins. |
|
|
- Example: `SELECT id, ROW_NUMBER() OVER (ORDER BY pickup_datetime) AS row_num FROM taxi_data;` |
|
|
|
|
|
5. Data Type Casting: |
|
|
- Use `CAST(column AS TYPE)`, but note that SQLite supports limited types. |
|
|
- Example: `SELECT CAST(fare_amount AS INTEGER) FROM taxi_data;` |
|
|
|
|
|
6. Full Outer Join Workaround: |
|
|
- SQLite doesn't support `FULL OUTER JOIN`. Combine `LEFT JOIN` and `UNION` for a similar effect. |
|
|
- Example: |
|
|
``` |
|
|
SELECT a.*, b.* |
|
|
FROM table_a a |
|
|
LEFT JOIN table_b b ON a.id = b.id |
|
|
UNION |
|
|
SELECT a.*, b.* |
|
|
FROM table_a a |
|
|
RIGHT JOIN table_b b ON a.id = b.id; |
|
|
``` |
|
|
|
|
|
Use these examples and guidelines to generate an SQL query compatible with SQLite syntax for the question provided. |
|
|
"""), |
|
|
("human", "{question}"), |
|
|
] |
|
|
) |
|
|
|
|
|
def process_text_query(query, history): |
|
|
"""Process a text query and update chat history""" |
|
|
|
|
|
sqlite_db.log_query(query) |
|
|
|
|
|
|
|
|
response = document_assistant.process_query(query) |
|
|
|
|
|
|
|
|
sqlite_db.log_query(query, response) |
|
|
|
|
|
|
|
|
history.append((query, response)) |
|
|
return "", history |
|
|
|
|
|
def process_file_upload(files): |
|
|
"""Process uploaded files and index them""" |
|
|
file_info = [] |
|
|
for file in files: |
|
|
file_path = file.name |
|
|
file_name = os.path.basename(file_path) |
|
|
file_type = os.path.splitext(file_name)[1].lower() |
|
|
|
|
|
|
|
|
text_chunks = document_parser.parse_document(file_path) |
|
|
|
|
|
|
|
|
doc_id = sqlite_db.add_document(file_name, file_path, file_type) |
|
|
|
|
|
|
|
|
vector_db.add_document(file_path, text_chunks, {"doc_id": doc_id}) |
|
|
|
|
|
file_info.append(f"Indexed: {file_name} ({len(text_chunks)} chunks)") |
|
|
|
|
|
return "\n".join(file_info) |
|
|
|
|
|
def process_voice_input(audio_path): |
|
|
"""Process voice input and return transcribed text""" |
|
|
if audio_path is None: |
|
|
return "No audio recorded" |
|
|
|
|
|
|
|
|
text = voice_assistant.speech_to_text(audio_path) |
|
|
return text |
|
|
|
|
|
def text_to_speech_output(text): |
|
|
"""Convert text to speech""" |
|
|
if not text: |
|
|
return None |
|
|
|
|
|
audio_path = voice_assistant.text_to_speech(text) |
|
|
return audio_path |
|
|
|
|
|
def load_csv_to_sqlite(file_path, conn): |
|
|
|
|
|
chunksize = 1000 |
|
|
for chunk in pd.read_csv(file_path, chunksize=chunksize): |
|
|
|
|
|
if 'pickup_datetime' in chunk.columns: |
|
|
chunk['pickup_datetime'] = pd.to_datetime(chunk['pickup_datetime'], errors='coerce') |
|
|
chunk = chunk.dropna(subset=['pickup_datetime']) |
|
|
|
|
|
|
|
|
chunk.to_sql('data_tab', conn, if_exists='append', index=False, method='multi') |
|
|
|
|
|
|
|
|
with gr.Blocks(title="AI Document Analysis & Voice Assistant") as demo: |
|
|
gr.Markdown("# π€ AI Document Analysis & Voice Assistant") |
|
|
gr.Markdown("Upload documents, ask questions, and get voice responses!") |
|
|
|
|
|
with gr.Tab("Chat"): |
|
|
chatbot = gr.Chatbot(height=400) |
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(scale=8): |
|
|
msg = gr.Textbox( |
|
|
placeholder="Ask a question about your documents...", |
|
|
show_label=False |
|
|
) |
|
|
with gr.Column(scale=1): |
|
|
voice_btn = gr.Button("π€") |
|
|
|
|
|
with gr.Row(): |
|
|
submit_btn = gr.Button("Submit") |
|
|
clear_btn = gr.Button("Clear") |
|
|
|
|
|
audio_output = gr.Audio(label="Voice Response", type="filepath") |
|
|
|
|
|
|
|
|
voice_input = gr.Audio( |
|
|
label="Voice Input", |
|
|
type="filepath", |
|
|
visible=False |
|
|
) |
|
|
|
|
|
|
|
|
submit_btn.click( |
|
|
process_text_query, |
|
|
inputs=[msg, chatbot], |
|
|
outputs=[msg, chatbot] |
|
|
) |
|
|
|
|
|
msg.submit( |
|
|
process_text_query, |
|
|
inputs=[msg, chatbot], |
|
|
outputs=[msg, chatbot] |
|
|
) |
|
|
|
|
|
clear_btn.click(lambda: None, None, chatbot, queue=False) |
|
|
|
|
|
voice_btn.click( |
|
|
lambda: gr.update(visible=True), |
|
|
None, |
|
|
voice_input |
|
|
) |
|
|
|
|
|
voice_input.change( |
|
|
process_voice_input, |
|
|
inputs=[voice_input], |
|
|
outputs=[msg] |
|
|
) |
|
|
|
|
|
|
|
|
tts_btn = gr.Button("π Speak Response") |
|
|
tts_btn.click( |
|
|
text_to_speech_output, |
|
|
inputs=[chatbot], |
|
|
outputs=[audio_output] |
|
|
) |
|
|
|
|
|
with gr.Tab("Document Upload"): |
|
|
file_upload = gr.File( |
|
|
label="Upload Documents", |
|
|
file_types=[".pdf", ".txt", ".docx", ".csv", ".xlsx"], |
|
|
file_count="multiple" |
|
|
) |
|
|
upload_button = gr.Button("Process & Index Documents") |
|
|
upload_output = gr.Textbox(label="Upload Status") |
|
|
|
|
|
upload_button.click( |
|
|
process_file_upload, |
|
|
inputs=[file_upload], |
|
|
outputs=[upload_output] |
|
|
) |
|
|
|
|
|
with gr.Tab("Settings"): |
|
|
gr.Markdown("## System Settings") |
|
|
api_key = gr.Textbox( |
|
|
label="Groq API Key", |
|
|
placeholder="Enter your Groq API key", |
|
|
type="password" |
|
|
) |
|
|
save_btn = gr.Button("Save Settings") |
|
|
|
|
|
def save_settings(key): |
|
|
os.environ["GROQ_API_KEY"] = key |
|
|
return "Settings saved!" |
|
|
|
|
|
save_btn.click( |
|
|
save_settings, |
|
|
inputs=[api_key], |
|
|
outputs=[gr.Textbox(label="Status")] |
|
|
) |
|
|
|
|
|
with gr.Tab("Advanced Query"): |
|
|
gr.Markdown("# π§ Complex Query Processing") |
|
|
gr.Markdown("Use AI agents to process complex queries about your documents") |
|
|
|
|
|
complex_chatbot = gr.Chatbot(height=400) |
|
|
|
|
|
with gr.Row(): |
|
|
complex_msg = gr.Textbox( |
|
|
placeholder="Ask a complex question requiring analysis...", |
|
|
show_label=False |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
complex_submit_btn = gr.Button("Process with Agents") |
|
|
complex_clear_btn = gr.Button("Clear") |
|
|
|
|
|
|
|
|
complex_submit_btn.click( |
|
|
process_complex_query, |
|
|
inputs=[complex_msg, complex_chatbot], |
|
|
outputs=[complex_msg, complex_chatbot] |
|
|
) |
|
|
|
|
|
complex_msg.submit( |
|
|
process_complex_query, |
|
|
inputs=[complex_msg, complex_chatbot], |
|
|
outputs=[complex_msg, complex_chatbot] |
|
|
) |
|
|
|
|
|
complex_clear_btn.click(lambda: None, None, complex_chatbot, queue=False) |
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
demo.launch() |