# app.py — ConAstralid document analyzer (Hugging Face Space by ratulsur)
# NOTE: the original paste captured the HF Spaces page chrome here
# ("ratulsur's picture", "Update app.py", commit 3051b11 verified);
# preserved as a comment so the file parses as Python.
import gradio as gr
import pandas as pd
import numpy as np
import faiss
import pypdf
import whisper
import os
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from docx import Document
import csv
# Initialize DeepSeek API client.
# SECURITY FIX: the API key was hard-coded in source control; read it from the
# environment (e.g. a Space secret named DEEPSEEK_API_KEY) instead. The
# previously committed key should be considered compromised and revoked.
client = OpenAI(
    api_key=os.environ.get("DEEPSEEK_API_KEY", ""),
    base_url="https://api.deepseek.com",
)

# Load the Whisper speech-to-text model once at startup ("base" checkpoint).
whisper_model = whisper.load_model("base")
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of all pages of the PDF at *pdf_path*.

    Fixes two defects in the original:
    - ``page.extract_text()`` may return ``None`` (e.g. image-only/scanned
      pages), which made ``text += page.extract_text() + "\\n"`` raise
      ``TypeError``; such pages are now skipped.
    - per-page string ``+=`` (quadratic) replaced with one ``join``.
    """
    reader = pypdf.PdfReader(pdf_path)
    page_texts = []
    for page in reader.pages:
        page_text = page.extract_text()
        if page_text:  # skip pages pypdf could not extract text from
            page_texts.append(page_text)
    return "\n".join(page_texts).strip()
# Function to extract text from a Word document
def extract_text_from_word(doc_path):
    """Return the plain text of a Word (.docx) document.

    Paragraph texts are joined with newlines; leading/trailing whitespace
    of the combined result is stripped.
    """
    document = Document(doc_path)
    paragraph_texts = [paragraph.text for paragraph in document.paragraphs]
    return "\n".join(paragraph_texts).strip()
# Function to extract text from a CSV file
def extract_text_from_csv(csv_path):
    """Render a CSV file as plain text.

    Each row becomes one line with cells separated by " | "; the combined
    result is stripped of surrounding whitespace.

    Fix: the original built the result with repeated string ``+=`` inside
    the loop (quadratic); a single ``join`` over the rows is linear and
    produces byte-identical output.
    """
    with open(csv_path, newline="", encoding="utf-8") as csvfile:
        rows = [" | ".join(row) for row in csv.reader(csvfile)]
    return "\n".join(rows).strip()
# Function to process uploaded document
def process_uploaded_file(file_path):
    """Dispatch an uploaded file to the extractor matching its extension.

    Supported: .pdf, .doc/.docx, .csv (extension compared case-insensitively).
    Any other extension yields a user-facing error string instead of raising.
    """
    extension = os.path.splitext(file_path)[1].lower()
    if extension == ".pdf":
        return extract_text_from_pdf(file_path)
    if extension in (".doc", ".docx"):
        return extract_text_from_word(file_path)
    if extension == ".csv":
        return extract_text_from_csv(file_path)
    return "Unsupported file format. Please upload a PDF, Word, or CSV file."
# Function to split text into chunks
def split_text(text, chunk_size=2000):
    """Split *text* into consecutive chunks of at most *chunk_size* characters.

    The final chunk may be shorter; an empty string yields an empty list.
    """
    chunks = []
    start = 0
    total = len(text)
    while start < total:
        chunks.append(text[start:start + chunk_size])
        start += chunk_size
    return chunks
# Function to convert speech to text
def process_audio(audio_filepath):
    """Convert speech to text using Whisper ASR.

    Returns the transcription, or a user-facing "⚠️ ERROR" string when no
    audio was supplied, the speech was unintelligible (empty transcript),
    or Whisper raised an exception.
    """
    if not audio_filepath:
        return "⚠️ ERROR: No audio detected. Please try speaking again."
    try:
        spoken_text = whisper_model.transcribe(audio_filepath)["text"]
        return spoken_text if spoken_text else "⚠️ ERROR: Speech not clear. Try again."
    except Exception as e:
        return f"⚠️ ERROR: Whisper failed - {str(e)}"
# Agentic AI Capabilities
# 1️⃣ Chain-of-Thought Reasoning
def chain_of_thought_reasoning(query):
    """Break down queries into logical steps.

    NOTE(review): the plan is currently static — *query* is accepted but not
    inspected; the same four-step outline is returned for every question.
    """
    plan = (
        "Step 1: Identify the relevant sections from the document.",
        "Step 2: Retrieve the most relevant parts of the document.",
        "Step 3: Summarize key points and provide a detailed explanation.",
        "Step 4: Generate a user-friendly response.",
    )
    return "\n".join(plan)
# 2️⃣ Multi-Hop Question Answering
def multi_hop_answering(query, document_text):
    """Retrieve multiple relevant sections for a more complete answer.

    Combines three pieces into one formatted string: the static reasoning
    outline, the top-10 FAISS-retrieved sections, and the LLM-generated
    answer (100–500 words requested).

    NOTE(review): ``search_faiss`` is not defined anywhere in this file, so
    this call raises ``NameError`` at runtime unless the helper exists in a
    part of the project not shown here — confirm it was not lost in an edit.
    """
    # Wider retrieval (top_k=10) than tool_use's default, to support multi-hop.
    relevant_sections = search_faiss(query, top_k=10)
    reasoning = chain_of_thought_reasoning(query)
    # chat_with_document performs its own retrieval via tool_use internally.
    answer = chat_with_document(query, document_text, min_words=100, max_words=500)
    return f"Reasoning:\n{reasoning}\n\nExtracted Information:\n{relevant_sections}\n\nFinal Answer:\n{answer}"
# 3️⃣ External Tool Use (Fallback for Missing Answers)
def tool_use(query):
    """If FAISS doesn’t find an answer, ask an external database.

    NOTE(review): despite the name, no external tool is actually invoked —
    a falsy FAISS result only yields a suggestion string offering to search
    external sources. Also, ``search_faiss`` is never defined in this file,
    so this raises ``NameError`` at runtime unless it is provided elsewhere;
    verify before deploying.
    """
    result = search_faiss(query)
    if not result:
        return "I could not find an answer in the document. Would you like me to search external sources?"
    return result
# Function to generate responses using DeepSeek
def chat_with_document(query, document_text, min_words=50, max_words=500):
    """Retrieve relevant text and ask DeepSeek for an answer.

    Parameters:
        query: the user's question.
        document_text: full extracted text of the uploaded document.
        min_words / max_words: requested bounds for the answer length,
            enforced only via the prompt.

    Returns the model's answer string.

    Fix: ``relevant_context`` was computed via ``tool_use`` but never used —
    the retrieved context never reached the prompt, contradicting this
    function's stated purpose. It is now included in the user message.
    """
    relevant_context = tool_use(query)  # FAISS retrieval, with fallback message
    control_prompt = f"Generate an answer between {min_words}-{max_words} words."
    response = client.chat.completions.create(
        model="deepseek-reasoner",
        messages=[
            {"role": "system", "content": "You are an expert document analyst providing insights based on the uploaded document."},
            {"role": "user", "content": f"{control_prompt}\n\nRetrieved Context: {relevant_context}\n\nDocument: {document_text}\n\nUser Question: {query}"},
        ],
        stream=False,
    )
    return response.choices[0].message.content
# Gradio Interface
# Gradio UI: file/voice/text inputs wired to the agentic pipeline.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 Document Analyzer (Agentic AI)")
    # NOTE(review): indentation was lost in the paste; this nesting (only the
    # two upload widgets inside the Row) is a reconstruction — confirm the
    # intended layout against the original file.
    with gr.Row():
        file_input = gr.File(label="Upload Document (PDF, Word, CSV)")
        # type="filepath" so process_audio receives a path Whisper can open.
        voice_input = gr.Audio(label="Speak your Question", type="filepath")
    query_input = gr.Textbox(label="Ask a Question", elem_classes=["big-textbox"])
    submit_button = gr.Button("Analyze", elem_classes=["big-button"])
    response_output = gr.Textbox(label="Answer from Document", interactive=False, elem_classes=["big-response"])

    def analyze_document(file, query):
        """Extract text from the uploaded file and run multi-hop answering;
        returns a user-facing error string when no file was uploaded."""
        if file:
            document_text = process_uploaded_file(file)
            return multi_hop_answering(query, document_text)
        else:
            return "⚠️ ERROR: No document uploaded. Please upload a file."

    # Transcribe speech into the question textbox whenever the audio changes.
    voice_input.change(process_audio, inputs=[voice_input], outputs=[query_input])
    submit_button.click(analyze_document, inputs=[file_input, query_input], outputs=[response_output])

demo.launch()