# chatwithdoc / app.py
# sajjadrahman56's picture
# Upload 2 files
# 257d1e2 verified
import gradio as gr
import os
from io import BytesIO
from docx import Document
from together import Together
# ------------------ TEXT EXTRACTION ------------------
def extract_text_from_docx(docx_file):
    """Return the paragraph text of a DOCX given as bytes, a stream, or a path.

    Raw ``bytes`` and objects exposing ``read()`` are copied into an
    in-memory buffer before parsing; a stream that also exposes ``seek()``
    is rewound to offset 0 afterwards.  Any other value is handed to
    ``Document`` unchanged.

    Only ``document.paragraphs`` is read, so text stored elsewhere in the
    file is not part of the result.

    Returns:
        The paragraph texts joined with newlines.  Failures are reported
        in-band rather than raised: the fixed sentence
        "No text could be extracted from the DOCX file." when the result is
        blank, or a string starting with "Error extracting text from DOCX:"
        when anything in the process raises.
    """
    try:
        if isinstance(docx_file, bytes):
            source = BytesIO(docx_file)
        elif not hasattr(docx_file, 'read'):
            # Neither bytes nor a stream: let Document interpret it itself.
            source = docx_file
        else:
            source = BytesIO(docx_file.read())
            # Leave the caller's stream positioned at the start again.
            if hasattr(docx_file, 'seek'):
                docx_file.seek(0)

        paragraphs = Document(source).paragraphs
        extracted = "\n".join(paragraph.text for paragraph in paragraphs)
        if extracted.strip():
            return extracted
        return "No text could be extracted from the DOCX file."
    except Exception as exc:
        return f"Error extracting text from DOCX: {exc}"
# ------------------ CHAT FUNCTION ------------------
def chat_with_docx(api_key, docx_text, user_question, history):
    """Answer a question about extracted DOCX text via the Together chat API.

    Args:
        api_key: Together API key; ``None`` or blank is rejected.
        docx_text: Text previously extracted from the uploaded document.
        user_question: The question to ask about the document.
        history: Prior chat turns as ``(user, assistant)`` pairs; ``None`` is
            treated as an empty conversation.

    Returns:
        A 2-tuple of chat histories.  On success both elements are the
        history with the new ``(question, answer)`` pair appended.  On a
        validation or API failure the first element has the question paired
        with an error message appended, and the second is the history
        without that pair.
    """
    # Exact placeholders produced by the extraction step.  Matching on these
    # instead of the bare words "Error" / "No text" keeps genuine documents
    # that merely begin with those words from being rejected.
    extraction_error_prefix = "Error extracting text from DOCX:"
    no_text_message = "No text could be extracted from the DOCX file."

    # Normalise inputs so a missing value is reported, not raised.
    history = list(history or [])
    api_key = (api_key or "").strip()
    docx_text = docx_text or ""
    user_question = user_question or ""

    def reject(message):
        return history + [(user_question, message)], history

    if not api_key:
        return reject("❌ Please enter your Together API key.")
    if (
        not docx_text.strip()
        or docx_text.startswith(extraction_error_prefix)
        or docx_text == no_text_message
    ):
        return reject("⚠️ Please upload a valid DOCX file with extractable text first.")
    if not user_question.strip():
        return reject("⚠️ Please enter a question.")

    try:
        client = Together(api_key=api_key)

        # Keep the prompt bounded: for long documents send only the first
        # and last halves of the character budget.
        max_context_length = 10000
        if len(docx_text) > max_context_length:
            half = max_context_length // 2
            docx_context = (
                docx_text[:half]
                + "\n\n[...Content truncated...]\n\n"
                + docx_text[-half:]
            )
        else:
            docx_context = docx_text

        system_message = (
            "You are an intelligent assistant designed to read and understand DOCX documents.\n"
            "Based on the user's questions, provide answers grounded only in the document below.\n"
            "DOCX CONTENT:\n"
            f"{docx_context}\n"
            "Only answer based on the document above. If the answer isn't there, say so politely."
        )

        messages = [{"role": "system", "content": system_message}]
        for h_user, h_bot in history:
            # Skip missing/empty turns so no message with empty content is sent.
            if h_user:
                messages.append({"role": "user", "content": h_user})
            if h_bot:
                messages.append({"role": "assistant", "content": h_bot})
        messages.append({"role": "user", "content": user_question})

        response = client.chat.completions.create(
            model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
            messages=messages,
            max_tokens=5000,
            temperature=0.7,
        )
        assistant_response = response.choices[0].message.content
        updated_history = history + [(user_question, assistant_response)]
        return updated_history, updated_history
    except Exception as e:
        # Boundary handler: surface any client/API failure in the chat window.
        return reject(f"❌ Error: {e}")
# ------------------ FILE PROCESSING ------------------
def process_docx(docx_file, api_key_input):
    """Extract text from an uploaded DOCX and report the outcome.

    Args:
        docx_file: The uploaded file (raw bytes, a file-like object, or a
            path), or ``None`` when nothing was uploaded.
        api_key_input: Not used by this function; kept because the UI
            wiring passes it.

    Returns:
        A 3-tuple ``(status_message, docx_text, chat_history)``.  The text is
        the extracted document text on success and ``""`` on any failure; the
        chat history is always an empty list, which resets the conversation.
    """
    # Exact placeholders produced by the extraction step.  Matching on these
    # instead of the bare words "Error" / "No text" keeps genuine documents
    # that merely begin with those words from being reported as failures.
    extraction_error_prefix = "Error extracting text from DOCX:"
    no_text_message = "No text could be extracted from the DOCX file."

    if docx_file is None:
        return "⚠️ Please upload a DOCX file.", "", []

    try:
        if isinstance(docx_file, (str, os.PathLike)):
            # A plain path carries its own file name.
            file_name = os.path.basename(os.fspath(docx_file))
        elif hasattr(docx_file, 'name'):
            file_name = os.path.basename(docx_file.name)
        else:
            file_name = "Uploaded DOCX"

        docx_text = extract_text_from_docx(docx_file)
        if docx_text.startswith(extraction_error_prefix):
            return f"❌ {docx_text}", "", []
        if not docx_text.strip() or docx_text == no_text_message:
            return f"⚠️ {docx_text}", "", []

        word_count = len(docx_text.split())
        status_message = (
            f"✅ Successfully processed DOCX: {file_name} "
            f"({word_count} words extracted)"
        )
        return status_message, docx_text, []
    except Exception as e:
        # Boundary handler: report unexpected failures in the status box.
        return f"❌ Error processing DOCX: {e}", "", []
def validate_api_key(api_key):
    """Return a short status string describing whether the key looks usable.

    This is a format check only: the key must be non-blank and, after
    stripping surrounding whitespace, at least 10 characters long.  No
    request is made to verify the key.
    """
    key = (api_key or "").strip()
    if not key:
        return "❌ API Key is required"
    if len(key) < 10:
        return "❌ API Key appears to be too short"
    # The check mark was previously stored as mis-decoded bytes.
    return "✓ API Key format looks valid"
# ------------------ GRADIO APP ------------------
# with gr.Blocks(title="ChatDOCX with Together AI") as app:
with gr.Blocks(
    theme=gr.themes.Soft(),
    title="ChatDOCX with Together AI",
) as app:
    gr.Markdown("# 📄 ChatDOCX with Together AI")
    gr.Markdown("Upload a DOCX file and chat with it using the Llama-3.3-70B model.")

    with gr.Row():
        # Left column: credentials, upload and extraction status.
        with gr.Column(scale=1):
            api_key_input = gr.Textbox(
                label="Together API Key",
                placeholder="Enter your Together API key...",
                type="password",
            )
            api_key_status = gr.Textbox(label="API Key Status", interactive=False)
            # Only .docx is offered: the extraction step parses the upload
            # with python-docx, which does not read the legacy .doc format.
            docx_file = gr.File(label="Upload DOCX", file_types=[".docx"], type="binary")
            process_button = gr.Button("Process DOCX")
            status_message = gr.Textbox(label="Status", interactive=False)
            # Hidden holder for the full extracted text shared between events.
            docx_text = gr.Textbox(visible=False)
            with gr.Accordion("DOCX Content Preview", open=False):
                docx_preview = gr.Textbox(
                    label="Extracted Text Preview",
                    interactive=False,
                    max_lines=10,
                    show_copy_button=True,
                )

        # Right column: the conversation.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat with DOCX", height=500)
            question = gr.Textbox(
                label="Ask a question about the DOCX",
                placeholder="What is the main topic of this document?",
                lines=2,
            )
            submit_button = gr.Button("Submit Question")

    def update_preview(text):
        """Return at most the first 500 characters of *text* for the preview box."""
        # Match the exact extraction placeholders so a real document that
        # merely starts with "Error" or "No text" is still truncated.
        extraction_error_prefix = "Error extracting text from DOCX:"
        no_text_message = "No text could be extracted from the DOCX file."
        if (
            not text
            or text.startswith(extraction_error_prefix)
            or text == no_text_message
        ):
            return text
        preview = text[:500]
        if len(text) > 500:
            preview += "...\n[Text truncated for preview. Full text will be used for chat.]"
        return preview

    api_key_input.change(
        validate_api_key,
        inputs=api_key_input,
        outputs=api_key_status,
    )

    process_button.click(
        process_docx,
        inputs=[docx_file, api_key_input],
        outputs=[status_message, docx_text, chatbot],
    ).then(
        update_preview,
        inputs=[docx_text],
        outputs=[docx_preview],
    )

    # The button and the Enter key run the same chat step, then clear the box.
    # NOTE(review): chat_with_docx returns two histories and both are routed
    # to the same chatbot component - confirm which of the two Gradio keeps.
    for ask_trigger in (submit_button.click, question.submit):
        ask_trigger(
            chat_with_docx,
            inputs=[api_key_input, docx_text, question, chatbot],
            outputs=[chatbot, chatbot],
        ).then(lambda: "", outputs=question)

if __name__ == "__main__":
    # share=True requests a public Gradio share link in addition to the
    # local server.
    app.launch(share=True)