"""Gradio app for chatting with an uploaded DOCX file through the Together API."""

import os
from io import BytesIO

import gradio as gr
from docx import Document
from together import Together

# Exact failure messages produced by extract_text_from_docx. Failure detection
# matches these full strings so that a real document which merely starts with
# "Error" or "No text" is not mistaken for a failed extraction.
_EXTRACTION_ERROR_PREFIX = "Error extracting text from DOCX:"
_NO_TEXT_MESSAGE = "No text could be extracted from the DOCX file."

# Maximum number of document characters placed in the system prompt.
_MAX_CONTEXT_CHARS = 10000


def _is_extraction_failure(text):
    """Return True if *text* is a failure message from extract_text_from_docx."""
    return text.startswith(_EXTRACTION_ERROR_PREFIX) or text == _NO_TEXT_MESSAGE


# ------------------ TEXT EXTRACTION ------------------

def extract_text_from_docx(docx_file):
    """Extract the paragraph text of a DOCX file.

    Args:
        docx_file: Raw ``bytes``, an object with a ``read()`` method, or any
            other value that ``docx.Document`` accepts directly.

    Returns:
        The paragraph texts joined with newlines. On failure a message is
        returned instead of raising: one starting with
        "Error extracting text from DOCX:" when parsing fails, or
        "No text could be extracted from the DOCX file." when the document
        contains no non-whitespace paragraph text.
    """
    try:
        if isinstance(docx_file, bytes):
            file_obj = BytesIO(docx_file)
        elif hasattr(docx_file, "read"):
            file_obj = BytesIO(docx_file.read())
            # Rewind so the caller can read the same object again.
            if hasattr(docx_file, "seek"):
                docx_file.seek(0)
        else:
            file_obj = docx_file

        document = Document(file_obj)
        text = "\n".join(para.text for para in document.paragraphs)
    except Exception as e:
        # UI boundary: report any parsing problem as a message, never crash.
        return f"{_EXTRACTION_ERROR_PREFIX} {e}"

    if not text.strip():
        return _NO_TEXT_MESSAGE
    return text


# ------------------ CHAT FUNCTION ------------------

def _build_document_context(docx_text, max_chars=_MAX_CONTEXT_CHARS):
    """Return *docx_text*, keeping only its head and tail if it exceeds *max_chars*."""
    if len(docx_text) <= max_chars:
        return docx_text
    half = max_chars // 2
    return docx_text[:half] + "\n\n[...Content truncated...]\n\n" + docx_text[-half:]


def _build_messages(docx_context, history, user_question):
    """Assemble the chat-completion message list: system prompt, past turns, new question."""
    system_message = (
        "You are an intelligent assistant designed to read and understand DOCX documents. "
        "Based on the user's questions, provide answers grounded only in the document below. "
        f"DOCX CONTENT: {docx_context} "
        "Only answer based on the document above. \n"
        "If the answer isn't there, say so politely."
    )
    messages = [{"role": "system", "content": system_message}]
    for h_user, h_bot in history:
        # Skip missing halves of a turn; a None content is not a valid message.
        if h_user is not None:
            messages.append({"role": "user", "content": h_user})
        if h_bot is not None:
            messages.append({"role": "assistant", "content": h_bot})
    messages.append({"role": "user", "content": user_question})
    return messages


def chat_with_docx(api_key, docx_text, user_question, history):
    """Answer a question about the document using the Together API.

    Args:
        api_key: Together API key entered by the user.
        docx_text: Text previously returned by extract_text_from_docx.
        user_question: The question to answer.
        history: Previous ``(question, answer)`` pairs; ``None`` is treated
            as an empty conversation.

    Returns:
        A ``(display_history, conversation_history)`` tuple. On success both
        are the history extended with the new turn. On a validation or API
        failure, ``display_history`` additionally carries the error turn while
        ``conversation_history`` is left unchanged, so error messages are
        shown to the user but never replayed to the model.
    """
    history = list(history or [])

    def rejected(message):
        return history + [(user_question, message)], history

    if not api_key or not api_key.strip():
        return rejected("❌ Please enter your Together API key.")

    if not docx_text or not docx_text.strip() or _is_extraction_failure(docx_text):
        return rejected("⚠️ Please upload a valid DOCX file with extractable text first.")

    if not user_question or not user_question.strip():
        return rejected("⚠️ Please enter a question.")

    try:
        # Pasted keys often carry stray whitespace; validation strips it too.
        client = Together(api_key=api_key.strip())
        messages = _build_messages(
            _build_document_context(docx_text), history, user_question
        )
        response = client.chat.completions.create(
            model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
            messages=messages,
            max_tokens=5000,
            temperature=0.7,
        )
        assistant_response = response.choices[0].message.content
    except Exception as e:
        # UI boundary: surface the failure in the chat window.
        return rejected(f"❌ Error: {str(e)}")

    updated = history + [(user_question, assistant_response)]
    return updated, updated


# ------------------ FILE PROCESSING ------------------

def process_docx(docx_file, api_key_input):
    """Extract text from the uploaded DOCX and build a status message.

    Args:
        docx_file: The uploaded file (bytes or file-like), or ``None``.
        api_key_input: Unused; kept so existing event wiring keeps working.

    Returns:
        A ``(status_message, docx_text, chat_history)`` tuple. ``docx_text``
        is empty on any failure and ``chat_history`` is always an empty list,
        which clears the chat for the new document.
    """
    if docx_file is None:
        return "⚠️ Please upload a DOCX file.", "", []

    try:
        if hasattr(docx_file, "name"):
            file_name = os.path.basename(docx_file.name)
        else:
            file_name = "Uploaded DOCX"

        docx_text = extract_text_from_docx(docx_file)

        if docx_text.startswith(_EXTRACTION_ERROR_PREFIX):
            return f"❌ {docx_text}", "", []

        if not docx_text.strip() or docx_text == _NO_TEXT_MESSAGE:
            return f"⚠️ {docx_text}", "", []

        word_count = len(docx_text.split())
        status_message = (
            f"✅ Successfully processed DOCX: {file_name} ({word_count} words extracted)"
        )
        return status_message, docx_text, []
    except Exception as e:
        return f"❌ Error processing DOCX: {str(e)}", "", []


def validate_api_key(api_key):
    """Return a status string from a basic format check of the API key.

    Only presence and a minimum length of 10 characters (after stripping)
    are checked; the key is not verified against the API.
    """
    if not api_key or not api_key.strip():
        return "❌ API Key is required"
    if len(api_key.strip()) < 10:
        return "❌ API Key appears to be too short"
    return "✓ API Key format looks valid"


# ------------------ GRADIO APP ------------------

with gr.Blocks(
    theme=gr.themes.Soft(),
    title="ChatDOCX with Together AI",
) as app:
    gr.Markdown("# 📄 ChatDOCX with Together AI")
    gr.Markdown("Upload a DOCX file and chat with it using the Llama-3.3-70B model.")

    # Clean conversation (successful turns only) that is replayed to the model.
    # Kept separate from the Chatbot component, which also shows error turns.
    chat_history = gr.State([])

    with gr.Row():
        with gr.Column(scale=1):
            api_key_input = gr.Textbox(
                label="Together API Key",
                placeholder="Enter your Together API key...",
                type="password",
            )
            api_key_status = gr.Textbox(label="API Key Status", interactive=False)
            # python-docx only reads the .docx (Open XML) format, so legacy
            # .doc files are not offered in the file picker.
            docx_file = gr.File(label="Upload DOCX", file_types=[".docx"], type="binary")
            process_button = gr.Button("Process DOCX")
            status_message = gr.Textbox(label="Status", interactive=False)
            docx_text = gr.Textbox(visible=False)

            with gr.Accordion("DOCX Content Preview", open=False):
                docx_preview = gr.Textbox(
                    label="Extracted Text Preview",
                    interactive=False,
                    max_lines=10,
                    show_copy_button=True,
                )

        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat with DOCX", height=500)
            question = gr.Textbox(
                label="Ask a question about the DOCX",
                placeholder="What is the main topic of this document?",
                lines=2,
            )
            submit_button = gr.Button("Submit Question")

    def update_preview(text):
        """Return the first 500 characters of *text* for the preview box."""
        if not text or _is_extraction_failure(text):
            return text
        preview = text[:500]
        if len(text) > 500:
            preview += "...\n[Text truncated for preview. Full text will be used for chat.]"
        return preview

    api_key_input.change(validate_api_key, inputs=api_key_input, outputs=api_key_status)

    process_button.click(
        process_docx,
        inputs=[docx_file, api_key_input],
        outputs=[status_message, docx_text, chatbot],
    ).then(
        update_preview,
        inputs=[docx_text],
        outputs=[docx_preview],
    ).then(
        # A newly processed document starts a fresh conversation.
        lambda: [],
        outputs=[chat_history],
    )

    # chat_with_docx returns (display_history, conversation_history): the
    # first feeds the visible chat, the second the stored conversation.
    chat_inputs = [api_key_input, docx_text, question, chat_history]
    chat_outputs = [chatbot, chat_history]

    submit_button.click(
        chat_with_docx,
        inputs=chat_inputs,
        outputs=chat_outputs,
    ).then(lambda: "", outputs=question)

    question.submit(
        chat_with_docx,
        inputs=chat_inputs,
        outputs=chat_outputs,
    ).then(lambda: "", outputs=question)


if __name__ == "__main__":
    # NOTE(review): share=True requests a publicly reachable link; confirm this
    # is intended, since users type their API key into this app.
    app.launch(share=True)