Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| from io import BytesIO | |
| from docx import Document | |
| from together import Together | |
| # ------------------ TEXT EXTRACTION ------------------ | |
| def extract_text_from_docx(docx_file): | |
| """Extract text from a DOCX file""" | |
| try: | |
| if isinstance(docx_file, bytes): | |
| file_obj = BytesIO(docx_file) | |
| elif hasattr(docx_file, 'read'): | |
| file_bytes = docx_file.read() | |
| file_obj = BytesIO(file_bytes) | |
| if hasattr(docx_file, 'seek'): | |
| docx_file.seek(0) | |
| else: | |
| file_obj = docx_file | |
| document = Document(file_obj) | |
| text = "\n".join([para.text for para in document.paragraphs]) | |
| if not text.strip(): | |
| return "No text could be extracted from the DOCX file." | |
| return text | |
| except Exception as e: | |
| return f"Error extracting text from DOCX: {str(e)}" | |
| # ------------------ CHAT FUNCTION ------------------ | |
def chat_with_docx(api_key, docx_text, user_question, history):
    """Answer a question about the extracted DOCX text via the Together chat API.

    Args:
        api_key: Together API key; ``None`` or blank is rejected.
        docx_text: Text previously extracted from the uploaded document.
        user_question: The question to put to the model.
        history: Earlier turns as ``(user, assistant)`` pairs; ``None`` is
            treated as an empty conversation.

    Returns:
        A ``(display_history, stored_history)`` pair of lists. After a
        successful call both contain the new turn. When validation or the API
        call fails, only ``display_history`` gains a turn carrying the message
        and ``stored_history`` is the conversation as it was.
    """
    # A UI component that has never held a value may hand over None;
    # normalise up front so the guard clauses below cannot raise.
    api_key = api_key or ""
    docx_text = docx_text or ""
    user_question = user_question or ""
    history = list(history or [])

    # Exact messages extract_text_from_docx() uses to report failure. Matching
    # the whole phrase (rather than just "Error" / "No text") keeps documents
    # whose own text begins with those words usable.
    failure_markers = (
        "Error extracting text from DOCX:",
        "No text could be extracted from the DOCX file.",
    )

    def reject(message):
        # Show the message as the reply but leave the stored history untouched.
        return history + [(user_question, message)], history

    if not api_key.strip():
        return reject("❌ Please enter your Together API key.")
    if not docx_text.strip() or docx_text.startswith(failure_markers):
        return reject("⚠️ Please upload a valid DOCX file with extractable text first.")
    if not user_question.strip():
        return reject("⚠️ Please enter a question.")

    try:
        client = Together(api_key=api_key)

        # Keep the prompt bounded: for long documents send the first and last
        # halves of the budget and mark the gap.
        max_context_length = 10000
        if len(docx_text) > max_context_length:
            half = max_context_length // 2
            docx_context = (
                docx_text[:half]
                + "\n\n[...Content truncated...]\n\n"
                + docx_text[-half:]
            )
        else:
            docx_context = docx_text

        system_message = (
            "You are an intelligent assistant designed to read and understand DOCX documents.\n"
            "Based on the user's questions, provide answers grounded only in the document below.\n"
            "DOCX CONTENT:\n"
            f"{docx_context}\n"
            "Only answer based on the document above. If the answer isn't there, say so politely."
        )

        messages = [{"role": "system", "content": system_message}]
        for past_question, past_answer in history:
            # Skip empty slots so no message with content=None is sent.
            if past_question is not None:
                messages.append({"role": "user", "content": past_question})
            if past_answer is not None:
                messages.append({"role": "assistant", "content": past_answer})
        messages.append({"role": "user", "content": user_question})

        response = client.chat.completions.create(
            model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
            messages=messages,
            max_tokens=5000,
            temperature=0.7,
        )
        assistant_response = response.choices[0].message.content
        updated_history = history + [(user_question, assistant_response)]
        return updated_history, updated_history
    except Exception as e:
        # UI boundary: report any client/API failure as a chat reply.
        return reject(f"❌ Error: {str(e)}")
| # ------------------ FILE PROCESSING ------------------ | |
def process_docx(docx_file, api_key_input):
    """Extract the text of an uploaded DOCX and build a status line for the UI.

    Args:
        docx_file: The uploaded file (bytes or a file-like object), or
            ``None`` when nothing was uploaded.
        api_key_input: Unused; accepted because the UI passes it.

    Returns:
        ``(status_message, docx_text, chat_history)``. ``docx_text`` is the
        extracted text on success and ``""`` on any failure; ``chat_history``
        is always an empty list, which resets the conversation.
    """
    if docx_file is None:
        return "⚠️ Please upload a DOCX file.", "", []
    try:
        if hasattr(docx_file, 'name'):
            file_name = os.path.basename(docx_file.name)
        else:
            # Raw bytes carry no file name.
            file_name = "Uploaded DOCX"

        docx_text = extract_text_from_docx(docx_file)

        # extract_text_from_docx() reports failure through these exact
        # messages. Match the whole phrase so a document whose own text begins
        # with "Error" or "No text" is not mistaken for a failure.
        if docx_text.startswith("Error extracting text from DOCX:"):
            return f"❌ {docx_text}", "", []
        if not docx_text.strip() or docx_text.startswith(
            "No text could be extracted from the DOCX file."
        ):
            return f"⚠️ {docx_text}", "", []

        word_count = len(docx_text.split())
        status_message = (
            f"✅ Successfully processed DOCX: {file_name} ({word_count} words extracted)"
        )
        return status_message, docx_text, []
    except Exception as e:
        # UI boundary: surface unexpected failures as a status message.
        return f"❌ Error processing DOCX: {str(e)}", "", []
def validate_api_key(api_key):
    """Return a status line describing whether the API key looks plausible.

    This is a format check only (present, and at least 10 characters after
    stripping whitespace); the key is not verified against the API.
    """
    key = (api_key or "").strip()
    if not key:
        return "❌ API Key is required"
    if len(key) < 10:
        return "❌ API Key appears to be too short"
    return "✅ API Key format looks valid"
| # ------------------ GRADIO APP ------------------ | |
# Build the two-column UI: setup (API key, upload, status, preview) on the
# left, the chat on the right.
with gr.Blocks(
    theme=gr.themes.Soft(),
    title="ChatDOCX with Together AI",
) as app:
    gr.Markdown("# 📄 ChatDOCX with Together AI")
    gr.Markdown("Upload a DOCX file and chat with it using the Llama-3.3-70B model.")

    with gr.Row():
        with gr.Column(scale=1):
            api_key_input = gr.Textbox(
                label="Together API Key",
                placeholder="Enter your Together API key...",
                type="password",
            )
            api_key_status = gr.Textbox(label="API Key Status", interactive=False)
            # Only .docx is offered: python-docx cannot open legacy .doc files,
            # so accepting them could only ever end in an extraction error.
            docx_file = gr.File(label="Upload DOCX", file_types=[".docx"], type="binary")
            process_button = gr.Button("Process DOCX")
            status_message = gr.Textbox(label="Status", interactive=False)
            # Hidden textbox that carries the extracted text between events.
            docx_text = gr.Textbox(visible=False)
            with gr.Accordion("DOCX Content Preview", open=False):
                docx_preview = gr.Textbox(
                    label="Extracted Text Preview",
                    interactive=False,
                    max_lines=10,
                    show_copy_button=True,
                )
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat with DOCX", height=500)
            question = gr.Textbox(
                label="Ask a question about the DOCX",
                placeholder="What is the main topic of this document?",
                lines=2,
            )
            submit_button = gr.Button("Submit Question")

    def update_preview(text):
        """Return at most the first 500 characters of the extracted text."""
        failure_markers = (
            "Error extracting text from DOCX:",
            "No text could be extracted from the DOCX file.",
        )
        # Empty values and extraction-failure messages are shown unchanged.
        if not text or text.startswith(failure_markers):
            return text
        preview = text[:500]
        if len(text) > 500:
            preview += "...\n[Text truncated for preview. Full text will be used for chat.]"
        return preview

    def respond(api_key, document_text, user_question, history):
        """Run chat_with_docx and return only the history meant for display."""
        # chat_with_docx returns (display_history, stored_history). Sending
        # both to the same Chatbot lets the second value overwrite the first,
        # which can hide error replies, so only the display history is used.
        display_history, _ = chat_with_docx(api_key, document_text, user_question, history)
        return display_history

    api_key_input.change(validate_api_key, inputs=api_key_input, outputs=api_key_status)

    process_button.click(
        process_docx,
        inputs=[docx_file, api_key_input],
        outputs=[status_message, docx_text, chatbot],
    ).then(
        update_preview,
        inputs=[docx_text],
        outputs=[docx_preview],
    )

    # The button and pressing Enter in the question box do the same thing:
    # answer the question, then clear the input.
    for ask in (submit_button.click, question.submit):
        ask(
            respond,
            inputs=[api_key_input, docx_text, question, chatbot],
            outputs=chatbot,
        ).then(lambda: "", outputs=question)
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    # share=True asks Gradio for a public share link (see the Gradio launch()
    # documentation) -- NOTE(review): confirm a public link is intended.
    app.launch(share=True)