Spaces:

sajjadrahman56
/

chatwithdoc

Sleeping

App Files Files Community

sajjadrahman56 committed on May 18, 2025

Commit

257d1e2

verified ·

1 Parent(s): 63a13d1

Upload 2 files

Browse files

Files changed (2) hide show

app.py +183 -0
requerments.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,183 @@

+import gradio as gr
+import os
+from io import BytesIO
+from docx import Document
+from together import Together
+# ------------------ TEXT EXTRACTION ------------------
+def extract_text_from_docx(docx_file):
+    """Extract text from a DOCX file"""
+    try:
+        if isinstance(docx_file, bytes):
+            file_obj = BytesIO(docx_file)
+        elif hasattr(docx_file, 'read'):
+            file_bytes = docx_file.read()
+            file_obj = BytesIO(file_bytes)
+            if hasattr(docx_file, 'seek'):
+                docx_file.seek(0)
+        else:
+            file_obj = docx_file
+        document = Document(file_obj)
+        text = "\n".join([para.text for para in document.paragraphs])
+        if not text.strip():
+            return "No text could be extracted from the DOCX file."
+        return text
+    except Exception as e:
+        return f"Error extracting text from DOCX: {str(e)}"
+# ------------------ CHAT FUNCTION ------------------
+def chat_with_docx(api_key, docx_text, user_question, history):
+    """Chat with the DOCX using Together API"""
+    if not api_key.strip():
+        return history + [(user_question, "❌ Please enter your Together API key.")], history
+    if not docx_text.strip() or docx_text.startswith("Error") or docx_text.startswith("No text"):
+        return history + [(user_question, "⚠️ Please upload a valid DOCX file with extractable text first.")], history
+    if not user_question.strip():
+        return history + [(user_question, "⚠️ Please enter a question.")], history
+    try:
+        client = Together(api_key=api_key)
+        max_context_length = 10000
+        if len(docx_text) > max_context_length:
+            half = max_context_length // 2
+            docx_context = docx_text[:half] + "\n\n[...Content truncated...]\n\n" + docx_text[-half:]
+        else:
+            docx_context = docx_text
+        system_message = f"""You are an intelligent assistant designed to read and understand DOCX documents.
+Based on the user's questions, provide answers grounded only in the document below.
+DOCX CONTENT:
+{docx_context}
+Only answer based on the document above. If the answer isn't there, say so politely."""
+        messages = [{"role": "system", "content": system_message}]
+        for h_user, h_bot in history:
+            messages.append({"role": "user", "content": h_user})
+            messages.append({"role": "assistant", "content": h_bot})
+        messages.append({"role": "user", "content": user_question})
+        response = client.chat.completions.create(
+            model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
+            messages=messages,
+            max_tokens=5000,
+            temperature=0.7,
+        )
+        assistant_response = response.choices[0].message.content
+        return history + [(user_question, assistant_response)], history + [(user_question, assistant_response)]
+    except Exception as e:
+        return history + [(user_question, f"❌ Error: {str(e)}")], history
+# ------------------ FILE PROCESSING ------------------
+def process_docx(docx_file, api_key_input):
+    """Process the uploaded DOCX file"""
+    if docx_file is None:
+        return "⚠️ Please upload a DOCX file.", "", []
+    try:
+        file_name = os.path.basename(docx_file.name) if hasattr(docx_file, 'name') else "Uploaded DOCX"
+        docx_text = extract_text_from_docx(docx_file)
+        if docx_text.startswith("Error"):
+            return f"❌ {docx_text}", "", []
+        if not docx_text.strip() or docx_text.startswith("No text"):
+            return f"⚠️ {docx_text}", "", []
+        word_count = len(docx_text.split())
+        status_message = f"✅ Successfully processed DOCX: {file_name} ({word_count} words extracted)"
+        return status_message, docx_text, []
+    except Exception as e:
+        return f"❌ Error processing DOCX: {str(e)}", "", []
+def validate_api_key(api_key):
+    if not api_key or not api_key.strip():
+        return "❌ API Key is required"
+    if len(api_key.strip()) < 10:
+        return "❌ API Key appears to be too short"
+    return "✓ API Key format looks valid"
+# ------------------ GRADIO APP ------------------
+# with gr.Blocks(title="ChatDOCX with Together AI") as app:
+with gr.Blocks(
+    theme=gr.themes.Soft(),
+    title="ChatDOCX with Together AI",
+) as app:
+    gr.Markdown("# 📄 ChatDOCX with Together AI")
+    gr.Markdown("Upload a DOCX file and chat with it using the Llama-3.3-70B model.")
+    with gr.Row():
+        with gr.Column(scale=1):
+            api_key_input = gr.Textbox(label="Together API Key", placeholder="Enter your Together API key...", type="password")
+            api_key_status = gr.Textbox(label="API Key Status",
+                                         interactive=False)
+            docx_file = gr.File(label="Upload DOCX", file_types=[".doc", ".docx"], type="binary")
+            process_button = gr.Button("Process DOCX")
+            status_message = gr.Textbox(label="Status", interactive=False)
+            docx_text = gr.Textbox(visible=False)
+            with gr.Accordion("DOCX Content Preview", open=False):
+                docx_preview = gr.Textbox(label="Extracted Text Preview", interactive=False, max_lines=10, show_copy_button=True)
+        with gr.Column(scale=2):
+            chatbot = gr.Chatbot(label="Chat with DOCX", height=500)
+            question = gr.Textbox(label="Ask a question about the DOCX", placeholder="What is the main topic of this document?", lines=2)
+            submit_button = gr.Button("Submit Question")
+    def update_preview(text):
+        if not text or text.startswith("Error") or text.startswith("No text"):
+            return text
+        preview = text[:500]
+        if len(text) > 500:
+            preview += "...\n[Text truncated for preview. Full text will be used for chat.]"
+        return preview
+    api_key_input.change(validate_api_key,
+                         inputs=api_key_input,
+                         outputs=api_key_status)
+    process_button.click(
+        process_docx,
+        inputs=[docx_file, api_key_input],
+        outputs=[status_message, docx_text, chatbot]
+    ).then(
+        update_preview,
+        inputs=[docx_text],
+        outputs=[docx_preview]
+    )
+    submit_button.click(
+        chat_with_docx,
+        inputs=[api_key_input, docx_text, question, chatbot],
+        outputs=[chatbot, chatbot]
+    ).then(lambda: "", outputs=question)
+    question.submit(
+        chat_with_docx,
+        inputs=[api_key_input, docx_text, question, chatbot],
+        outputs=[chatbot, chatbot]
+    ).then(lambda: "", outputs=question)
+if __name__ == "__main__":
+    app.launch(share=True)

requerments.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+python-docx
+PyPDF2
+gradio