Spaces:

sajjadrahman56
/

chatwithdoc

Sleeping

App Files Files Community

chatwithdoc / app.py

sajjadrahman56

Upload 2 files

257d1e2 verified 10 months ago

raw

history blame contribute delete

6.87 kB

	import gradio as gr
	import os
	from io import BytesIO
	from docx import Document
	from together import Together

	# ------------------ TEXT EXTRACTION ------------------

	def extract_text_from_docx(docx_file):
	    """Extract the paragraph text of a DOCX document.

	    Args:
	        docx_file: The document as raw ``bytes``/``bytearray``, as a binary
	            file-like object exposing ``read()``, or as any other value,
	            which is handed to ``Document`` unchanged.

	    Returns:
	        The text of every paragraph joined with newlines. Failures are
	        reported in-band instead of being raised: the string
	        ``"No text could be extracted from the DOCX file."`` when the
	        paragraphs contain only whitespace, or a string starting with
	        ``"Error extracting text from DOCX: "`` when reading or parsing
	        raises.
	    """
	    try:
	        if isinstance(docx_file, (bytes, bytearray)):
	            file_obj = BytesIO(docx_file)
	        elif hasattr(docx_file, "read"):
	            # Rewind first so a partially consumed stream is still read in
	            # full, and rewind again afterwards so the caller can reuse it.
	            # Streams that report themselves as non-seekable are read as-is.
	            can_seek = hasattr(docx_file, "seek") and getattr(
	                docx_file, "seekable", lambda: True
	            )()
	            if can_seek:
	                docx_file.seek(0)
	            file_obj = BytesIO(docx_file.read())
	            if can_seek:
	                docx_file.seek(0)
	        else:
	            file_obj = docx_file

	        document = Document(file_obj)
	        text = "\n".join(para.text for para in document.paragraphs)
	    except Exception as e:
	        # Callers detect failure by inspecting the returned string, so the
	        # exception is converted into a message rather than propagated.
	        return f"Error extracting text from DOCX: {e}"

	    if not text.strip():
	        return "No text could be extracted from the DOCX file."
	    return text

	# ------------------ CHAT FUNCTION ------------------

	def chat_with_docx(api_key, docx_text, user_question, history):
	    """Answer a question about the extracted document via the Together API.

	    Args:
	        api_key: Together API key supplied by the user.
	        docx_text: Text previously extracted from the DOCX file.
	        user_question: The question to ask about the document.
	        history: Earlier conversation as ``(user, assistant)`` pairs;
	            ``None`` is treated as an empty conversation.

	    Returns:
	        A ``(display_history, model_history)`` pair of lists. On success both
	        contain the previous turns plus the new exchange. When validation or
	        the API call fails, the first list ends with the error message as the
	        assistant turn and the second is the incoming history without it.
	    """
	    # Tolerate None for any argument so the string/list operations below
	    # cannot raise before a helpful message is produced.
	    api_key = api_key or ""
	    docx_text = docx_text or ""
	    user_question = user_question or ""
	    history = list(history or [])

	    def reject(message):
	        """Pair the question with ``message`` without recording the turn."""
	        return history + [(user_question, message)], history

	    if not api_key.strip():
	        return reject("❌ Please enter your Together API key.")

	    # Only the exact in-band failure messages count as "no document"; a bare
	    # startswith("Error") would also reject real documents that happen to
	    # begin with that word.
	    extraction_failed = (
	        docx_text.startswith("Error extracting text from DOCX:")
	        or docx_text.strip() == "No text could be extracted from the DOCX file."
	    )
	    if not docx_text.strip() or extraction_failed:
	        return reject("⚠️ Please upload a valid DOCX file with extractable text first.")

	    if not user_question.strip():
	        return reject("⚠️ Please enter a question.")

	    try:
	        client = Together(api_key=api_key.strip())
	        max_context_length = 10000

	        # Keep the head and the tail of over-long documents so the prompt
	        # stays within a bounded size.
	        if len(docx_text) > max_context_length:
	            half = max_context_length // 2
	            docx_context = docx_text[:half] + "\n\n[...Content truncated...]\n\n" + docx_text[-half:]
	        else:
	            docx_context = docx_text

	        system_message = (
	            "You are an intelligent assistant designed to read and understand DOCX documents.\n"
	            "Based on the user's questions, provide answers grounded only in the document below.\n"
	            "\n"
	            "DOCX CONTENT:\n"
	            f"{docx_context}\n"
	            "\n"
	            "Only answer based on the document above. If the answer isn't there, say so politely."
	        )

	        messages = [{"role": "system", "content": system_message}]
	        for past_question, past_answer in history:
	            # Skip missing halves of a turn instead of sending None content.
	            if past_question is not None:
	                messages.append({"role": "user", "content": past_question})
	            if past_answer is not None:
	                messages.append({"role": "assistant", "content": past_answer})
	        messages.append({"role": "user", "content": user_question})

	        response = client.chat.completions.create(
	            model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
	            messages=messages,
	            max_tokens=5000,
	            temperature=0.7,
	        )

	        assistant_response = response.choices[0].message.content or ""
	        updated_history = history + [(user_question, assistant_response)]
	        return updated_history, list(updated_history)

	    except Exception as e:
	        # Any client or network failure is surfaced in the chat window.
	        return reject(f"❌ Error: {e}")

	# ------------------ FILE PROCESSING ------------------

	def process_docx(docx_file, api_key_input):
	    """Extract the text of an uploaded DOCX file and summarise the outcome.

	    Args:
	        docx_file: The uploaded document (raw bytes or a file-like object),
	            or ``None`` when nothing was uploaded.
	        api_key_input: Accepted for interface compatibility; not used here.

	    Returns:
	        A ``(status_message, docx_text, chat_history)`` tuple. ``docx_text``
	        is the extracted text on success and ``""`` on any failure;
	        ``chat_history`` is always an empty list.
	    """
	    if docx_file is None:
	        return "⚠️ Please upload a DOCX file.", "", []

	    try:
	        # Raw bytes have no name, and some file objects expose a non-string
	        # name, so only a non-empty string is passed to basename().
	        source_name = getattr(docx_file, "name", None)
	        if isinstance(source_name, str) and source_name:
	            file_name = os.path.basename(source_name)
	        else:
	            file_name = "Uploaded DOCX"

	        docx_text = extract_text_from_docx(docx_file)

	        # Match the extractor's exact failure messages; a bare
	        # startswith("Error") would misreport real documents that begin
	        # with that word.
	        if docx_text.startswith("Error extracting text from DOCX:"):
	            return f"❌ {docx_text}", "", []

	        if not docx_text.strip() or docx_text == "No text could be extracted from the DOCX file.":
	            return f"⚠️ {docx_text}", "", []

	        word_count = len(docx_text.split())
	        status_message = f"✅ Successfully processed DOCX: {file_name} ({word_count} words extracted)"
	        return status_message, docx_text, []

	    except Exception as e:
	        return f"❌ Error processing DOCX: {e}", "", []

	def validate_api_key(api_key):
	    """Return a status message describing whether ``api_key`` looks usable.

	    This is a format check only: the key must be non-blank and at least ten
	    characters long after surrounding whitespace is removed. No request is
	    made to verify the key.
	    """
	    trimmed = api_key.strip() if api_key else ""
	    if not trimmed:
	        return "❌ API Key is required"
	    if len(trimmed) < 10:
	        return "❌ API Key appears to be too short"
	    return "✓ API Key format looks valid"

	# ------------------ GRADIO APP ------------------

	# Build the interface: the left column holds the API key and document
	# upload controls, the right column holds the conversation.
	with gr.Blocks(
	    theme=gr.themes.Soft(),
	    title="ChatDOCX with Together AI",
	) as app:

	    gr.Markdown("# 📄 ChatDOCX with Together AI")
	    gr.Markdown("Upload a DOCX file and chat with it using the Llama-3.3-70B model.")

	    # chat_with_docx returns (display_history, model_history). The second
	    # value is kept here rather than in the chatbot: sending both values to
	    # the chatbot lets the second replace the first, which hides every
	    # validation and API error message from the user.
	    chat_history = gr.State([])

	    with gr.Row():
	        with gr.Column(scale=1):
	            api_key_input = gr.Textbox(label="Together API Key", placeholder="Enter your Together API key...", type="password")
	            api_key_status = gr.Textbox(label="API Key Status", interactive=False)

	            # Only .docx is offered: the python-docx parser used for
	            # extraction does not read the legacy binary .doc format.
	            docx_file = gr.File(label="Upload DOCX", file_types=[".docx"], type="binary")
	            process_button = gr.Button("Process DOCX")
	            status_message = gr.Textbox(label="Status", interactive=False)
	            # Hidden holder for the full extracted text used by the chat.
	            docx_text = gr.Textbox(visible=False)

	            with gr.Accordion("DOCX Content Preview", open=False):
	                docx_preview = gr.Textbox(label="Extracted Text Preview", interactive=False, max_lines=10, show_copy_button=True)

	        with gr.Column(scale=2):
	            chatbot = gr.Chatbot(label="Chat with DOCX", height=500)
	            question = gr.Textbox(label="Ask a question about the DOCX", placeholder="What is the main topic of this document?", lines=2)
	            submit_button = gr.Button("Submit Question")

	    def update_preview(text):
	        """Return at most the first 500 characters of ``text`` for display."""
	        # Empty text and the exact extraction-failure messages pass through
	        # unchanged; real documents that merely start with "Error" are
	        # still truncated like any other text.
	        if (
	            not text
	            or text.startswith("Error extracting text from DOCX:")
	            or text == "No text could be extracted from the DOCX file."
	        ):
	            return text
	        preview = text[:500]
	        if len(text) > 500:
	            preview += "...\n[Text truncated for preview. Full text will be used for chat.]"
	        return preview

	    api_key_input.change(
	        validate_api_key,
	        inputs=api_key_input,
	        outputs=api_key_status,
	    )

	    # Processing a document clears the visible chat (third output of
	    # process_docx) and the stored conversation along with it.
	    process_button.click(
	        process_docx,
	        inputs=[docx_file, api_key_input],
	        outputs=[status_message, docx_text, chatbot],
	    ).then(
	        update_preview,
	        inputs=[docx_text],
	        outputs=[docx_preview],
	    ).then(lambda: [], outputs=chat_history)

	    # The button and pressing Enter in the question box trigger the same
	    # handler; the question box is cleared afterwards.
	    submit_button.click(
	        chat_with_docx,
	        inputs=[api_key_input, docx_text, question, chat_history],
	        outputs=[chatbot, chat_history],
	    ).then(lambda: "", outputs=question)

	    question.submit(
	        chat_with_docx,
	        inputs=[api_key_input, docx_text, question, chat_history],
	        outputs=[chatbot, chat_history],
	    ).then(lambda: "", outputs=question)

	if __name__ == "__main__":
	    app.launch(share=True)