sajjadrahman56 committed on
Commit
257d1e2
·
verified ·
1 Parent(s): 63a13d1

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +183 -0
  2. requerments.txt +3 -0
app.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from io import BytesIO
4
+ from docx import Document
5
+ from together import Together
6
+
7
+ # ------------------ TEXT EXTRACTION ------------------
8
+
9
def extract_text_from_docx(docx_file):
    """Return the paragraph text of a DOCX document as one newline-joined string.

    ``docx_file`` may be raw ``bytes``, an object exposing ``read()``, or any
    other value, which is handed to ``Document`` unchanged.

    Nothing is raised to the caller. A document whose text is empty or only
    whitespace yields ``"No text could be extracted from the DOCX file."``,
    and any exception is reported as a string starting with
    ``"Error extracting text from DOCX: "``.
    """
    try:
        if isinstance(docx_file, bytes):
            source = BytesIO(docx_file)
        elif hasattr(docx_file, 'read'):
            source = BytesIO(docx_file.read())
            # Rewind the caller's stream so it can be read again afterwards.
            if hasattr(docx_file, 'seek'):
                docx_file.seek(0)
        else:
            source = docx_file

        paragraphs = Document(source).paragraphs
        extracted = "\n".join(paragraph.text for paragraph in paragraphs)

        if extracted.strip():
            return extracted
        return "No text could be extracted from the DOCX file."

    except Exception as exc:
        return f"Error extracting text from DOCX: {exc!s}"
31
+
32
+ # ------------------ CHAT FUNCTION ------------------
33
+
34
def chat_with_docx(api_key, docx_text, user_question, history):
    """Answer a question about extracted DOCX text using the Together chat API.

    Args:
        api_key: Together API key; surrounding whitespace is ignored.
        docx_text: Text previously extracted from the uploaded document.
        user_question: The question to ask about the document.
        history: Earlier ``(user, assistant)`` turns; ``None`` is treated as
            an empty conversation. The argument itself is never mutated.

    Returns:
        A pair ``(updated_history, updated_history)``. Both elements hold the
        same conversation with the new turn appended, so validation and error
        notices are kept no matter which of the two values the UI displays.
        Nothing is raised: API failures are reported as a ``"❌ Error: ..."``
        assistant turn.
    """
    past_turns = list(history or [])

    def reply(message):
        # Hand back two independent lists with identical content.
        updated = past_turns + [(user_question, message)]
        return updated, list(updated)

    key = (api_key or "").strip()
    if not key:
        return reply("❌ Please enter your Together API key.")

    document_text = docx_text or ""
    # Compare against the exact extraction sentinels so that real documents
    # which merely begin with "Error" or "No text" are still accepted.
    extraction_failed = (
        document_text.startswith("Error extracting text from DOCX:")
        or document_text == "No text could be extracted from the DOCX file."
    )
    if not document_text.strip() or extraction_failed:
        return reply("⚠️ Please upload a valid DOCX file with extractable text first.")

    if not (user_question or "").strip():
        return reply("⚠️ Please enter a question.")

    try:
        client = Together(api_key=key)
        max_context_length = 10000

        if len(document_text) > max_context_length:
            # Keep the beginning and the end of over-long documents.
            half = max_context_length // 2
            docx_context = (
                document_text[:half]
                + "\n\n[...Content truncated...]\n\n"
                + document_text[-half:]
            )
        else:
            docx_context = document_text

        system_message = (
            "You are an intelligent assistant designed to read and understand DOCX documents.\n"
            "Based on the user's questions, provide answers grounded only in the document below.\n"
            "\n"
            "DOCX CONTENT:\n"
            f"{docx_context}\n"
            "\n"
            "Only answer based on the document above. If the answer isn't there, say so politely."
        )

        # Locally generated notices are not model output; do not replay them.
        status_prefixes = ("❌", "⚠️")
        messages = [{"role": "system", "content": system_message}]
        for past_question, past_answer in past_turns:
            if not past_question or not past_answer:
                continue
            if str(past_answer).startswith(status_prefixes):
                continue
            messages.append({"role": "user", "content": past_question})
            messages.append({"role": "assistant", "content": past_answer})
        messages.append({"role": "user", "content": user_question})

        response = client.chat.completions.create(
            model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
            messages=messages,
            max_tokens=5000,
            temperature=0.7,
        )

        return reply(response.choices[0].message.content)

    except Exception as exc:
        return reply(f"❌ Error: {exc!s}")
81
+
82
+ # ------------------ FILE PROCESSING ------------------
83
+
84
def process_docx(docx_file, api_key_input):
    """Extract the text of an uploaded DOCX and build a status line for the UI.

    Args:
        docx_file: The uploaded file value, or ``None`` when nothing was
            uploaded. It is passed on to ``extract_text_from_docx``.
        api_key_input: Unused; kept so existing event wiring keeps working.

    Returns:
        A ``(status_message, docx_text, chat_history)`` tuple. On any failure
        ``docx_text`` is ``""``; ``chat_history`` is always an empty list.
    """
    if docx_file is None:
        return "⚠️ Please upload a DOCX file.", "", []

    try:
        file_name = os.path.basename(docx_file.name) if hasattr(docx_file, 'name') else "Uploaded DOCX"
        docx_text = extract_text_from_docx(docx_file)

        # Match the exact sentinel messages produced by the extractor, so a
        # real document that merely begins with "Error" or "No text" is not
        # mistaken for a failed extraction.
        if docx_text.startswith("Error extracting text from DOCX:"):
            return f"❌ {docx_text}", "", []

        if not docx_text.strip() or docx_text == "No text could be extracted from the DOCX file.":
            return f"⚠️ {docx_text}", "", []

        word_count = len(docx_text.split())
        status_message = f"✅ Successfully processed DOCX: {file_name} ({word_count} words extracted)"
        return status_message, docx_text, []

    except Exception as exc:
        return f"❌ Error processing DOCX: {exc!s}", "", []
105
+
106
def validate_api_key(api_key):
    """Return a short status string describing whether the key looks usable.

    Only the shape of the key is checked: it must be non-blank and at least
    ten characters long once surrounding whitespace is removed.
    """
    trimmed = (api_key or "").strip()
    if not trimmed:
        return "❌ API Key is required"
    if len(trimmed) < 10:
        return "❌ API Key appears to be too short"
    return "✓ API Key format looks valid"
112
+
113
+ # ------------------ GRADIO APP ------------------
114
+
115
+ # with gr.Blocks(title="ChatDOCX with Together AI") as app:
116
+
117
# Build the Gradio interface: API key and upload controls on the left, chat on the right.
with gr.Blocks(
    theme=gr.themes.Soft(),
    title="ChatDOCX with Together AI",
) as app:

    gr.Markdown("# 📄 ChatDOCX with Together AI")
    gr.Markdown("Upload a DOCX file and chat with it using the Llama-3.3-70B model.")

    with gr.Row():
        with gr.Column(scale=1):
            api_key_input = gr.Textbox(label="Together API Key", placeholder="Enter your Together API key...", type="password")
            api_key_status = gr.Textbox(label="API Key Status", interactive=False)

            # Only .docx is offered: python-docx cannot open legacy .doc files.
            docx_file = gr.File(label="Upload DOCX", file_types=[".docx"], type="binary")
            process_button = gr.Button("Process DOCX")
            status_message = gr.Textbox(label="Status", interactive=False)
            # Hidden holder for the full extracted text used by the chat.
            docx_text = gr.Textbox(visible=False)

            with gr.Accordion("DOCX Content Preview", open=False):
                docx_preview = gr.Textbox(label="Extracted Text Preview", interactive=False, max_lines=10, show_copy_button=True)

        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat with DOCX", height=500)
            question = gr.Textbox(label="Ask a question about the DOCX", placeholder="What is the main topic of this document?", lines=2)
            submit_button = gr.Button("Submit Question")

    def update_preview(text):
        """Return at most the first 500 characters of ``text`` for the preview box."""
        # Pass empty values and the extractor's exact failure messages through
        # unchanged; a real document starting with "Error" is still previewed.
        if (
            not text
            or text.startswith("Error extracting text from DOCX:")
            or text == "No text could be extracted from the DOCX file."
        ):
            return text
        preview = text[:500]
        if len(text) > 500:
            preview += "...\n[Text truncated for preview. Full text will be used for chat.]"
        return preview

    def answer_question(api_key, text, user_question, history):
        """Run ``chat_with_docx`` and return the single history to display."""
        # chat_with_docx returns two histories. The first always contains the
        # newest turn (including validation and error notices), so only that
        # one is routed to the chatbot instead of listing the component twice.
        updated_history, _ = chat_with_docx(api_key, text, user_question, history)
        return updated_history

    api_key_input.change(
        validate_api_key,
        inputs=api_key_input,
        outputs=api_key_status,
    )

    process_button.click(
        process_docx,
        inputs=[docx_file, api_key_input],
        outputs=[status_message, docx_text, chatbot],
    ).then(
        update_preview,
        inputs=[docx_text],
        outputs=[docx_preview],
    )

    # The button and the Enter key share the same handler; the question box
    # is cleared once the answer has been produced.
    submit_button.click(
        answer_question,
        inputs=[api_key_input, docx_text, question, chatbot],
        outputs=[chatbot],
    ).then(lambda: "", outputs=question)

    question.submit(
        answer_question,
        inputs=[api_key_input, docx_text, question, chatbot],
        outputs=[chatbot],
    ).then(lambda: "", outputs=question)

if __name__ == "__main__":
    app.launch(share=True)
requerments.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ python-docx
2
+ PyPDF2
3
+ gradio