OzanSevindir committed on
Commit
96ad218
·
verified ·
1 Parent(s): 908bd2e

Upload folder using huggingface_hub

Browse files
__pycache__/deep_research.cpython-312.pyc CHANGED
Binary files a/__pycache__/deep_research.cpython-312.pyc and b/__pycache__/deep_research.cpython-312.pyc differ
 
__pycache__/file_processor.cpython-312.pyc ADDED
Binary file (11.2 kB). View file
 
__pycache__/research_manager.cpython-312.pyc CHANGED
Binary files a/__pycache__/research_manager.cpython-312.pyc and b/__pycache__/research_manager.cpython-312.pyc differ
 
deep_research.py CHANGED
@@ -3,11 +3,12 @@ from dotenv import load_dotenv
3
  from research_manager import ResearchManager
4
  import markdown
5
  import re
 
6
 
7
  load_dotenv(override=True)
8
 
9
 
10
- async def run_research(query: str, model_choice: str, conversation_history: list, progress=gr.Progress()):
11
  """Run research and yield updates for both report and references"""
12
  status_messages = []
13
  final_report_md = ""
@@ -39,7 +40,7 @@ async def run_research(query: str, model_choice: str, conversation_history: list
39
  '''
40
 
41
  # Collect all chunks and parse structured messages
42
- async for chunk in ResearchManager(model_choice).run(query, conversation_history):
43
  # Parse structured messages (format: TYPE|data)
44
  if "|" in chunk:
45
  msg_type, msg_data = chunk.split("|", 1)
@@ -254,7 +255,7 @@ def format_references_html(references):
254
  return html
255
 
256
 
257
- async def run_simple_search(query: str, model_choice: str, conversation_history: list, progress=gr.Progress()):
258
  """Run a quick follow-up search without full research workflow"""
259
  progress(0, desc="🔍 Quick search starting...")
260
 
@@ -277,7 +278,7 @@ async def run_simple_search(query: str, model_choice: str, conversation_history:
277
 
278
  answer_text = ""
279
 
280
- async for chunk in ResearchManager(model_choice).run_simple_search(query, conversation_history):
281
  # Parse structured messages
282
  if "|" in chunk:
283
  msg_type, msg_data = chunk.split("|", 1)
@@ -779,6 +780,29 @@ textarea:not(.chat-input textarea):focus {
779
  box-shadow: 0 0 0 2px rgba(16, 185, 129, 0.15) !important;
780
  }
781
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
782
  /* Clear button */
783
  .clear-button {
784
  background: rgba(55, 55, 55, 0.4) !important;
@@ -802,6 +826,61 @@ textarea:not(.chat-input textarea):focus {
802
  transform: translateY(-1px) !important;
803
  }
804
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
805
  /* Modern Minimal Tabs */
806
  .tab-nav {
807
  background: transparent;
@@ -1074,6 +1153,8 @@ with gr.Blocks(theme=luntre_theme, css=custom_css, title="Luntre AI - Deep Resea
1074
  conversation_history_state = gr.State([])
1075
  # State to track current mode: "research" or "search"
1076
  current_mode_state = gr.State("research")
 
 
1077
 
1078
  # Header
1079
  gr.HTML("""
@@ -1099,6 +1180,19 @@ with gr.Blocks(theme=luntre_theme, css=custom_css, title="Luntre AI - Deep Resea
1099
 
1100
  # Input Section at the BOTTOM (chat-style)
1101
  with gr.Group(elem_classes="input-container-bottom"):
 
 
 
 
 
 
 
 
 
 
 
 
 
1102
  query_input = gr.Textbox(
1103
  label="",
1104
  placeholder="What would you like to research? (e.g., 'What are the latest developments in quantum computing?')",
@@ -1112,6 +1206,7 @@ with gr.Blocks(theme=luntre_theme, css=custom_css, title="Luntre AI - Deep Resea
1112
  with gr.Row(elem_classes="controls-row"):
1113
  research_mode_btn = gr.Button("Research", variant="primary", elem_classes="mode-button research-mode active-mode", elem_id="research-mode-btn")
1114
  search_mode_btn = gr.Button("Search", variant="secondary", elem_classes="mode-button search-mode", elem_id="search-mode-btn")
 
1115
  clear_conv_btn = gr.Button("🗑️ Clear", variant="secondary", elem_classes="clear-button")
1116
  model_selector = gr.Dropdown(
1117
  choices=[
@@ -1187,6 +1282,56 @@ with gr.Blocks(theme=luntre_theme, css=custom_css, title="Luntre AI - Deep Resea
1187
  """Switch to search mode"""
1188
  return "search"
1189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1190
  # Mode switching - Research button
1191
  research_mode_btn.click(
1192
  fn=switch_to_research_mode,
@@ -1221,6 +1366,33 @@ with gr.Blocks(theme=luntre_theme, css=custom_css, title="Luntre AI - Deep Resea
1221
  """
1222
  )
1223
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1224
  # Edit and rerun
1225
  edit_event = edit_btn.click(
1226
  fn=load_query_for_edit,
@@ -1232,24 +1404,26 @@ with gr.Blocks(theme=luntre_theme, css=custom_css, title="Luntre AI - Deep Resea
1232
  # Rewrite (run again with same query)
1233
  rewrite_event = rewrite_btn.click(
1234
  fn=run_research,
1235
- inputs=[current_query_state, model_selector, conversation_history_state],
1236
  outputs=[report_output, references_output]
1237
  )
1238
 
1239
  # Clear conversation
1240
  def reset_conversation():
1241
- """Reset conversation and display welcome message"""
1242
  return (
1243
- [],
1244
- "<div class='welcome-message'>Welcome! Enter your research query below to get started.</div>",
1245
- "<div class='no-references'>No references yet. Run a research query to see sources.</div>",
1246
- "research" # Reset to research mode
 
 
1247
  )
1248
 
1249
  clear_conv_event = clear_conv_btn.click(
1250
  fn=reset_conversation,
1251
  inputs=[],
1252
- outputs=[conversation_history_state, report_output, references_output, current_mode_state],
1253
  queue=False,
1254
  js="""
1255
  () => {
@@ -1267,12 +1441,12 @@ with gr.Blocks(theme=luntre_theme, css=custom_css, title="Luntre AI - Deep Resea
1267
  # Store the event handler reference
1268
  submit_event_state = gr.State(None)
1269
 
1270
- def run_based_on_mode(query, model, history, mode):
1271
  """Wrapper to route to correct function based on mode"""
1272
  if mode == "research":
1273
- return run_research(query, model, history)
1274
  else:
1275
- return run_simple_search(query, model, history)
1276
 
1277
  query_input.submit(
1278
  fn=update_query_state,
@@ -1291,7 +1465,7 @@ with gr.Blocks(theme=luntre_theme, css=custom_css, title="Luntre AI - Deep Resea
1291
  queue=False
1292
  ).then(
1293
  fn=run_based_on_mode,
1294
- inputs=[current_query_state, model_selector, conversation_history_state, current_mode_state],
1295
  outputs=[report_output, references_output]
1296
  ).then(
1297
  fn=lambda mode: "search" if mode == "research" else mode,
 
3
  from research_manager import ResearchManager
4
  import markdown
5
  import re
6
+ from file_processor import process_file, get_file_icon, format_file_size
7
 
8
  load_dotenv(override=True)
9
 
10
 
11
+ async def run_research(query: str, model_choice: str, conversation_history: list, attachments: list, progress=gr.Progress()):
12
  """Run research and yield updates for both report and references"""
13
  status_messages = []
14
  final_report_md = ""
 
40
  '''
41
 
42
  # Collect all chunks and parse structured messages
43
+ async for chunk in ResearchManager(model_choice).run(query, conversation_history, attachments):
44
  # Parse structured messages (format: TYPE|data)
45
  if "|" in chunk:
46
  msg_type, msg_data = chunk.split("|", 1)
 
255
  return html
256
 
257
 
258
+ async def run_simple_search(query: str, model_choice: str, conversation_history: list, attachments: list, progress=gr.Progress()):
259
  """Run a quick follow-up search without full research workflow"""
260
  progress(0, desc="🔍 Quick search starting...")
261
 
 
278
 
279
  answer_text = ""
280
 
281
+ async for chunk in ResearchManager(model_choice).run_simple_search(query, conversation_history, attachments):
282
  # Parse structured messages
283
  if "|" in chunk:
284
  msg_type, msg_data = chunk.split("|", 1)
 
780
  box-shadow: 0 0 0 2px rgba(16, 185, 129, 0.15) !important;
781
  }
782
 
783
+ /* Attach button */
784
+ .attach-button {
785
+ background: rgba(55, 55, 55, 0.4) !important;
786
+ border: 1px solid rgba(55, 65, 81, 0.4) !important;
787
+ color: #9CA3AF !important;
788
+ border-radius: 8px !important;
789
+ font-weight: 500 !important;
790
+ padding: 0.4rem 1.25rem !important;
791
+ font-size: 0.875rem !important;
792
+ transition: all 0.2s cubic-bezier(0.4, 0, 0.2, 1) !important;
793
+ box-shadow: none !important;
794
+ text-transform: none !important;
795
+ flex: 0 0 auto;
796
+ min-height: 0 !important;
797
+ }
798
+
799
+ .attach-button:hover {
800
+ background: rgba(59, 130, 246, 0.1) !important;
801
+ border-color: rgba(59, 130, 246, 0.4) !important;
802
+ color: #3B82F6 !important;
803
+ transform: translateY(-1px) !important;
804
+ }
805
+
806
  /* Clear button */
807
  .clear-button {
808
  background: rgba(55, 55, 55, 0.4) !important;
 
826
  transform: translateY(-1px) !important;
827
  }
828
 
829
+ /* Attachments display area */
830
+ .attachments-display-area {
831
+ margin-bottom: 0.75rem;
832
+ min-height: 0;
833
+ }
834
+
835
+ .attachments-container {
836
+ background: rgba(45, 45, 45, 0.3);
837
+ border: 1px solid rgba(55, 65, 81, 0.3);
838
+ border-radius: 8px;
839
+ padding: 0.75rem;
840
+ color: #E5E7EB;
841
+ font-size: 0.875rem;
842
+ }
843
+
844
+ .attachments-header {
845
+ color: #10B981;
846
+ font-weight: 600;
847
+ margin-bottom: 0.5rem;
848
+ }
849
+
850
+ /* Attachment badges */
851
+ .attachment-badge {
852
+ display: inline-block;
853
+ background: rgba(16, 185, 129, 0.1);
854
+ border: 1px solid rgba(16, 185, 129, 0.3);
855
+ border-radius: 6px;
856
+ padding: 0.375rem 0.75rem;
857
+ margin: 0.25rem 0.25rem 0.25rem 0;
858
+ color: #10B981;
859
+ font-size: 0.8rem;
860
+ transition: all 0.2s;
861
+ }
862
+
863
+ .attachment-badge:hover {
864
+ background: rgba(16, 185, 129, 0.15);
865
+ border-color: rgba(16, 185, 129, 0.4);
866
+ }
867
+
868
+ .remove-attachment {
869
+ background: none;
870
+ border: none;
871
+ color: #EF4444;
872
+ cursor: pointer;
873
+ margin-left: 0.5rem;
874
+ font-weight: bold;
875
+ font-size: 0.9rem;
876
+ padding: 0;
877
+ transition: color 0.2s;
878
+ }
879
+
880
+ .remove-attachment:hover {
881
+ color: #DC2626;
882
+ }
883
+
884
  /* Modern Minimal Tabs */
885
  .tab-nav {
886
  background: transparent;
 
1153
  conversation_history_state = gr.State([])
1154
  # State to track current mode: "research" or "search"
1155
  current_mode_state = gr.State("research")
1156
+ # State to store attachments
1157
+ attachments_state = gr.State([])
1158
 
1159
  # Header
1160
  gr.HTML("""
 
1180
 
1181
  # Input Section at the BOTTOM (chat-style)
1182
  with gr.Group(elem_classes="input-container-bottom"):
1183
+ # Attachments display area
1184
+ attachments_display = gr.HTML(value="", elem_classes="attachments-display-area")
1185
+
1186
+ # Hidden file upload component
1187
+ file_upload = gr.File(
1188
+ label="Upload Files",
1189
+ file_types=[".txt", ".md", ".pdf", ".docx", ".doc", ".xlsx", ".xls", ".csv",
1190
+ ".json", ".py", ".js", ".ts", ".java", ".cpp", ".html", ".log"],
1191
+ file_count="multiple",
1192
+ visible=False,
1193
+ elem_id="file-upload-input"
1194
+ )
1195
+
1196
  query_input = gr.Textbox(
1197
  label="",
1198
  placeholder="What would you like to research? (e.g., 'What are the latest developments in quantum computing?')",
 
1206
  with gr.Row(elem_classes="controls-row"):
1207
  research_mode_btn = gr.Button("Research", variant="primary", elem_classes="mode-button research-mode active-mode", elem_id="research-mode-btn")
1208
  search_mode_btn = gr.Button("Search", variant="secondary", elem_classes="mode-button search-mode", elem_id="search-mode-btn")
1209
+ attach_btn = gr.Button("📎 Attach", variant="secondary", elem_classes="attach-button", elem_id="attach-btn")
1210
  clear_conv_btn = gr.Button("🗑️ Clear", variant="secondary", elem_classes="clear-button")
1211
  model_selector = gr.Dropdown(
1212
  choices=[
 
1282
  """Switch to search mode"""
1283
  return "search"
1284
 
1285
def handle_file_upload(files, current_attachments):
    """Process uploaded files and append them to the attachments state.

    Args:
        files: A single Gradio file object or a list of them (shape depends
            on the gr.File ``file_count`` setting).
        current_attachments: Current list of attachment dicts held in gr.State.

    Returns:
        Tuple of (updated attachments list, HTML string for the display area).
    """
    if not files:
        return current_attachments, format_attachments_display(current_attachments)

    # gr.File may deliver a single object or a list depending on file_count.
    if not isinstance(files, list):
        files = [files]

    # Work on a copy: mutating a gr.State value in place returns the same
    # object Gradio already holds, which can make the state change go
    # undetected downstream.
    updated = list(current_attachments)

    for file in files:
        if file is None:
            continue
        # process_file returns None when extraction fails; skip those files.
        file_data = process_file(file.name)
        if file_data:
            updated.append(file_data)

    return updated, format_attachments_display(updated)
1303
+
1304
def format_attachments_display(attachments):
    """Generate HTML for attachment badges.

    Filenames come from user uploads, so they are HTML-escaped before being
    embedded in markup — an unescaped name like ``<img onerror=...>.txt``
    would otherwise inject into the UI.

    Args:
        attachments: List of attachment dicts produced by process_file.

    Returns:
        HTML string for the attachments display area ("" when empty).
    """
    import html as _html  # local import so this function is self-contained

    if not attachments:
        return ""

    parts = ['<div class="attachments-container">']
    parts.append(f'<div class="attachments-header">📎 Attached Files ({len(attachments)})</div>')

    for idx, att in enumerate(attachments):
        size_str = format_file_size(att['size_bytes'])
        icon = get_file_icon(att['file_type'])
        safe_name = _html.escape(att['filename'])
        parts.append(f'''
    <span class="attachment-badge" id="attachment-{idx}">
        {icon} {safe_name} ({size_str})
        <button class="remove-attachment" onclick="document.getElementById('remove-att-{idx}').click();">✕</button>
    </span>
    ''')

    parts.append('</div>')
    return "".join(parts)
1324
+
1325
def remove_attachment(attachments, idx):
    """Drop the attachment at position *idx*; out-of-range indices are a no-op."""
    in_range = 0 <= idx < len(attachments)
    if in_range:
        del attachments[idx]
    return attachments, format_attachments_display(attachments)
1330
+
1331
def clear_attachments():
    """Reset the attachments state and wipe the display-area HTML."""
    empty_state, empty_display = [], ""
    return empty_state, empty_display
1334
+
1335
  # Mode switching - Research button
1336
  research_mode_btn.click(
1337
  fn=switch_to_research_mode,
 
1366
  """
1367
  )
1368
 
1369
+ # Attach button - trigger file upload dialog
1370
+ attach_btn.click(
1371
+ fn=None,
1372
+ inputs=[],
1373
+ outputs=[],
1374
+ js="""
1375
+ () => {
1376
+ // Trigger the hidden file input
1377
+ const fileInput = document.getElementById('file-upload-input');
1378
+ if (fileInput) {
1379
+ const actualInput = fileInput.querySelector('input[type="file"]');
1380
+ if (actualInput) {
1381
+ actualInput.click();
1382
+ }
1383
+ }
1384
+ }
1385
+ """
1386
+ )
1387
+
1388
+ # File upload handler
1389
+ file_upload.change(
1390
+ fn=handle_file_upload,
1391
+ inputs=[file_upload, attachments_state],
1392
+ outputs=[attachments_state, attachments_display],
1393
+ queue=False
1394
+ )
1395
+
1396
  # Edit and rerun
1397
  edit_event = edit_btn.click(
1398
  fn=load_query_for_edit,
 
1404
  # Rewrite (run again with same query)
1405
  rewrite_event = rewrite_btn.click(
1406
  fn=run_research,
1407
+ inputs=[current_query_state, model_selector, conversation_history_state, attachments_state],
1408
  outputs=[report_output, references_output]
1409
  )
1410
 
1411
  # Clear conversation
1412
  def reset_conversation():
1413
+ """Reset conversation, display welcome message, and clear attachments"""
1414
  return (
1415
+ [], # conversation_history
1416
+ "<div class='welcome-message'>Welcome! Enter your research query below to get started.</div>", # report_output
1417
+ "<div class='no-references'>No references yet. Run a research query to see sources.</div>", # references_output
1418
+ "research", # current_mode
1419
+ [], # attachments
1420
+ "" # attachments_display
1421
  )
1422
 
1423
  clear_conv_event = clear_conv_btn.click(
1424
  fn=reset_conversation,
1425
  inputs=[],
1426
+ outputs=[conversation_history_state, report_output, references_output, current_mode_state, attachments_state, attachments_display],
1427
  queue=False,
1428
  js="""
1429
  () => {
 
1441
  # Store the event handler reference
1442
  submit_event_state = gr.State(None)
1443
 
1444
+ def run_based_on_mode(query, model, history, attachments, mode):
1445
  """Wrapper to route to correct function based on mode"""
1446
  if mode == "research":
1447
+ return run_research(query, model, history, attachments)
1448
  else:
1449
+ return run_simple_search(query, model, history, attachments)
1450
 
1451
  query_input.submit(
1452
  fn=update_query_state,
 
1465
  queue=False
1466
  ).then(
1467
  fn=run_based_on_mode,
1468
+ inputs=[current_query_state, model_selector, conversation_history_state, attachments_state, current_mode_state],
1469
  outputs=[report_output, references_output]
1470
  ).then(
1471
  fn=lambda mode: "search" if mode == "research" else mode,
file_processor.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ File processor for attachment feature
3
+ Supports: txt, md, py, js, json, csv, pdf, docx, xlsx
4
+ """
5
+
6
+ import os
7
+ from typing import Dict, Optional
8
+ import datetime
9
+
10
+
11
def process_file(file_path: str) -> Optional[Dict]:
    """
    Extract text content from uploaded file.

    Args:
        file_path: Path to the uploaded file

    Returns:
        Dictionary with file metadata and content, or None if processing failed
    """
    MAX_CHARS = 20000  # cap so huge files do not blow up the model context

    try:
        name = os.path.basename(file_path)
        size = os.path.getsize(file_path)
        ext = os.path.splitext(name)[1].lower()

        # Delegate extraction to the extension-specific reader.
        text = extract_content(file_path, ext)
        if text is None:
            return None

        # Truncate oversized content, recording the original length in a notice.
        full_len = len(text)
        if full_len > MAX_CHARS:
            text = text[:MAX_CHARS]
            text += f"\n\n[📏 Content truncated - original file was {full_len:,} characters, showing first {MAX_CHARS:,}]"

        return {
            "filename": name,
            "content": text,
            "size_bytes": size,
            "file_type": ext[1:],  # extension without the leading dot
            "uploaded_at": datetime.datetime.now().isoformat(),
            "char_count": len(text),
        }

    except Exception as e:
        print(f"Error processing file {file_path}: {str(e)}")
        return None
52
+
53
+
54
def extract_content(file_path: str, file_ext: str) -> Optional[str]:
    """Extract text content based on file extension."""
    plain_text_exts = {'.txt', '.md', '.log', '.json', '.html', '.xml', '.css', '.sql'}
    code_exts = {'.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.cpp', '.c', '.h',
                 '.cs', '.php', '.rb', '.go', '.rs', '.swift', '.kt', '.sh', '.yml', '.yaml'}

    # Plain-text documents and source code share the same text reader.
    if file_ext in plain_text_exts or file_ext in code_exts:
        return read_text_file(file_path)

    if file_ext == '.csv':
        return read_csv_file(file_path)

    if file_ext == '.pdf':
        return read_pdf_file(file_path)

    if file_ext in ('.docx', '.doc'):
        return read_docx_file(file_path)

    if file_ext in ('.xlsx', '.xls'):
        return read_excel_file(file_path)

    return f"[❌ Unsupported file type: {file_ext}]"
84
+
85
+
86
def read_text_file(file_path: str) -> Optional[str]:
    """Read a plain text file, trying several encodings in order.

    Fix: 'latin-1' maps every byte to a character and therefore never raises
    UnicodeDecodeError, so in the original order ('utf-8', 'utf-16',
    'latin-1', 'cp1252') the cp1252 attempt was unreachable. cp1252 is now
    tried before latin-1 so Windows-encoded text (smart quotes, em dashes,
    etc.) decodes to the intended characters; latin-1 stays last as the
    never-failing fallback.
    """
    encodings = ['utf-8', 'utf-16', 'cp1252', 'latin-1']

    for encoding in encodings:
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                return f.read()
        except UnicodeDecodeError:
            continue
        except Exception as e:
            print(f"Error reading text file with {encoding}: {e}")
            continue

    # Unreachable in practice (latin-1 always succeeds) but kept as a guard.
    return "[❌ Could not decode text file - unsupported encoding]"
101
+
102
+
103
def read_csv_file(file_path: str) -> Optional[str]:
    """Read a CSV file and convert it to formatted text.

    Improvements:
    - 'utf-8-sig' transparently strips the BOM that Excel prepends to CSV
      exports (behaves identically to 'utf-8' for BOM-less files).
    - Removed a redundant `if rows:` check — rows is known non-empty after
      the early return for the empty-file case.

    Returns:
        Pipe-separated table text (header + up to 100 data rows), or an
        error marker string on failure.
    """
    try:
        import csv

        with open(file_path, 'r', encoding='utf-8-sig', newline='') as f:
            rows = list(csv.reader(f))

        if not rows:
            return "[Empty CSV file]"

        output = []
        output.append(f"CSV Data ({len(rows)} rows):\n")
        output.append("=" * 50)

        # Header row
        output.append(" | ".join(rows[0]))
        output.append("-" * 50)

        # Data rows (limit to first 100 rows for context)
        for row in rows[1:101]:
            output.append(" | ".join(str(cell) for cell in row))

        if len(rows) > 101:
            output.append(f"\n[... {len(rows) - 101} more rows]")

        return "\n".join(output)

    except Exception as e:
        return f"[❌ Error reading CSV: {str(e)}]"
136
+
137
+
138
def read_pdf_file(file_path: str) -> Optional[str]:
    """Extract text from a PDF file (first 50 pages at most)."""
    try:
        import pdfplumber

        with pdfplumber.open(file_path) as pdf:
            collected = []
            for page_no, page in enumerate(pdf.pages[:50], start=1):
                page_text = page.extract_text()
                if page_text:
                    collected.append(f"--- Page {page_no} ---\n{page_text}")

            total_pages = len(pdf.pages)
            if total_pages > 50:
                collected.append(f"\n[... {total_pages - 50} more pages not shown]")

            content = "\n\n".join(collected)

        # Image-only or empty PDFs yield no extractable text.
        if not content.strip():
            return "[❌ PDF appears to be empty or contains only images]"

        return content

    except ImportError:
        return "[❌ pdfplumber not installed - run: pip install pdfplumber]"
    except Exception as e:
        return f"[❌ Error reading PDF: {str(e)}]"
165
+
166
+
167
def read_docx_file(file_path: str) -> Optional[str]:
    """Extract text from a Word document (paragraphs plus table cells)."""
    try:
        from docx import Document

        doc = Document(file_path)

        # Non-empty paragraphs first, in document order.
        chunks = [para.text for para in doc.paragraphs if para.text.strip()]

        # Flatten table contents into pipe-separated rows.
        for table in doc.tables:
            for row in table.rows:
                joined = " | ".join(cell.text.strip() for cell in row.cells)
                if joined.strip():
                    chunks.append(joined)

        body = "\n\n".join(chunks)

        if not body.strip():
            return "[❌ Word document appears to be empty]"

        return body

    except ImportError:
        return "[❌ python-docx not installed - run: pip install python-docx]"
    except Exception as e:
        return f"[❌ Error reading Word document: {str(e)}]"
197
+
198
+
199
def read_excel_file(file_path: str) -> Optional[str]:
    """Extract text from an Excel file (every sheet, first 50 rows each)."""
    try:
        import pandas as pd

        workbook = pd.ExcelFile(file_path)
        parts = [f"Excel File - {len(workbook.sheet_names)} sheet(s)\n", "=" * 50]

        for name in workbook.sheet_names:
            frame = pd.read_excel(file_path, sheet_name=name)

            parts.append(f"\n📊 Sheet: {name}")
            parts.append(f"Dimensions: {frame.shape[0]} rows × {frame.shape[1]} columns")
            parts.append("-" * 50)

            # Cap at 50 rows per sheet to keep the context manageable.
            if len(frame) > 50:
                parts.append(frame.head(50).to_string(index=False))
                parts.append(f"\n[... {len(frame) - 50} more rows]")
            else:
                parts.append(frame.to_string(index=False))

            parts.append("\n")

        return "\n".join(parts)

    except ImportError:
        return "[❌ pandas/openpyxl not installed - run: pip install pandas openpyxl]"
    except Exception as e:
        return f"[❌ Error reading Excel file: {str(e)}]"
233
+
234
+
235
def get_file_icon(file_type: str) -> str:
    """Return an emoji icon for a file extension (📎 for anything unrecognized)."""
    icon_groups = {
        '📄': ('txt',),
        '📝': ('md',),
        '📕': ('pdf',),
        '📘': ('doc', 'docx'),
        '📊': ('xls', 'xlsx', 'csv'),
        '📋': ('json', 'xml', 'log'),
        '🌐': ('html',),
        '🐍': ('py',),
        '📜': ('js', 'ts'),
        '☕': ('java',),
        '⚙️': ('cpp', 'yml', 'yaml'),
        '🗄️': ('sql',),
    }
    # Invert the grouped table into a flat extension -> icon lookup.
    lookup = {ext: icon for icon, exts in icon_groups.items() for ext in exts}
    return lookup.get(file_type, '📎')
245
+
246
+
247
def format_file_size(size_bytes: int) -> str:
    """Format a byte count as human-readable B / KB / MB text."""
    KB = 1024
    MB = KB * 1024
    if size_bytes >= MB:
        return f"{size_bytes / MB:.1f} MB"
    if size_bytes >= KB:
        return f"{size_bytes / KB:.1f} KB"
    return f"{size_bytes} B"
requirements.txt CHANGED
@@ -6,3 +6,9 @@ sendgrid
6
  requests
7
  openai
8
  markdown
 
 
 
 
 
 
 
6
  requests
7
  openai
8
  markdown
9
+
10
+ # Attachment feature dependencies
11
+ pdfplumber>=0.10.0
12
+ python-docx>=1.0.0
13
+ openpyxl>=3.1.0
14
+ pandas>=2.0.0
research_manager.py CHANGED
@@ -42,39 +42,69 @@ class ResearchManager:
42
  # Record this request
43
  self.request_times.append(current_time)
44
 
45
- def format_conversation_history(self, conversation_history: list) -> str:
46
- """Format conversation history for context injection into agent prompts"""
47
- if not conversation_history:
48
  return ""
49
 
50
- formatted = "\n\n=== PREVIOUS CONVERSATION HISTORY ===\n\n"
51
- for idx, turn in enumerate(conversation_history, 1):
52
- if turn.get("type") == "query":
53
- formatted += f"--- Previous Query {idx} ---\n{turn['content']}\n\n"
54
- elif turn.get("type") in ["report", "simple_search"]:
55
- # Truncate long reports to first 1000 chars to save context
56
- content = turn['content']
57
- if len(content) > 2000:
58
- content = content[:2000] + "\n... [Report truncated for context] ..."
59
- formatted += f"--- Previous {'Report' if turn['type'] == 'report' else 'Answer'} {idx} ---\n{content}\n\n"
60
-
61
- formatted += "=== END OF PREVIOUS CONVERSATION ===\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  return formatted
63
 
64
- async def run(self, query: str, conversation_history: list = None):
65
  """ Run the deep research process, yielding the status updates and the final report"""
66
  if conversation_history is None:
67
  conversation_history = []
 
 
68
 
69
  trace_id = gen_trace_id()
70
  model_display = get_model_display_name(self.model_choice)
71
 
72
  with trace("Research trace", trace_id=trace_id):
73
  print(f"Using Brave Search API and {model_display}")
74
- yield f"INIT|Using Brave Search API and {model_display}"
 
 
 
 
75
  print("Starting research...")
76
 
77
- search_plan = await self.plan_searches(query, conversation_history)
78
  num_searches = len(search_plan.searches)
79
  yield f"PLANNING_COMPLETE|{num_searches}"
80
 
@@ -90,7 +120,7 @@ class ResearchManager:
90
  yield "SEARCH_COMPLETE|All searches finished"
91
 
92
  yield "WRITING_START|Starting to write report..."
93
- report = await self.write_report(query, results, conversation_history)
94
  print(f"DEBUG: Report object created, markdown_report length: {len(report.markdown_report)}")
95
 
96
  # Yield the report BEFORE sending email
@@ -101,10 +131,12 @@ class ResearchManager:
101
  yield "COMPLETE|Research complete"
102
 
103
 
104
- async def plan_searches(self, query: str, conversation_history: list = None) -> WebSearchPlan:
105
  """ Plan the searches to perform for the query """
106
  if conversation_history is None:
107
  conversation_history = []
 
 
108
 
109
  print("Planning searches...")
110
  await self.wait_for_rate_limit()
@@ -118,8 +150,8 @@ class ResearchManager:
118
  output_type=WebSearchPlan,
119
  )
120
 
121
- # Format the input with conversation history if available
122
- context = self.format_conversation_history(conversation_history)
123
  input_text = f"{context}Current Query: {query}"
124
 
125
  result = await Runner.run(
@@ -179,15 +211,17 @@ class ResearchManager:
179
  return None
180
  return None
181
 
182
- async def write_report(self, query: str, search_results: list[str], conversation_history: list = None) -> ReportData:
183
  """ Write the report for the query with retry logic """
184
  if conversation_history is None:
185
  conversation_history = []
 
 
186
 
187
  print("Thinking about report...")
188
 
189
- # Format the input with conversation history if available
190
- context = self.format_conversation_history(conversation_history)
191
  input = f"{context}Current Query: {query}\n\nNew Search Results: {search_results}"
192
 
193
  # Create writer with selected model
@@ -263,19 +297,25 @@ class ResearchManager:
263
  print(f"Email sending failed: {str(e)}")
264
  return report
265
 
266
- async def run_simple_search(self, query: str, conversation_history: list = None):
267
  """Run a quick follow-up search without full research workflow"""
268
  if conversation_history is None:
269
  conversation_history = []
 
 
270
 
271
  print("Running simple search...")
272
- yield "SIMPLE_SEARCH_START|Starting quick search..."
 
 
 
 
273
 
274
  # Import simple search agent
275
  from simple_search_agent import simple_search_agent
276
 
277
- # Format conversation history for context
278
- context = self.format_conversation_history(conversation_history)
279
  input_text = f"{context}Current Question: {query}"
280
 
281
  try:
 
42
  # Record this request
43
  self.request_times.append(current_time)
44
 
45
def format_attachments_context(self, attachments: list) -> str:
    """Build the prompt-context section describing the user's attached files."""
    if not attachments:
        return ""

    pieces = [
        "\n\n=== ATTACHED DOCUMENTS ===\n\n",
        f"The user has attached {len(attachments)} document(s) for context:\n\n",
    ]

    for doc_no, doc in enumerate(attachments, 1):
        # One header line per document, followed by its full (pre-truncated) content.
        pieces.append(
            f"📎 Document {doc_no}: {doc['filename']} "
            f"({doc['file_type'].upper()} file, {doc.get('char_count', 0):,} characters)\n"
        )
        pieces.append(f"Content:\n{doc['content']}\n\n")
        pieces.append("---\n\n")

    pieces.append("=== END ATTACHED DOCUMENTS ===\n\n")
    pieces.append(
        "IMPORTANT: Use the attached documents as primary reference material "
        "when answering the user's query.\n\n"
    )

    return "".join(pieces)
62
+
63
def format_conversation_history(self, conversation_history: list, attachments: list = None) -> str:
    """Compose prompt context: attached documents first, then prior conversation turns."""
    sections = []

    # Attachments lead the context so the agent treats them as primary material.
    if attachments:
        sections.append(self.format_attachments_context(attachments))

    if conversation_history:
        sections.append("\n=== PREVIOUS CONVERSATION HISTORY ===\n\n")
        for turn_no, turn in enumerate(conversation_history, 1):
            kind = turn.get("type")
            if kind == "query":
                sections.append(f"--- Previous Query {turn_no} ---\n{turn['content']}\n\n")
            elif kind in ("report", "simple_search"):
                body = turn['content']
                # Cap long outputs at 2000 chars to conserve prompt budget.
                if len(body) > 2000:
                    body = body[:2000] + "\n... [Report truncated for context] ..."
                label = 'Report' if kind == 'report' else 'Answer'
                sections.append(f"--- Previous {label} {turn_no} ---\n{body}\n\n")
        sections.append("=== END OF PREVIOUS CONVERSATION ===\n\n")

    return "".join(sections)
87
 
88
+ async def run(self, query: str, conversation_history: list = None, attachments: list = None):
89
  """ Run the deep research process, yielding the status updates and the final report"""
90
  if conversation_history is None:
91
  conversation_history = []
92
+ if attachments is None:
93
+ attachments = []
94
 
95
  trace_id = gen_trace_id()
96
  model_display = get_model_display_name(self.model_choice)
97
 
98
  with trace("Research trace", trace_id=trace_id):
99
  print(f"Using Brave Search API and {model_display}")
100
+ if attachments:
101
+ print(f"With {len(attachments)} attached document(s)")
102
+ yield f"INIT|Using Brave Search API and {model_display} (with {len(attachments)} attachment(s))"
103
+ else:
104
+ yield f"INIT|Using Brave Search API and {model_display}"
105
  print("Starting research...")
106
 
107
+ search_plan = await self.plan_searches(query, conversation_history, attachments)
108
  num_searches = len(search_plan.searches)
109
  yield f"PLANNING_COMPLETE|{num_searches}"
110
 
 
120
  yield "SEARCH_COMPLETE|All searches finished"
121
 
122
  yield "WRITING_START|Starting to write report..."
123
+ report = await self.write_report(query, results, conversation_history, attachments)
124
  print(f"DEBUG: Report object created, markdown_report length: {len(report.markdown_report)}")
125
 
126
  # Yield the report BEFORE sending email
 
131
  yield "COMPLETE|Research complete"
132
 
133
 
134
+ async def plan_searches(self, query: str, conversation_history: list = None, attachments: list = None) -> WebSearchPlan:
135
  """ Plan the searches to perform for the query """
136
  if conversation_history is None:
137
  conversation_history = []
138
+ if attachments is None:
139
+ attachments = []
140
 
141
  print("Planning searches...")
142
  await self.wait_for_rate_limit()
 
150
  output_type=WebSearchPlan,
151
  )
152
 
153
+ # Format the input with conversation history and attachments if available
154
+ context = self.format_conversation_history(conversation_history, attachments)
155
  input_text = f"{context}Current Query: {query}"
156
 
157
  result = await Runner.run(
 
211
  return None
212
  return None
213
 
214
+ async def write_report(self, query: str, search_results: list[str], conversation_history: list = None, attachments: list = None) -> ReportData:
215
  """ Write the report for the query with retry logic """
216
  if conversation_history is None:
217
  conversation_history = []
218
+ if attachments is None:
219
+ attachments = []
220
 
221
  print("Thinking about report...")
222
 
223
+ # Format the input with conversation history and attachments if available
224
+ context = self.format_conversation_history(conversation_history, attachments)
225
  input = f"{context}Current Query: {query}\n\nNew Search Results: {search_results}"
226
 
227
  # Create writer with selected model
 
297
  print(f"Email sending failed: {str(e)}")
298
  return report
299
 
300
+ async def run_simple_search(self, query: str, conversation_history: list = None, attachments: list = None):
301
  """Run a quick follow-up search without full research workflow"""
302
  if conversation_history is None:
303
  conversation_history = []
304
+ if attachments is None:
305
+ attachments = []
306
 
307
  print("Running simple search...")
308
+ if attachments:
309
+ print(f"With {len(attachments)} attached document(s)")
310
+ yield f"SIMPLE_SEARCH_START|Starting quick search (with {len(attachments)} attachment(s))..."
311
+ else:
312
+ yield "SIMPLE_SEARCH_START|Starting quick search..."
313
 
314
  # Import simple search agent
315
  from simple_search_agent import simple_search_agent
316
 
317
+ # Format conversation history and attachments for context
318
+ context = self.format_conversation_history(conversation_history, attachments)
319
  input_text = f"{context}Current Question: {query}"
320
 
321
  try: