Spaces:
Sleeping
Sleeping
fix
Browse files
main.py
CHANGED
|
@@ -13,6 +13,7 @@ from datetime import datetime
|
|
| 13 |
import time
|
| 14 |
import random
|
| 15 |
from collections import defaultdict, deque
|
|
|
|
| 16 |
|
| 17 |
from content_analyzer.document_parser import DocumentProcessor
|
| 18 |
from search_engine.indexer import RetrieverBuilder
|
|
@@ -20,20 +21,28 @@ from intelligence.orchestrator import AgentWorkflow
|
|
| 20 |
from configuration import definitions, parameters
|
| 21 |
|
| 22 |
|
| 23 |
-
# Rate limiting configuration - 3 requests per
|
| 24 |
WINDOW_S = 3600
|
| 25 |
MAX_CALLS = 3
|
| 26 |
_calls = defaultdict(deque) # ip -> timestamps
|
|
|
|
| 27 |
|
| 28 |
def rate_limit(request):
|
|
|
|
| 29 |
ip = getattr(request.client, "host", "unknown")
|
| 30 |
now = time.time()
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
q
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
|
| 39 |
# Example data for demo
|
|
@@ -895,13 +904,18 @@ setInterval(tick, 500);
|
|
| 895 |
from huggingface_hub import hf_hub_download
|
| 896 |
copied_files = []
|
| 897 |
file_info_text = f"✅ Loaded: {example_key}\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 898 |
for file_path in file_names:
|
| 899 |
filename = os.path.basename(file_path)
|
| 900 |
try:
|
| 901 |
local_path = hf_hub_download(
|
| 902 |
-
repo_id="
|
| 903 |
repo_type="dataset",
|
| 904 |
filename=filename,
|
|
|
|
| 905 |
)
|
| 906 |
copied_files.append(local_path)
|
| 907 |
file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
|
|
@@ -910,7 +924,7 @@ setInterval(tick, 500);
|
|
| 910 |
logger.error(f"Failed to download {filename}: {e}")
|
| 911 |
file_info_text += f"❌ {filename} - Download failed\n"
|
| 912 |
if not copied_files:
|
| 913 |
-
return [], "", "❌ Could not load example files from dataset"
|
| 914 |
return copied_files, question_text, file_info_text
|
| 915 |
except ImportError:
|
| 916 |
return [], "", "❌ huggingface_hub not installed"
|
|
@@ -940,6 +954,16 @@ setInterval(tick, 500);
|
|
| 940 |
inputs=[example_dropdown],
|
| 941 |
outputs=[files, question, loaded_file_info]
|
| 942 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 943 |
# Launch server - Compatible with both local and Hugging Face Spaces
|
| 944 |
# HF Spaces sets SPACE_ID environment variable
|
| 945 |
is_hf_space = os.environ.get("SPACE_ID") is not None
|
|
|
|
| 13 |
import time
|
| 14 |
import random
|
| 15 |
from collections import defaultdict, deque
|
| 16 |
+
import threading
|
| 17 |
|
| 18 |
from content_analyzer.document_parser import DocumentProcessor
|
| 19 |
from search_engine.indexer import RetrieverBuilder
|
|
|
|
| 21 |
from configuration import definitions, parameters
|
| 22 |
|
| 23 |
|
| 24 |
+
# Rate limiting configuration - 3 requests per hour per IP
WINDOW_S = 3600  # length of the sliding window, in seconds
MAX_CALLS = 3  # calls allowed per key within one window
_calls = defaultdict(deque)  # ip -> timestamps
_calls_lock = threading.Lock()  # Thread-safe access to rate limit state


def rate_limit(request):
    """Enforce a sliding-window rate limit keyed by client IP.

    Each accepted call records a timestamp for the caller's IP. A call is
    rejected when ``MAX_CALLS`` timestamps newer than ``WINDOW_S`` seconds
    are already recorded for that IP.

    Args:
        request: Object exposing ``client.host``. ``None``, or a request
            whose ``client`` is missing, is counted under the shared
            ``"unknown"`` key instead of raising ``AttributeError``.

    Raises:
        gr.Error: If the caller has used up its quota for the current window.
    """
    # Tolerate a missing request as well as a missing client.
    client = getattr(request, "client", None)
    ip = getattr(client, "host", "unknown")
    now = time.time()

    with _calls_lock:
        stamps = _calls[ip]
        # Remove expired entries
        while stamps and (now - stamps[0]) > WINDOW_S:
            stamps.popleft()

        allowed = len(stamps) < MAX_CALLS
        if allowed:
            stamps.append(now)
        # NOTE(review): keys are never removed from _calls, so it keeps one
        # deque per distinct IP ever seen - confirm this is acceptable.

    if not allowed:
        # Import and raise only after releasing the lock so the critical
        # section covers nothing but the shared-state bookkeeping.
        import gradio as gr
        raise gr.Error(f"Rate limit: {MAX_CALLS} requests per {WINDOW_S//60} minutes. Please wait.")
|
| 46 |
|
| 47 |
|
| 48 |
# Example data for demo
|
|
|
|
| 904 |
from huggingface_hub import hf_hub_download
|
| 905 |
copied_files = []
|
| 906 |
file_info_text = f"✅ Loaded: {example_key}\n\n"
|
| 907 |
+
|
| 908 |
+
# Get HF token for private dataset access (optional)
|
| 909 |
+
hf_token = os.environ.get("HF_TOKEN", None)
|
| 910 |
+
|
| 911 |
for file_path in file_names:
|
| 912 |
filename = os.path.basename(file_path)
|
| 913 |
try:
|
| 914 |
local_path = hf_hub_download(
|
| 915 |
+
repo_id="TilanB/smartdoc-samples", # Correct dataset repo
|
| 916 |
repo_type="dataset",
|
| 917 |
filename=filename,
|
| 918 |
+
token=hf_token, # Pass token for private repos
|
| 919 |
)
|
| 920 |
copied_files.append(local_path)
|
| 921 |
file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
|
|
|
|
| 924 |
logger.error(f"Failed to download {filename}: {e}")
|
| 925 |
file_info_text += f"❌ {filename} - Download failed\n"
|
| 926 |
if not copied_files:
|
| 927 |
+
return [], "", "❌ Could not load example files from dataset. Make sure the dataset is public or HF_TOKEN is set."
|
| 928 |
return copied_files, question_text, file_info_text
|
| 929 |
except ImportError:
|
| 930 |
return [], "", "❌ huggingface_hub not installed"
|
|
|
|
| 954 |
inputs=[example_dropdown],
|
| 955 |
outputs=[files, question, loaded_file_info]
|
| 956 |
)
|
| 957 |
+
|
| 958 |
+
# Show loaded_file_info when example is selected
|
| 959 |
+
def show_info(example_key):
    """Return a visibility update that is visible only for a truthy key."""
    has_selection = bool(example_key)
    return gr.update(visible=has_selection)

# Re-evaluate the visibility of loaded_file_info on every dropdown change.
example_dropdown.change(
    fn=show_info,
    inputs=[example_dropdown],
    outputs=[loaded_file_info],
)
|
| 967 |
# Launch server - Compatible with both local and Hugging Face Spaces
|
| 968 |
# HF Spaces sets SPACE_ID environment variable
|
| 969 |
is_hf_space = os.environ.get("SPACE_ID") is not None
|