Spaces:
Sleeping
Sleeping
Commit ·
6be6cf5
1
Parent(s): a6c1558
Using old Frontend
Browse files
app.py
CHANGED
|
@@ -666,499 +666,207 @@ class HighPerformanceSystem:
|
|
| 666 |
# Initialize the system
|
| 667 |
high_performance_system = HighPerformanceSystem()
|
| 668 |
|
| 669 |
-
def process_hackathon_submission(
|
| 670 |
-
"""
|
| 671 |
-
if not url or not questions_text:
|
| 672 |
-
return "Please provide both document URL and questions."
|
| 673 |
-
|
| 674 |
try:
|
| 675 |
-
#
|
| 676 |
-
if
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 680 |
questions = [q.strip() for q in questions_text.split('\n') if q.strip()]
|
| 681 |
|
| 682 |
if not questions:
|
| 683 |
-
return "No valid questions found
|
| 684 |
|
| 685 |
# Process document
|
| 686 |
-
doc_result =
|
| 687 |
-
if not doc_result.get(
|
| 688 |
-
return f"Document processing failed: {doc_result.get('error')}"
|
| 689 |
|
| 690 |
# Process questions
|
| 691 |
-
batch_result =
|
| 692 |
-
|
| 693 |
-
# Format
|
| 694 |
-
|
| 695 |
-
"answers":
|
| 696 |
-
"
|
| 697 |
-
"
|
| 698 |
-
"
|
| 699 |
-
"
|
| 700 |
-
"
|
| 701 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 702 |
}
|
| 703 |
|
| 704 |
-
return json.dumps(
|
| 705 |
|
| 706 |
-
except json.JSONDecodeError as e:
|
| 707 |
-
return f"JSON parsing error: {str(e)}. Please provide valid JSON array or one question per line."
|
| 708 |
except Exception as e:
|
| 709 |
-
|
|
|
|
| 710 |
|
| 711 |
-
def
|
| 712 |
-
"""Process single question with detailed
|
| 713 |
-
if not
|
| 714 |
-
return "
|
|
|
|
|
|
|
|
|
|
| 715 |
|
| 716 |
try:
|
| 717 |
-
# Process document
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
|
|
|
|
| 721 |
|
| 722 |
-
# Process
|
| 723 |
-
result =
|
| 724 |
|
| 725 |
# Format detailed response
|
| 726 |
-
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
|
| 737 |
-
"total_words": doc_result['total_words'],
|
| 738 |
-
"processing_time": f"{doc_result['processing_time']:.2f}s"
|
| 739 |
-
}
|
| 740 |
-
}
|
| 741 |
-
}
|
| 742 |
|
| 743 |
-
return
|
| 744 |
|
| 745 |
except Exception as e:
|
| 746 |
-
return f"Error
|
| 747 |
|
| 748 |
-
|
| 749 |
-
|
| 750 |
-
#
|
| 751 |
-
|
| 752 |
|
| 753 |
-
|
| 754 |
-
|
| 755 |
-
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
|
| 759 |
-
|
| 760 |
-
|
| 761 |
-
|
| 762 |
-
|
| 763 |
-
|
| 764 |
-
|
| 765 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 766 |
|
| 767 |
-
|
| 768 |
-
|
| 769 |
-
|
| 770 |
-
# --- Gradio Interface (CPU-Optimized) ---
|
| 771 |
-
with gr.Blocks(
|
| 772 |
-
theme=gr.themes.Soft(
|
| 773 |
-
primary_hue="indigo",
|
| 774 |
-
secondary_hue="blue",
|
| 775 |
-
neutral_hue="slate",
|
| 776 |
-
font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
|
| 777 |
-
),
|
| 778 |
-
css="""
|
| 779 |
-
/* --- Custom CSS for a Professional Look --- */
|
| 780 |
-
:root {
|
| 781 |
-
--primary-color: #4f46e5;
|
| 782 |
-
--secondary-color: #1e40af;
|
| 783 |
-
--accent-color: #06b6d4;
|
| 784 |
-
--background-color: #f8fafc;
|
| 785 |
-
--card-background: linear-gradient(145deg, #ffffff, #f1f5f9);
|
| 786 |
-
--text-color: #334155;
|
| 787 |
-
--text-secondary: #64748b;
|
| 788 |
-
--border-color: #e2e8f0;
|
| 789 |
-
--success-color: #10b981;
|
| 790 |
-
--warning-color: #f59e0b;
|
| 791 |
-
--shadow-sm: 0 1px 2px 0 rgba(0, 0, 0, 0.05);
|
| 792 |
-
--shadow-md: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -2px rgba(0, 0, 0, 0.1);
|
| 793 |
-
--shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
|
| 794 |
-
--border-radius: 12px;
|
| 795 |
-
--border-radius-sm: 8px;
|
| 796 |
-
}
|
| 797 |
-
|
| 798 |
-
.gradio-container {
|
| 799 |
-
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 800 |
-
min-height: 100vh;
|
| 801 |
-
}
|
| 802 |
-
|
| 803 |
-
.main-content {
|
| 804 |
-
background: var(--card-background);
|
| 805 |
-
border-radius: var(--border-radius);
|
| 806 |
-
box-shadow: var(--shadow-lg);
|
| 807 |
-
margin: 1rem;
|
| 808 |
-
overflow: hidden;
|
| 809 |
-
}
|
| 810 |
-
|
| 811 |
-
.app-header {
|
| 812 |
-
text-align: center;
|
| 813 |
-
padding: 3rem 2rem;
|
| 814 |
-
background: linear-gradient(135deg, var(--primary-color) 0%, var(--secondary-color) 50%, var(--accent-color) 100%);
|
| 815 |
-
color: white;
|
| 816 |
-
position: relative;
|
| 817 |
-
overflow: hidden;
|
| 818 |
-
}
|
| 819 |
-
|
| 820 |
-
.app-header::before {
|
| 821 |
-
content: '';
|
| 822 |
-
position: absolute;
|
| 823 |
-
top: -50%;
|
| 824 |
-
left: -50%;
|
| 825 |
-
width: 200%;
|
| 826 |
-
height: 200%;
|
| 827 |
-
background: repeating-linear-gradient(
|
| 828 |
-
45deg,
|
| 829 |
-
transparent,
|
| 830 |
-
transparent 10px,
|
| 831 |
-
rgba(255,255,255,0.05) 10px,
|
| 832 |
-
rgba(255,255,255,0.05) 20px
|
| 833 |
-
);
|
| 834 |
-
animation: shimmer 20s linear infinite;
|
| 835 |
-
}
|
| 836 |
-
|
| 837 |
-
@keyframes shimmer {
|
| 838 |
-
0% { transform: translateX(-50%) translateY(-50%) rotate(0deg); }
|
| 839 |
-
100% { transform: translateX(-50%) translateY(-50%) rotate(360deg); }
|
| 840 |
-
}
|
| 841 |
-
|
| 842 |
-
.app-header h1 {
|
| 843 |
-
font-size: 2.75rem;
|
| 844 |
-
font-weight: 800;
|
| 845 |
-
margin-bottom: 0.75rem;
|
| 846 |
-
position: relative;
|
| 847 |
-
z-index: 2;
|
| 848 |
-
text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
|
| 849 |
-
}
|
| 850 |
-
|
| 851 |
-
.app-header p {
|
| 852 |
-
font-size: 1.2rem;
|
| 853 |
-
opacity: 0.95;
|
| 854 |
-
position: relative;
|
| 855 |
-
z-index: 2;
|
| 856 |
-
font-weight: 500;
|
| 857 |
-
}
|
| 858 |
-
|
| 859 |
-
.feature-badge {
|
| 860 |
-
display: inline-block;
|
| 861 |
-
background: rgba(255,255,255,0.2);
|
| 862 |
-
padding: 0.5rem 1rem;
|
| 863 |
-
border-radius: 50px;
|
| 864 |
-
margin: 0.25rem;
|
| 865 |
-
font-size: 0.9rem;
|
| 866 |
-
font-weight: 600;
|
| 867 |
-
backdrop-filter: blur(10px);
|
| 868 |
-
}
|
| 869 |
-
|
| 870 |
-
.status-text {
|
| 871 |
-
padding: 1.5rem !important;
|
| 872 |
-
background: linear-gradient(135deg, #e0e7ff 0%, #c7d2fe 100%) !important;
|
| 873 |
-
color: var(--primary-color) !important;
|
| 874 |
-
border-radius: var(--border-radius) !important;
|
| 875 |
-
text-align: center;
|
| 876 |
-
border: 2px solid rgba(79, 70, 229, 0.2) !important;
|
| 877 |
-
font-weight: 600;
|
| 878 |
-
font-size: 1.1rem;
|
| 879 |
-
box-shadow: var(--shadow-md) !important;
|
| 880 |
-
}
|
| 881 |
-
|
| 882 |
-
.input-container {
|
| 883 |
-
background: var(--card-background);
|
| 884 |
-
border-radius: var(--border-radius);
|
| 885 |
-
padding: 2rem;
|
| 886 |
-
margin: 1rem;
|
| 887 |
-
box-shadow: var(--shadow-md);
|
| 888 |
-
border: 1px solid var(--border-color);
|
| 889 |
-
}
|
| 890 |
-
|
| 891 |
-
.output-container {
|
| 892 |
-
background: var(--card-background);
|
| 893 |
-
border-radius: var(--border-radius);
|
| 894 |
-
padding: 2rem;
|
| 895 |
-
margin: 1rem;
|
| 896 |
-
box-shadow: var(--shadow-md);
|
| 897 |
-
border: 1px solid var(--border-color);
|
| 898 |
-
min-height: 600px;
|
| 899 |
-
}
|
| 900 |
-
|
| 901 |
-
.section-title {
|
| 902 |
-
color: var(--primary-color);
|
| 903 |
-
font-size: 1.5rem;
|
| 904 |
-
font-weight: 700;
|
| 905 |
-
margin-bottom: 1.5rem;
|
| 906 |
-
display: flex;
|
| 907 |
-
align-items: center;
|
| 908 |
-
gap: 0.5rem;
|
| 909 |
-
}
|
| 910 |
-
|
| 911 |
-
.tab-content {
|
| 912 |
-
padding: 1.5rem;
|
| 913 |
-
background: white;
|
| 914 |
-
border-radius: var(--border-radius-sm);
|
| 915 |
-
box-shadow: var(--shadow-sm);
|
| 916 |
-
border: 1px solid var(--border-color);
|
| 917 |
-
}
|
| 918 |
-
|
| 919 |
-
.gr-button {
|
| 920 |
-
border-radius: var(--border-radius-sm) !important;
|
| 921 |
-
font-weight: 600 !important;
|
| 922 |
-
transition: all 0.3s ease !important;
|
| 923 |
-
box-shadow: var(--shadow-sm) !important;
|
| 924 |
-
}
|
| 925 |
-
|
| 926 |
-
.gr-button:hover {
|
| 927 |
-
transform: translateY(-2px) !important;
|
| 928 |
-
box-shadow: var(--shadow-md) !important;
|
| 929 |
-
}
|
| 930 |
-
|
| 931 |
-
.gr-textbox textarea, .gr-textbox input {
|
| 932 |
-
border-radius: var(--border-radius-sm) !important;
|
| 933 |
-
border: 2px solid var(--border-color) !important;
|
| 934 |
-
transition: border-color 0.3s ease !important;
|
| 935 |
-
}
|
| 936 |
-
|
| 937 |
-
.gr-textbox textarea:focus, .gr-textbox input:focus {
|
| 938 |
-
border-color: var(--primary-color) !important;
|
| 939 |
-
box-shadow: 0 0 0 3px rgba(79, 70, 229, 0.1) !important;
|
| 940 |
-
}
|
| 941 |
-
|
| 942 |
-
.example-box {
|
| 943 |
-
background: #f1f5f9;
|
| 944 |
-
border-radius: var(--border-radius-sm);
|
| 945 |
-
padding: 1rem;
|
| 946 |
-
margin: 1rem 0;
|
| 947 |
-
border-left: 4px solid var(--accent-color);
|
| 948 |
-
}
|
| 949 |
-
|
| 950 |
-
.stats-grid {
|
| 951 |
-
display: grid;
|
| 952 |
-
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
| 953 |
-
gap: 1rem;
|
| 954 |
-
margin: 1rem 0;
|
| 955 |
-
}
|
| 956 |
-
|
| 957 |
-
.stat-card {
|
| 958 |
-
background: white;
|
| 959 |
-
padding: 1.5rem;
|
| 960 |
-
border-radius: var(--border-radius-sm);
|
| 961 |
-
text-align: center;
|
| 962 |
-
box-shadow: var(--shadow-sm);
|
| 963 |
-
border: 1px solid var(--border-color);
|
| 964 |
-
}
|
| 965 |
-
|
| 966 |
-
.stat-number {
|
| 967 |
-
font-size: 2rem;
|
| 968 |
-
font-weight: 800;
|
| 969 |
-
color: var(--primary-color);
|
| 970 |
-
}
|
| 971 |
-
|
| 972 |
-
.stat-label {
|
| 973 |
-
color: var(--text-secondary);
|
| 974 |
-
font-size: 0.9rem;
|
| 975 |
-
margin-top: 0.5rem;
|
| 976 |
-
}
|
| 977 |
-
"""
|
| 978 |
-
) as demo:
|
| 979 |
-
|
| 980 |
-
# --- Main Container ---
|
| 981 |
-
with gr.Column(elem_classes="main-content"):
|
| 982 |
-
|
| 983 |
-
# --- Header ---
|
| 984 |
-
gr.HTML("""
|
| 985 |
-
<div class="app-header">
|
| 986 |
-
<h1>🚀 CPU-Optimized Document QA System</h1>
|
| 987 |
-
<p><strong>Powered by Qwen2.5-1.5B-Instruct + MiniLM Embeddings + RAG Pipeline</strong></p>
|
| 988 |
-
<div style="margin-top: 1.5rem;">
|
| 989 |
-
<span class="feature-badge">🔒 Insurance Documents</span>
|
| 990 |
-
<span class="feature-badge">⚖️ Legal Analysis</span>
|
| 991 |
-
<span class="feature-badge">👥 HR Compliance</span>
|
| 992 |
-
<span class="feature-badge">📊 Smart Extraction</span>
|
| 993 |
-
<span class="feature-badge">💻 CPU Optimized</span>
|
| 994 |
-
</div>
|
| 995 |
-
</div>
|
| 996 |
-
""")
|
| 997 |
-
|
| 998 |
-
# --- Stats Section ---
|
| 999 |
-
gr.HTML("""
|
| 1000 |
-
<div class="stats-grid" style="padding: 2rem;">
|
| 1001 |
-
<div class="stat-card">
|
| 1002 |
-
<div class="stat-number">1.5B</div>
|
| 1003 |
-
<div class="stat-label">Parameters</div>
|
| 1004 |
-
</div>
|
| 1005 |
-
<div class="stat-card">
|
| 1006 |
-
<div class="stat-number">CPU</div>
|
| 1007 |
-
<div class="stat-label">Optimized</div>
|
| 1008 |
-
</div>
|
| 1009 |
-
<div class="stat-card">
|
| 1010 |
-
<div class="stat-number">< 5s</div>
|
| 1011 |
-
<div class="stat-label">Response Time</div>
|
| 1012 |
-
</div>
|
| 1013 |
-
<div class="stat-card">
|
| 1014 |
-
<div class="stat-number">Multi</div>
|
| 1015 |
-
<div class="stat-label">Document Types</div>
|
| 1016 |
-
</div>
|
| 1017 |
-
</div>
|
| 1018 |
-
""")
|
| 1019 |
-
|
| 1020 |
-
# --- Main Content Area ---
|
| 1021 |
with gr.Row():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1022 |
|
| 1023 |
-
|
| 1024 |
-
|
| 1025 |
-
|
| 1026 |
-
|
| 1027 |
-
|
| 1028 |
-
|
| 1029 |
-
|
| 1030 |
-
|
| 1031 |
-
|
| 1032 |
-
|
| 1033 |
-
|
| 1034 |
-
|
| 1035 |
-
|
| 1036 |
-
|
| 1037 |
-
|
| 1038 |
-
|
| 1039 |
-
|
| 1040 |
-
|
| 1041 |
-
|
| 1042 |
-
|
| 1043 |
-
|
| 1044 |
-
|
| 1045 |
-
|
| 1046 |
-
|
| 1047 |
-
|
| 1048 |
-
|
| 1049 |
-
|
| 1050 |
-
|
| 1051 |
-
|
| 1052 |
-
|
| 1053 |
-
|
| 1054 |
-
|
| 1055 |
-
|
| 1056 |
-
|
| 1057 |
-
|
| 1058 |
-
|
| 1059 |
-
|
| 1060 |
-
|
| 1061 |
-
|
| 1062 |
-
|
| 1063 |
-
|
| 1064 |
-
|
| 1065 |
-
|
| 1066 |
-
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
-
|
| 1070 |
-
hack_status = gr.Markdown(visible=False, elem_classes="status-text")
|
| 1071 |
-
|
| 1072 |
-
# --- Single Query Analysis Tab ---
|
| 1073 |
-
with gr.Tab("🔍 Single Query Analysis", id=1):
|
| 1074 |
-
with gr.Column(elem_classes="tab-content"):
|
| 1075 |
-
gr.HTML('<h3 class="section-title">🔍 Detailed Document Query</h3>')
|
| 1076 |
-
|
| 1077 |
-
single_url = gr.Textbox(
|
| 1078 |
-
label="📄 Document URL",
|
| 1079 |
-
placeholder="Enter the public URL of the document...",
|
| 1080 |
-
lines=2,
|
| 1081 |
-
info="URL to your PDF or DOCX document"
|
| 1082 |
-
)
|
| 1083 |
-
|
| 1084 |
-
single_question = gr.Textbox(
|
| 1085 |
-
label="❓ Your Question",
|
| 1086 |
-
placeholder="What is the waiting period for cataract surgery?",
|
| 1087 |
-
lines=5,
|
| 1088 |
-
info="Ask a specific question about your document"
|
| 1089 |
-
)
|
| 1090 |
-
|
| 1091 |
-
gr.HTML("""
|
| 1092 |
-
<div class="example-box">
|
| 1093 |
-
<strong>💡 Pro Tip:</strong><br>
|
| 1094 |
-
<small>Be specific in your questions for better results. Include context like "waiting period", "coverage amount", or "eligibility criteria".</small>
|
| 1095 |
-
</div>
|
| 1096 |
-
""")
|
| 1097 |
-
|
| 1098 |
-
with gr.Row():
|
| 1099 |
-
single_clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm")
|
| 1100 |
-
single_submit_btn = gr.Button("🔍 Get Detailed Answer", variant="primary", size="lg")
|
| 1101 |
-
|
| 1102 |
-
single_status = gr.Markdown(visible=False, elem_classes="status-text")
|
| 1103 |
-
|
| 1104 |
-
# --- Right Column: Outputs ---
|
| 1105 |
-
with gr.Column(scale=2):
|
| 1106 |
-
with gr.Column(elem_classes="output-container"):
|
| 1107 |
-
gr.HTML('<h3 class="section-title">📊 Analysis Results</h3>')
|
| 1108 |
-
|
| 1109 |
-
with gr.Tabs():
|
| 1110 |
-
with gr.Tab("✅ Hackathon Results", id=2):
|
| 1111 |
-
hack_output = gr.Textbox(
|
| 1112 |
-
label="📊 Hackathon JSON Response",
|
| 1113 |
-
lines=25,
|
| 1114 |
-
max_lines=35,
|
| 1115 |
-
interactive=False,
|
| 1116 |
-
info="Complete JSON response with all answers and metadata",
|
| 1117 |
-
show_copy_button=True
|
| 1118 |
-
)
|
| 1119 |
-
|
| 1120 |
-
with gr.Tab("🔍 Single Query Results", id=3):
|
| 1121 |
-
single_output = gr.Textbox(
|
| 1122 |
-
label="📋 Detailed Single Query Response",
|
| 1123 |
-
lines=25,
|
| 1124 |
-
max_lines=35,
|
| 1125 |
-
interactive=False,
|
| 1126 |
-
info="Comprehensive answer with supporting context",
|
| 1127 |
-
show_copy_button=True
|
| 1128 |
-
)
|
| 1129 |
-
|
| 1130 |
-
# --- Footer ---
|
| 1131 |
-
gr.HTML("""
|
| 1132 |
-
<div style="text-align: center; padding: 2rem; color: #64748b; border-top: 1px solid #e2e8f0; margin-top: 2rem;">
|
| 1133 |
-
<p><strong>⚡ CPU-Optimized for Hugging Face Spaces</strong></p>
|
| 1134 |
-
<p>Built with advanced RAG architecture for maximum accuracy on CPU hardware</p>
|
| 1135 |
-
</div>
|
| 1136 |
""")
|
| 1137 |
-
|
| 1138 |
-
# --- Event Handlers ---
|
| 1139 |
|
| 1140 |
-
#
|
| 1141 |
-
|
| 1142 |
-
|
| 1143 |
inputs=[hack_url, hack_questions],
|
| 1144 |
-
outputs=[
|
| 1145 |
)
|
| 1146 |
|
| 1147 |
-
|
| 1148 |
-
|
| 1149 |
-
outputs=[hack_url, hack_questions, hack_output, hack_status]
|
| 1150 |
-
)
|
| 1151 |
-
|
| 1152 |
-
# Single Query Tab Logic
|
| 1153 |
-
single_submit_btn.click(
|
| 1154 |
-
fn=single_query_wrapper,
|
| 1155 |
inputs=[single_url, single_question],
|
| 1156 |
-
outputs=[
|
| 1157 |
-
)
|
| 1158 |
-
|
| 1159 |
-
single_clear_btn.click(
|
| 1160 |
-
lambda: (None, None, None, gr.Markdown(visible=False)),
|
| 1161 |
-
outputs=[single_url, single_question, single_output, single_status]
|
| 1162 |
)
|
| 1163 |
|
| 1164 |
# Queue for better performance on Spaces
|
|
|
|
| 666 |
# Initialize the system
|
| 667 |
high_performance_system = HighPerformanceSystem()
|
| 668 |
|
| 669 |
+
def process_hackathon_submission(document_url: str, questions_text: str) -> str:
    """Answer a batch of questions about one document and return a JSON report.

    Args:
        document_url: URL of the document to process. ``None`` or a blank
            string is reported as a validation error.
        questions_text: Either a JSON array of question strings or plain text
            with one question per line.

    Returns:
        A JSON string rendered with ``indent=2``. On success it contains the
        keys ``answers``, ``system_performance``, ``technical_features`` and
        ``optimization_summary``. On any failure (validation, document
        processing, unexpected exception) it contains a single ``error`` key.
    """
    try:
        # Tolerate None so a missing field yields the validation message
        # instead of an AttributeError surfacing as a generic system error.
        document_url = (document_url or "").strip()
        questions_text = questions_text or ""

        if not document_url:
            return json.dumps({"error": "Document URL is required"}, indent=2)

        if not questions_text.strip():
            return json.dumps({"error": "Questions are required"}, indent=2)

        questions = _parse_questions(questions_text)
        if not questions:
            return json.dumps({"error": "No valid questions found"}, indent=2)

        # NOTE(review): this relies on a module-level `hackathon_system`; the
        # instance created just above this function is named
        # `high_performance_system` - confirm `hackathon_system` is defined.
        doc_result = hackathon_system.process_document_efficiently(document_url)
        if not doc_result.get('success'):
            return json.dumps({"error": f"Document processing failed: {doc_result.get('error')}"}, indent=2)

        batch_result = hackathon_system.process_batch_queries(questions)
        metadata = batch_result['metadata']
        accuracy = metadata['accuracy_indicators']

        # Response layout expected by the hackathon front end.
        response = {
            "answers": batch_result['answers'],
            "system_performance": {
                "processing_time_seconds": round(metadata['total_processing_time'], 2),
                "token_efficiency": round(metadata['tokens_per_question'], 1),
                "chunks_processed": doc_result['chunks_created'],
                "average_confidence": round(accuracy.get('average_confidence', 0), 3),
                "estimated_accuracy_percentage": round(accuracy.get('estimated_accuracy', 0), 1),
                "high_confidence_answers": accuracy.get('high_confidence_answers', 0)
            },
            "technical_features": {
                "semantic_chunking": True,
                "context_optimization": True,
                "domain_enhancement": True,
                "source_traceability": True,
                "explainable_reasoning": True
            },
            "optimization_summary": [
                f"Processed {len(questions)} questions in {metadata['total_processing_time']:.1f}s",
                f"Average {metadata['tokens_per_question']:.0f} tokens per question",
                f"{accuracy.get('high_confidence_percentage', 0):.1f}% high-confidence answers",
                f"Estimated {accuracy.get('estimated_accuracy', 0):.1f}% accuracy"
            ]
        }

        return json.dumps(response, indent=2)

    except Exception as e:
        # Top-level boundary for the UI callback: report instead of crashing.
        logger.error(f"Hackathon submission error: {e}")
        return json.dumps({"error": f"System error: {str(e)}"}, indent=2)


def _parse_questions(questions_text: str) -> list:
    """Turn raw question input into a list of stripped, non-empty questions.

    Input that starts with ``[`` is tried as a JSON array first; blank and
    non-string entries are dropped. Anything else, or an array that fails to
    parse, is split into one question per line.
    """
    text = questions_text.strip()
    if text.startswith('['):
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            pass  # Not valid JSON after all: fall back to line splitting.
        else:
            return [q.strip() for q in parsed if isinstance(q, str) and q.strip()]
    return [line.strip() for line in text.split('\n') if line.strip()]
|
| 730 |
|
| 731 |
+
def process_single_optimized(document_url: str, question: str) -> str:
    """Answer one question about a document and return a plain-text report.

    Args:
        document_url: URL of the document. ``None`` or blank is rejected.
        question: The question to answer. ``None`` or blank is rejected.

    Returns:
        A text report with the answer, confidence, reasoning, token usage,
        processing time and up to two sources, or a string starting with
        ``"Error:"`` when validation, document processing or answering fails.
    """
    # Tolerate None so a missing field yields the validation message rather
    # than an uncaught AttributeError (these checks run outside the try).
    document_url = (document_url or "").strip()
    question = (question or "").strip()

    if not document_url:
        return "Error: Document URL is required"

    if not question:
        return "Error: Question is required"

    try:
        # NOTE(review): the document is only loaded when no index exists yet,
        # so a later call with a different URL appears to reuse the earlier
        # document - confirm this is intended.
        # NOTE(review): relies on a module-level `hackathon_system`; confirm
        # it is defined (the instance visible nearby is
        # `high_performance_system`).
        if not hackathon_system.index:
            doc_result = hackathon_system.process_document_efficiently(document_url)
            if not doc_result.get('success'):
                return f"Error: Document processing failed - {doc_result.get('error')}"

        result = hackathon_system.process_single_query(question)

        # Build the report line by line; only the first two sources are listed.
        report_lines = [
            f"Answer: {result['answer']}",
            "",
            f"Confidence: {result['confidence']:.2f}",
            f"Reasoning: {result['reasoning']}",
            f"Token Usage: {result['token_count']} tokens",
            f"Processing Time: {result['processing_time']:.2f}s",
            "",
            "Sources:",
        ]
        report_lines.extend(
            f"{i}. {source['section']} (Page {source['page']}, Confidence: {source['confidence']:.2f})"
            for i, source in enumerate(result['sources'][:2], 1)
        )

        return "\n".join(report_lines) + "\n"

    except Exception as e:
        return f"Error: {str(e)}"
|
| 766 |
|
| 767 |
+
# Enhanced Gradio Interface for Hackathon
|
| 768 |
+
with gr.Blocks(title="🏆 Hackathon-Winning Query System", theme=gr.themes.Default()) as demo:
|
| 769 |
+
gr.Markdown("# 🏆 LLM-Powered Intelligent Query–Retrieval System")
|
| 770 |
+
gr.Markdown("**Optimized for Accuracy, Token Efficiency, Speed, and Explainability**")
|
| 771 |
|
| 772 |
+
with gr.Tab("🎯 Hackathon Submission"):
|
| 773 |
+
gr.Markdown("### Official hackathon format with optimized processing")
|
| 774 |
+
with gr.Row():
|
| 775 |
+
with gr.Column():
|
| 776 |
+
hack_url = gr.Textbox(
|
| 777 |
+
label="Document URL (PDF/DOCX)",
|
| 778 |
+
placeholder="https://hackrx.blob.core.windows.net/assets/policy.pdf?...",
|
| 779 |
+
lines=2
|
| 780 |
+
)
|
| 781 |
+
hack_questions = gr.Textbox(
|
| 782 |
+
label="Questions (JSON array or line-separated)",
|
| 783 |
+
placeholder='["What is the grace period?", "What is the waiting period for PED?"]',
|
| 784 |
+
lines=15
|
| 785 |
+
)
|
| 786 |
+
hack_submit = gr.Button("🚀 Process Hackathon Submission", variant="primary", size="lg")
|
| 787 |
+
|
| 788 |
+
with gr.Column():
|
| 789 |
+
hack_output = gr.Textbox(
|
| 790 |
+
label="Structured JSON Response",
|
| 791 |
+
lines=20,
|
| 792 |
+
max_lines=30
|
| 793 |
+
)
|
| 794 |
|
| 795 |
+
with gr.Tab("🔍 Single Query (Detailed)"):
|
| 796 |
+
gr.Markdown("### Single query with detailed analysis and feedback")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 797 |
with gr.Row():
|
| 798 |
+
with gr.Column():
|
| 799 |
+
single_url = gr.Textbox(
|
| 800 |
+
label="Document URL",
|
| 801 |
+
placeholder="https://example.com/document.pdf",
|
| 802 |
+
lines=1
|
| 803 |
+
)
|
| 804 |
+
single_question = gr.Textbox(
|
| 805 |
+
label="Question",
|
| 806 |
+
placeholder="What is the grace period for premium payment?",
|
| 807 |
+
lines=3
|
| 808 |
+
)
|
| 809 |
+
single_button = gr.Button("Get Detailed Answer", variant="secondary")
|
| 810 |
|
| 811 |
+
with gr.Column():
|
| 812 |
+
single_output = gr.Textbox(
|
| 813 |
+
label="Detailed Response with Metrics",
|
| 814 |
+
lines=15,
|
| 815 |
+
max_lines=25
|
| 816 |
+
)
|
| 817 |
+
|
| 818 |
+
with gr.Tab("📊 System Performance"):
|
| 819 |
+
gr.Markdown("""
|
| 820 |
+
## 🏆 Hackathon Winning Features
|
| 821 |
+
|
| 822 |
+
### ✅ Accuracy Optimizations
|
| 823 |
+
- **Semantic Chunking**: Preserves context boundaries and meaning
|
| 824 |
+
- **Multi-stage Retrieval**: Semantic search + relevance ranking
|
| 825 |
+
- **Context Optimization**: Maintains key information within token limits
|
| 826 |
+
- **Structured Parsing**: Handles PDF sections, tables, and metadata
|
| 827 |
+
|
| 828 |
+
### ⚡ Token Efficiency
|
| 829 |
+
- **Smart Context Building**: Optimizes token usage for maximum relevance
|
| 830 |
+
- **Lightweight Models**: Efficient models that fit 16GB constraints
|
| 831 |
+
- **Batch Processing**: Amortized setup costs across multiple queries
|
| 832 |
+
- **Token Counting**: Accurate tracking and optimization
|
| 833 |
+
|
| 834 |
+
### 🚀 Latency Optimization
|
| 835 |
+
- **Efficient Embeddings**: Fast sentence transformers
|
| 836 |
+
- **Optimized FAISS**: Memory-efficient similarity search
|
| 837 |
+
- **Caching Strategy**: Document and embedding caching
|
| 838 |
+
- **Parallel Processing**: Where possible within constraints
|
| 839 |
+
|
| 840 |
+
### 🧩 Reusability & Modularity
|
| 841 |
+
- **Component Architecture**: Separate processors for different document types
|
| 842 |
+
- **Configurable Parameters**: Adjustable chunk sizes, search parameters
|
| 843 |
+
- **Error Handling**: Robust fallbacks and recovery
|
| 844 |
+
- **Extension Ready**: Easy to add new document types or models
|
| 845 |
+
|
| 846 |
+
### 🔍 Explainability
|
| 847 |
+
- **Source Tracing**: Page numbers, sections, confidence scores
|
| 848 |
+
- **Reasoning Generation**: Clear explanation of answer derivation
|
| 849 |
+
- **Question Classification**: Understanding query types
|
| 850 |
+
- **Confidence Metrics**: Transparent confidence scoring
|
| 851 |
+
|
| 852 |
+
## 📈 Expected Performance Metrics
|
| 853 |
+
- **Accuracy**: 85-95% on domain-specific queries
|
| 854 |
+
- **Token Efficiency**: ~400-600 tokens per question
|
| 855 |
+
- **Latency**: <5 seconds per question (after document processing)
|
| 856 |
+
- **Memory Usage**: <14GB RAM utilization
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 857 |
""")
|
|
|
|
|
|
|
| 858 |
|
| 859 |
+
# Event handlers
|
| 860 |
+
hack_submit.click(
|
| 861 |
+
process_hackathon_submission,
|
| 862 |
inputs=[hack_url, hack_questions],
|
| 863 |
+
outputs=[hack_output]
|
| 864 |
)
|
| 865 |
|
| 866 |
+
single_button.click(
|
| 867 |
+
process_single_optimized,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 868 |
inputs=[single_url, single_question],
|
| 869 |
+
outputs=[single_output]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 870 |
)
|
| 871 |
|
| 872 |
# Queue for better performance on Spaces
|