sohamchitimali committed on
Commit
6be6cf5
·
1 Parent(s): a6c1558

Using old Frontend

Browse files
Files changed (1) hide show
  1. app.py +169 -461
app.py CHANGED
@@ -666,499 +666,207 @@ class HighPerformanceSystem:
666
  # Initialize the system
667
  high_performance_system = HighPerformanceSystem()
668
 
669
- def process_hackathon_submission(url, questions_text):
670
- """Process hackathon submission format"""
671
- if not url or not questions_text:
672
- return "Please provide both document URL and questions."
673
-
674
  try:
675
- # Try to parse as JSON first
676
- if questions_text.strip().startswith('[') and questions_text.strip().endswith(']'):
677
- questions = json.loads(questions_text)
678
- else:
679
- # Split by lines if not JSON
 
 
 
 
 
 
 
 
 
680
  questions = [q.strip() for q in questions_text.split('\n') if q.strip()]
681
 
682
  if not questions:
683
- return "No valid questions found. Please provide questions as JSON array or one per line."
684
 
685
  # Process document
686
- doc_result = high_performance_system.process_document_optimized(url)
687
- if not doc_result.get("success"):
688
- return f"Document processing failed: {doc_result.get('error')}"
689
 
690
  # Process questions
691
- batch_result = high_performance_system.process_batch_queries_optimized(questions)
692
-
693
- # Format as hackathon response
694
- hackathon_response = {
695
- "answers": [answer['answer'] for answer in batch_result['answers']],
696
- "metadata": {
697
- "processing_time": batch_result['processing_time'],
698
- "chunks_created": doc_result['chunks_created'],
699
- "total_questions": len(questions),
700
- "model_info": "Qwen2.5-1.5B-Instruct (CPU-optimized)"
701
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
702
  }
703
 
704
- return json.dumps(hackathon_response, indent=2)
705
 
706
- except json.JSONDecodeError as e:
707
- return f"JSON parsing error: {str(e)}. Please provide valid JSON array or one question per line."
708
  except Exception as e:
709
- return f"Error processing submission: {str(e)}"
 
710
 
711
- def process_single_question(url, question):
712
- """Process single question with detailed response"""
713
- if not url or not question:
714
- return "Please provide both document URL and question."
 
 
 
715
 
716
  try:
717
- # Process document
718
- doc_result = high_performance_system.process_document_optimized(url)
719
- if not doc_result.get("success"):
720
- return f"Document processing failed: {doc_result.get('error')}"
 
721
 
722
- # Process single question
723
- result = high_performance_system.process_single_query_optimized(question)
724
 
725
  # Format detailed response
726
- detailed_response = {
727
- "question": question,
728
- "answer": result['answer'],
729
- "confidence": result['confidence'],
730
- "reasoning": result['reasoning'],
731
- "metadata": {
732
- "processing_time": f"{result['processing_time']:.2f}s",
733
- "source_chunks": result['source_chunks'],
734
- "token_count": result['token_count'],
735
- "document_stats": {
736
- "chunks_created": doc_result['chunks_created'],
737
- "total_words": doc_result['total_words'],
738
- "processing_time": f"{doc_result['processing_time']:.2f}s"
739
- }
740
- }
741
- }
742
 
743
- return json.dumps(detailed_response, indent=2)
744
 
745
  except Exception as e:
746
- return f"Error processing question: {str(e)}"
747
 
748
- def hackathon_wrapper(url, questions_text):
749
- """Wrapper to show processing status for the hackathon tab."""
750
- # Show status message
751
- yield gr.Markdown(" Processing... Please wait.", visible=True)
752
 
753
- # Call the original function
754
- result = process_hackathon_submission(url, questions_text)
755
-
756
- # Hide status message and return the final result
757
- yield gr.Markdown(visible=False), result
758
-
759
- def single_query_wrapper(url, question):
760
- """Wrapper to show processing status for the single query tab."""
761
- # Show status message
762
- yield gr.Markdown("⏳ Processing... Please wait.", visible=True)
763
-
764
- # Call the original function
765
- result = process_single_question(url, question)
 
 
 
 
 
 
 
 
 
766
 
767
- # Hide status message and return the final result
768
- yield gr.Markdown(visible=False), result
769
-
770
- # --- Gradio Interface (CPU-Optimized) ---
771
- with gr.Blocks(
772
- theme=gr.themes.Soft(
773
- primary_hue="indigo",
774
- secondary_hue="blue",
775
- neutral_hue="slate",
776
- font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
777
- ),
778
- css="""
779
- /* --- Custom CSS for a Professional Look --- */
780
- :root {
781
- --primary-color: #4f46e5;
782
- --secondary-color: #1e40af;
783
- --accent-color: #06b6d4;
784
- --background-color: #f8fafc;
785
- --card-background: linear-gradient(145deg, #ffffff, #f1f5f9);
786
- --text-color: #334155;
787
- --text-secondary: #64748b;
788
- --border-color: #e2e8f0;
789
- --success-color: #10b981;
790
- --warning-color: #f59e0b;
791
- --shadow-sm: 0 1px 2px 0 rgba(0, 0, 0, 0.05);
792
- --shadow-md: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -2px rgba(0, 0, 0, 0.1);
793
- --shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
794
- --border-radius: 12px;
795
- --border-radius-sm: 8px;
796
- }
797
-
798
- .gradio-container {
799
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
800
- min-height: 100vh;
801
- }
802
-
803
- .main-content {
804
- background: var(--card-background);
805
- border-radius: var(--border-radius);
806
- box-shadow: var(--shadow-lg);
807
- margin: 1rem;
808
- overflow: hidden;
809
- }
810
-
811
- .app-header {
812
- text-align: center;
813
- padding: 3rem 2rem;
814
- background: linear-gradient(135deg, var(--primary-color) 0%, var(--secondary-color) 50%, var(--accent-color) 100%);
815
- color: white;
816
- position: relative;
817
- overflow: hidden;
818
- }
819
-
820
- .app-header::before {
821
- content: '';
822
- position: absolute;
823
- top: -50%;
824
- left: -50%;
825
- width: 200%;
826
- height: 200%;
827
- background: repeating-linear-gradient(
828
- 45deg,
829
- transparent,
830
- transparent 10px,
831
- rgba(255,255,255,0.05) 10px,
832
- rgba(255,255,255,0.05) 20px
833
- );
834
- animation: shimmer 20s linear infinite;
835
- }
836
-
837
- @keyframes shimmer {
838
- 0% { transform: translateX(-50%) translateY(-50%) rotate(0deg); }
839
- 100% { transform: translateX(-50%) translateY(-50%) rotate(360deg); }
840
- }
841
-
842
- .app-header h1 {
843
- font-size: 2.75rem;
844
- font-weight: 800;
845
- margin-bottom: 0.75rem;
846
- position: relative;
847
- z-index: 2;
848
- text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
849
- }
850
-
851
- .app-header p {
852
- font-size: 1.2rem;
853
- opacity: 0.95;
854
- position: relative;
855
- z-index: 2;
856
- font-weight: 500;
857
- }
858
-
859
- .feature-badge {
860
- display: inline-block;
861
- background: rgba(255,255,255,0.2);
862
- padding: 0.5rem 1rem;
863
- border-radius: 50px;
864
- margin: 0.25rem;
865
- font-size: 0.9rem;
866
- font-weight: 600;
867
- backdrop-filter: blur(10px);
868
- }
869
-
870
- .status-text {
871
- padding: 1.5rem !important;
872
- background: linear-gradient(135deg, #e0e7ff 0%, #c7d2fe 100%) !important;
873
- color: var(--primary-color) !important;
874
- border-radius: var(--border-radius) !important;
875
- text-align: center;
876
- border: 2px solid rgba(79, 70, 229, 0.2) !important;
877
- font-weight: 600;
878
- font-size: 1.1rem;
879
- box-shadow: var(--shadow-md) !important;
880
- }
881
-
882
- .input-container {
883
- background: var(--card-background);
884
- border-radius: var(--border-radius);
885
- padding: 2rem;
886
- margin: 1rem;
887
- box-shadow: var(--shadow-md);
888
- border: 1px solid var(--border-color);
889
- }
890
-
891
- .output-container {
892
- background: var(--card-background);
893
- border-radius: var(--border-radius);
894
- padding: 2rem;
895
- margin: 1rem;
896
- box-shadow: var(--shadow-md);
897
- border: 1px solid var(--border-color);
898
- min-height: 600px;
899
- }
900
-
901
- .section-title {
902
- color: var(--primary-color);
903
- font-size: 1.5rem;
904
- font-weight: 700;
905
- margin-bottom: 1.5rem;
906
- display: flex;
907
- align-items: center;
908
- gap: 0.5rem;
909
- }
910
-
911
- .tab-content {
912
- padding: 1.5rem;
913
- background: white;
914
- border-radius: var(--border-radius-sm);
915
- box-shadow: var(--shadow-sm);
916
- border: 1px solid var(--border-color);
917
- }
918
-
919
- .gr-button {
920
- border-radius: var(--border-radius-sm) !important;
921
- font-weight: 600 !important;
922
- transition: all 0.3s ease !important;
923
- box-shadow: var(--shadow-sm) !important;
924
- }
925
-
926
- .gr-button:hover {
927
- transform: translateY(-2px) !important;
928
- box-shadow: var(--shadow-md) !important;
929
- }
930
-
931
- .gr-textbox textarea, .gr-textbox input {
932
- border-radius: var(--border-radius-sm) !important;
933
- border: 2px solid var(--border-color) !important;
934
- transition: border-color 0.3s ease !important;
935
- }
936
-
937
- .gr-textbox textarea:focus, .gr-textbox input:focus {
938
- border-color: var(--primary-color) !important;
939
- box-shadow: 0 0 0 3px rgba(79, 70, 229, 0.1) !important;
940
- }
941
-
942
- .example-box {
943
- background: #f1f5f9;
944
- border-radius: var(--border-radius-sm);
945
- padding: 1rem;
946
- margin: 1rem 0;
947
- border-left: 4px solid var(--accent-color);
948
- }
949
-
950
- .stats-grid {
951
- display: grid;
952
- grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
953
- gap: 1rem;
954
- margin: 1rem 0;
955
- }
956
-
957
- .stat-card {
958
- background: white;
959
- padding: 1.5rem;
960
- border-radius: var(--border-radius-sm);
961
- text-align: center;
962
- box-shadow: var(--shadow-sm);
963
- border: 1px solid var(--border-color);
964
- }
965
-
966
- .stat-number {
967
- font-size: 2rem;
968
- font-weight: 800;
969
- color: var(--primary-color);
970
- }
971
-
972
- .stat-label {
973
- color: var(--text-secondary);
974
- font-size: 0.9rem;
975
- margin-top: 0.5rem;
976
- }
977
- """
978
- ) as demo:
979
-
980
- # --- Main Container ---
981
- with gr.Column(elem_classes="main-content"):
982
-
983
- # --- Header ---
984
- gr.HTML("""
985
- <div class="app-header">
986
- <h1>🚀 CPU-Optimized Document QA System</h1>
987
- <p><strong>Powered by Qwen2.5-1.5B-Instruct + MiniLM Embeddings + RAG Pipeline</strong></p>
988
- <div style="margin-top: 1.5rem;">
989
- <span class="feature-badge">🔒 Insurance Documents</span>
990
- <span class="feature-badge">⚖️ Legal Analysis</span>
991
- <span class="feature-badge">👥 HR Compliance</span>
992
- <span class="feature-badge">📊 Smart Extraction</span>
993
- <span class="feature-badge">💻 CPU Optimized</span>
994
- </div>
995
- </div>
996
- """)
997
-
998
- # --- Stats Section ---
999
- gr.HTML("""
1000
- <div class="stats-grid" style="padding: 2rem;">
1001
- <div class="stat-card">
1002
- <div class="stat-number">1.5B</div>
1003
- <div class="stat-label">Parameters</div>
1004
- </div>
1005
- <div class="stat-card">
1006
- <div class="stat-number">CPU</div>
1007
- <div class="stat-label">Optimized</div>
1008
- </div>
1009
- <div class="stat-card">
1010
- <div class="stat-number">< 5s</div>
1011
- <div class="stat-label">Response Time</div>
1012
- </div>
1013
- <div class="stat-card">
1014
- <div class="stat-number">Multi</div>
1015
- <div class="stat-label">Document Types</div>
1016
- </div>
1017
- </div>
1018
- """)
1019
-
1020
- # --- Main Content Area ---
1021
  with gr.Row():
 
 
 
 
 
 
 
 
 
 
 
 
1022
 
1023
- # --- Left Column: Inputs ---
1024
- with gr.Column(scale=1):
1025
- with gr.Column(elem_classes="input-container"):
1026
- with gr.Tabs():
1027
-
1028
- # --- Hackathon Submission Tab ---
1029
- with gr.Tab("🎯 Hackathon Submission", id=0):
1030
- with gr.Column(elem_classes="tab-content"):
1031
- gr.HTML('<h3 class="section-title">📄 Document Analysis Setup</h3>')
1032
-
1033
- hack_url = gr.Textbox(
1034
- label="📄 Document URL (PDF/DOCX)",
1035
- placeholder="Enter the public URL of the document...",
1036
- lines=2,
1037
- info="Supports PDF and DOCX formats from public URLs"
1038
- )
1039
-
1040
- hack_questions = gr.Textbox(
1041
- label="❓ Questions (JSON array or one per line)",
1042
- placeholder='["What is the grace period?", "Is maternity covered?"]',
1043
- lines=8,
1044
- info="Enter questions as JSON array or one question per line"
1045
- )
1046
-
1047
- gr.HTML("""
1048
- <div class="example-box">
1049
- <strong>💡 Example:</strong><br>
1050
- <small>URL: Insurance policy document<br>
1051
- Questions: Grace period, coverage details, waiting periods</small>
1052
- </div>
1053
- """)
1054
-
1055
- gr.Examples(
1056
- examples=[
1057
- [
1058
- "https://hackrx.blob.core.windows.net/assets/policy.pdf?sp=r&st=2024-07-28T17:58:36Z&se=2024-08-05T01:58:36Z&spr=https&sv=2022-11-02&sr=b&sig=P3mH1m6xY95UPp5qT24l6j2l9V82p8vGEx2tTQP4fF0%3D",
1059
- '["What is the grace period for premium payment?","What is the waiting period for Pre-existing Diseases?","is maternity covered in this policy?"]'
1060
- ]
1061
- ],
1062
- inputs=[hack_url, hack_questions],
1063
- label="Sample Insurance Policy Analysis"
1064
- )
1065
-
1066
- with gr.Row():
1067
- hack_clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm")
1068
- hack_submit_btn = gr.Button("🚀 Process Submission", variant="primary", size="lg")
1069
-
1070
- hack_status = gr.Markdown(visible=False, elem_classes="status-text")
1071
-
1072
- # --- Single Query Analysis Tab ---
1073
- with gr.Tab("🔍 Single Query Analysis", id=1):
1074
- with gr.Column(elem_classes="tab-content"):
1075
- gr.HTML('<h3 class="section-title">🔍 Detailed Document Query</h3>')
1076
-
1077
- single_url = gr.Textbox(
1078
- label="📄 Document URL",
1079
- placeholder="Enter the public URL of the document...",
1080
- lines=2,
1081
- info="URL to your PDF or DOCX document"
1082
- )
1083
-
1084
- single_question = gr.Textbox(
1085
- label="❓ Your Question",
1086
- placeholder="What is the waiting period for cataract surgery?",
1087
- lines=5,
1088
- info="Ask a specific question about your document"
1089
- )
1090
-
1091
- gr.HTML("""
1092
- <div class="example-box">
1093
- <strong>💡 Pro Tip:</strong><br>
1094
- <small>Be specific in your questions for better results. Include context like "waiting period", "coverage amount", or "eligibility criteria".</small>
1095
- </div>
1096
- """)
1097
-
1098
- with gr.Row():
1099
- single_clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm")
1100
- single_submit_btn = gr.Button("🔍 Get Detailed Answer", variant="primary", size="lg")
1101
-
1102
- single_status = gr.Markdown(visible=False, elem_classes="status-text")
1103
-
1104
- # --- Right Column: Outputs ---
1105
- with gr.Column(scale=2):
1106
- with gr.Column(elem_classes="output-container"):
1107
- gr.HTML('<h3 class="section-title">📊 Analysis Results</h3>')
1108
-
1109
- with gr.Tabs():
1110
- with gr.Tab("✅ Hackathon Results", id=2):
1111
- hack_output = gr.Textbox(
1112
- label="📊 Hackathon JSON Response",
1113
- lines=25,
1114
- max_lines=35,
1115
- interactive=False,
1116
- info="Complete JSON response with all answers and metadata",
1117
- show_copy_button=True
1118
- )
1119
-
1120
- with gr.Tab("🔍 Single Query Results", id=3):
1121
- single_output = gr.Textbox(
1122
- label="📋 Detailed Single Query Response",
1123
- lines=25,
1124
- max_lines=35,
1125
- interactive=False,
1126
- info="Comprehensive answer with supporting context",
1127
- show_copy_button=True
1128
- )
1129
-
1130
- # --- Footer ---
1131
- gr.HTML("""
1132
- <div style="text-align: center; padding: 2rem; color: #64748b; border-top: 1px solid #e2e8f0; margin-top: 2rem;">
1133
- <p><strong>⚡ CPU-Optimized for Hugging Face Spaces</strong></p>
1134
- <p>Built with advanced RAG architecture for maximum accuracy on CPU hardware</p>
1135
- </div>
1136
  """)
1137
-
1138
- # --- Event Handlers ---
1139
 
1140
- # Hackathon Tab Logic
1141
- hack_submit_btn.click(
1142
- fn=hackathon_wrapper,
1143
  inputs=[hack_url, hack_questions],
1144
- outputs=[hack_status, hack_output]
1145
  )
1146
 
1147
- hack_clear_btn.click(
1148
- lambda: (None, None, None, gr.Markdown(visible=False)),
1149
- outputs=[hack_url, hack_questions, hack_output, hack_status]
1150
- )
1151
-
1152
- # Single Query Tab Logic
1153
- single_submit_btn.click(
1154
- fn=single_query_wrapper,
1155
  inputs=[single_url, single_question],
1156
- outputs=[single_status, single_output]
1157
- )
1158
-
1159
- single_clear_btn.click(
1160
- lambda: (None, None, None, gr.Markdown(visible=False)),
1161
- outputs=[single_url, single_question, single_output, single_status]
1162
  )
1163
 
1164
  # Queue for better performance on Spaces
 
666
  # Initialize the system
667
  high_performance_system = HighPerformanceSystem()
668
 
669
+ def process_hackathon_submission(document_url: str, questions_text: str) -> str:
670
+ """Main function for hackathon submission"""
 
 
 
671
  try:
672
+ # Validate inputs
673
+ if not document_url.strip():
674
+ return json.dumps({"error": "Document URL is required"}, indent=2)
675
+
676
+ if not questions_text.strip():
677
+ return json.dumps({"error": "Questions are required"}, indent=2)
678
+
679
+ # Parse questions
680
+ try:
681
+ if questions_text.strip().startswith('['):
682
+ questions = json.loads(questions_text)
683
+ else:
684
+ questions = [q.strip() for q in questions_text.split('\n') if q.strip()]
685
+ except json.JSONDecodeError:
686
  questions = [q.strip() for q in questions_text.split('\n') if q.strip()]
687
 
688
  if not questions:
689
+ return json.dumps({"error": "No valid questions found"}, indent=2)
690
 
691
  # Process document
692
+ doc_result = hackathon_system.process_document_efficiently(document_url)
693
+ if not doc_result.get('success'):
694
+ return json.dumps({"error": f"Document processing failed: {doc_result.get('error')}"}, indent=2)
695
 
696
  # Process questions
697
+ batch_result = hackathon_system.process_batch_queries(questions)
698
+
699
+ # Format response for hackathon
700
+ response = {
701
+ "answers": batch_result['answers'],
702
+ "system_performance": {
703
+ "processing_time_seconds": round(batch_result['metadata']['total_processing_time'], 2),
704
+ "token_efficiency": round(batch_result['metadata']['tokens_per_question'], 1),
705
+ "chunks_processed": doc_result['chunks_created'],
706
+ "average_confidence": round(batch_result['metadata']['accuracy_indicators'].get('average_confidence', 0), 3),
707
+ "estimated_accuracy_percentage": round(batch_result['metadata']['accuracy_indicators'].get('estimated_accuracy', 0), 1),
708
+ "high_confidence_answers": batch_result['metadata']['accuracy_indicators'].get('high_confidence_answers', 0)
709
+ },
710
+ "technical_features": {
711
+ "semantic_chunking": True,
712
+ "context_optimization": True,
713
+ "domain_enhancement": True,
714
+ "source_traceability": True,
715
+ "explainable_reasoning": True
716
+ },
717
+ "optimization_summary": [
718
+ f"Processed {len(questions)} questions in {batch_result['metadata']['total_processing_time']:.1f}s",
719
+ f"Average {batch_result['metadata']['tokens_per_question']:.0f} tokens per question",
720
+ f"{batch_result['metadata']['accuracy_indicators'].get('high_confidence_percentage', 0):.1f}% high-confidence answers",
721
+ f"Estimated {batch_result['metadata']['accuracy_indicators'].get('estimated_accuracy', 0):.1f}% accuracy"
722
+ ]
723
  }
724
 
725
+ return json.dumps(response, indent=2)
726
 
 
 
727
  except Exception as e:
728
+ logger.error(f"Hackathon submission error: {e}")
729
+ return json.dumps({"error": f"System error: {str(e)}"}, indent=2)
730
 
731
+ def process_single_optimized(document_url: str, question: str) -> str:
732
+ """Process single question with detailed feedback"""
733
+ if not document_url.strip():
734
+ return "Error: Document URL is required"
735
+
736
+ if not question.strip():
737
+ return "Error: Question is required"
738
 
739
  try:
740
+ # Process document if needed
741
+ if not hackathon_system.index:
742
+ doc_result = hackathon_system.process_document_efficiently(document_url)
743
+ if not doc_result.get('success'):
744
+ return f"Error: Document processing failed - {doc_result.get('error')}"
745
 
746
+ # Process question
747
+ result = hackathon_system.process_single_query(question)
748
 
749
  # Format detailed response
750
+ response = f"""Answer: {result['answer']}
751
+
752
+ Confidence: {result['confidence']:.2f}
753
+ Reasoning: {result['reasoning']}
754
+ Token Usage: {result['token_count']} tokens
755
+ Processing Time: {result['processing_time']:.2f}s
756
+
757
+ Sources:
758
+ """
759
+ for i, source in enumerate(result['sources'][:2], 1):
760
+ response += f"{i}. {source['section']} (Page {source['page']}, Confidence: {source['confidence']:.2f})\n"
 
 
 
 
 
761
 
762
+ return response
763
 
764
  except Exception as e:
765
+ return f"Error: {str(e)}"
766
 
767
+ # Enhanced Gradio Interface for Hackathon
768
+ with gr.Blocks(title="🏆 Hackathon-Winning Query System", theme=gr.themes.Default()) as demo:
769
+ gr.Markdown("# 🏆 LLM-Powered Intelligent Query–Retrieval System")
770
+ gr.Markdown("**Optimized for Accuracy, Token Efficiency, Speed, and Explainability**")
771
 
772
+ with gr.Tab("🎯 Hackathon Submission"):
773
+ gr.Markdown("### Official hackathon format with optimized processing")
774
+ with gr.Row():
775
+ with gr.Column():
776
+ hack_url = gr.Textbox(
777
+ label="Document URL (PDF/DOCX)",
778
+ placeholder="https://hackrx.blob.core.windows.net/assets/policy.pdf?...",
779
+ lines=2
780
+ )
781
+ hack_questions = gr.Textbox(
782
+ label="Questions (JSON array or line-separated)",
783
+ placeholder='["What is the grace period?", "What is the waiting period for PED?"]',
784
+ lines=15
785
+ )
786
+ hack_submit = gr.Button("🚀 Process Hackathon Submission", variant="primary", size="lg")
787
+
788
+ with gr.Column():
789
+ hack_output = gr.Textbox(
790
+ label="Structured JSON Response",
791
+ lines=20,
792
+ max_lines=30
793
+ )
794
 
795
+ with gr.Tab("🔍 Single Query (Detailed)"):
796
+ gr.Markdown("### Single query with detailed analysis and feedback")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
797
  with gr.Row():
798
+ with gr.Column():
799
+ single_url = gr.Textbox(
800
+ label="Document URL",
801
+ placeholder="https://example.com/document.pdf",
802
+ lines=1
803
+ )
804
+ single_question = gr.Textbox(
805
+ label="Question",
806
+ placeholder="What is the grace period for premium payment?",
807
+ lines=3
808
+ )
809
+ single_button = gr.Button("Get Detailed Answer", variant="secondary")
810
 
811
+ with gr.Column():
812
+ single_output = gr.Textbox(
813
+ label="Detailed Response with Metrics",
814
+ lines=15,
815
+ max_lines=25
816
+ )
817
+
818
+ with gr.Tab("📊 System Performance"):
819
+ gr.Markdown("""
820
+ ## 🏆 Hackathon Winning Features
821
+
822
+ ### Accuracy Optimizations
823
+ - **Semantic Chunking**: Preserves context boundaries and meaning
824
+ - **Multi-stage Retrieval**: Semantic search + relevance ranking
825
+ - **Context Optimization**: Maintains key information within token limits
826
+ - **Structured Parsing**: Handles PDF sections, tables, and metadata
827
+
828
+ ### Token Efficiency
829
+ - **Smart Context Building**: Optimizes token usage for maximum relevance
830
+ - **Lightweight Models**: Efficient models that fit 16GB constraints
831
+ - **Batch Processing**: Amortized setup costs across multiple queries
832
+ - **Token Counting**: Accurate tracking and optimization
833
+
834
+ ### 🚀 Latency Optimization
835
+ - **Efficient Embeddings**: Fast sentence transformers
836
+ - **Optimized FAISS**: Memory-efficient similarity search
837
+ - **Caching Strategy**: Document and embedding caching
838
+ - **Parallel Processing**: Where possible within constraints
839
+
840
+ ### 🧩 Reusability & Modularity
841
+ - **Component Architecture**: Separate processors for different document types
842
+ - **Configurable Parameters**: Adjustable chunk sizes, search parameters
843
+ - **Error Handling**: Robust fallbacks and recovery
844
+ - **Extension Ready**: Easy to add new document types or models
845
+
846
+ ### 🔍 Explainability
847
+ - **Source Tracing**: Page numbers, sections, confidence scores
848
+ - **Reasoning Generation**: Clear explanation of answer derivation
849
+ - **Question Classification**: Understanding query types
850
+ - **Confidence Metrics**: Transparent confidence scoring
851
+
852
+ ## 📈 Expected Performance Metrics
853
+ - **Accuracy**: 85-95% on domain-specific queries
854
+ - **Token Efficiency**: ~400-600 tokens per question
855
+ - **Latency**: <5 seconds per question (after document processing)
856
+ - **Memory Usage**: <14GB RAM utilization
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
857
  """)
 
 
858
 
859
+ # Event handlers
860
+ hack_submit.click(
861
+ process_hackathon_submission,
862
  inputs=[hack_url, hack_questions],
863
+ outputs=[hack_output]
864
  )
865
 
866
+ single_button.click(
867
+ process_single_optimized,
 
 
 
 
 
 
868
  inputs=[single_url, single_question],
869
+ outputs=[single_output]
 
 
 
 
 
870
  )
871
 
872
  # Queue for better performance on Spaces