IsmatS committed on
Commit
3b014b5
·
1 Parent(s): 0203ccf
Files changed (2) hide show
  1. .gitignore +1 -1
  2. app/templates/index.html +140 -0
.gitignore CHANGED
@@ -20,7 +20,7 @@ pnpm-debug.log*
20
  coverage
21
  .idea
22
  *.iml
23
- *.log
24
  __pycache__
25
  *.pyc
26
  *.pyo
 
20
  coverage
21
  .idea
22
  *.iml
23
+ # *.log
24
  __pycache__
25
  *.pyc
26
  *.pyo
app/templates/index.html ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>SOCAR Historical Documents AI System</title>
7
+ <link rel="stylesheet" href="{{ url_for('static', path='/css/style.css') }}">
8
+ </head>
9
+ <body>
10
+ <div class="container">
11
+ <header> <!-- Page banner: product name (the page's only h1) and subtitle -->
12
+ <div class="logo">
13
+ <h1>SOCAR AI Document System</h1>
14
+ <p class="subtitle">Historical Oil &amp; Gas Documents Processing</p> <!-- ampersand escaped; renders as "&" -->
15
+ </div>
16
+ </header>
17
+
18
+ <main>
19
+ <div class="hero"> <!-- Hero banner: section headline (h2) followed by a one-line tagline -->
20
+ <h2>Advanced Document Intelligence Platform</h2>
21
+ <p>Powered by Vision-Language Models and Retrieval-Augmented Generation</p>
22
+ </div>
23
+
24
+ <div class="services"> <!-- Two service cards (OCR, LLM); each has a demo button and a link to the API docs -->
25
+ <div class="service-card ocr-card">
26
+ <div class="icon" aria-hidden="true">📄</div> <!-- decorative emoji, hidden from assistive technology -->
27
+ <h3>OCR Endpoint</h3>
28
+ <p>Extract text from historical PDFs using Llama-4-Maverick-17B Vision model</p>
29
+ <ul class="features">
30
+ <li>Multi-language support (Azerbaijani, Russian, English)</li>
31
+ <li>Handwriting recognition</li>
32
+ <li>Image detection and referencing</li>
33
+ <li>88.3% Character Success Rate</li>
34
+ </ul>
35
+ <button type="button" onclick="showOCRDemo()" class="btn btn-primary">Try OCR Demo</button> <!-- explicit type: never acts as a submit button -->
36
+ <a href="/docs#/default/ocr_endpoint_ocr_post" class="btn btn-secondary" target="_blank" rel="noopener noreferrer" aria-label="OCR API Documentation (opens in a new tab)">API Documentation</a>
37
+ </div>
38
+
39
+ <div class="service-card llm-card">
40
+ <div class="icon" aria-hidden="true">🤖</div> <!-- decorative emoji, hidden from assistive technology -->
41
+ <h3>LLM Endpoint</h3>
42
+ <p>Ask questions about historical documents with RAG-powered chatbot</p>
43
+ <ul class="features">
44
+ <li>Retrieval-Augmented Generation (RAG)</li>
45
+ <li>1,128 vectors from 28 documents</li>
46
+ <li>Citation-focused responses</li>
47
+ <li>Top-3 document retrieval</li>
48
+ </ul>
49
+ <button type="button" onclick="showLLMDemo()" class="btn btn-primary">Try LLM Demo</button> <!-- explicit type: never acts as a submit button -->
50
+ <a href="/docs#/default/llm_endpoint_llm_post" class="btn btn-secondary" target="_blank" rel="noopener noreferrer" aria-label="LLM API Documentation (opens in a new tab)">API Documentation</a>
51
+ </div>
52
+ </div>
53
+
54
+ <div id="demoSection" class="demo-section" style="display: none;"> <!-- Demo panel, hidden on load; holds the OCR upload demo and the LLM chat demo, each also hidden on load -->
55
+ <div class="demo-container">
56
+ <h3 id="demoTitle"></h3> <!-- empty in the template -->
57
+ <button type="button" onclick="closeDemo()" class="close-btn" aria-label="Close demo">✕</button> <!-- icon-only button needs an accessible name -->
58
+
59
+ <div id="ocrDemo" style="display: none;">
60
+ <div class="upload-area">
61
+ <input type="file" id="ocrFile" accept=".pdf,application/pdf" onchange="handleOCRUpload()">
62
+ <label for="ocrFile" class="upload-label">
63
+ <span class="upload-icon" aria-hidden="true">📁</span>
64
+ <span>Click to upload PDF or drag and drop</span>
65
+ <span class="upload-hint">Maximum file size: 10MB</span>
66
+ </label>
67
+ </div>
68
+ <div id="ocrResult" class="result-area" aria-live="polite"></div> <!-- live region so content inserted here is announced -->
69
+ </div>
70
+
71
+ <div id="llmDemo" style="display: none;">
72
+ <div class="chat-container">
73
+ <div id="chatMessages" class="chat-messages" role="log" aria-live="polite"></div> <!-- log role so appended content is announced in order -->
74
+ <div class="chat-input-container">
75
+ <textarea id="questionInput" aria-label="Question about historical documents" placeholder="Ask a question about historical documents..." rows="3"></textarea> <!-- placeholder alone is not a label -->
76
+ <button type="button" onclick="askQuestion()" class="btn btn-primary">Send</button>
77
+ </div>
78
+ </div>
79
+ </div>
80
+ </div>
81
+ </div>
82
+
83
+ <div class="tech-stack"> <!-- Technical stack: static grid of label (strong) / value (span) pairs; all values are hard-coded in this template -->
83
+ <h3>Technical Stack</h3>
84
+ <div class="tech-grid">
85
+ <div class="tech-item">
86
+ <strong>OCR Model</strong>
87
+ <span>Llama-4-Maverick-17B</span>
88
+ </div>
89
+ <div class="tech-item">
90
+ <strong>Embedding Model</strong>
91
+ <span>BAAI/bge-large-en-v1.5</span>
92
+ </div>
93
+ <div class="tech-item">
94
+ <strong>Vector Database</strong>
95
+ <span>Pinecone (1024 dims)</span>
96
+ </div>
97
+ <div class="tech-item">
98
+ <strong>LLM Model</strong>
99
+ <span>Llama-4-Maverick-17B</span>
100
+ </div>
101
+ <div class="tech-item">
102
+ <strong>Framework</strong>
103
+ <span>FastAPI + Docker</span>
104
+ </div>
105
+ <div class="tech-item">
106
+ <strong>Documents</strong>
107
+ <span>28 PDFs, 1,128 vectors</span> <!-- same counts as the LLM service card and the stats cards; update together -->
108
+ </div>
109
+ </div>
110
+ </div>
112
+
113
+ <div class="stats"> <!-- Headline metrics: four cards, each a number plus a label; figures are hard-coded in this template -->
114
+ <div class="stat-card">
115
+ <div class="stat-number">88.3%</div> <!-- same figure as the OCR card's "Character Success Rate" item -->
116
+ <div class="stat-label">OCR Accuracy (CSR)</div>
117
+ </div>
118
+ <div class="stat-card">
119
+ <div class="stat-number">1,128</div>
120
+ <div class="stat-label">Total Vectors</div>
121
+ </div>
122
+ <div class="stat-card">
123
+ <div class="stat-number">28</div>
124
+ <div class="stat-label">Documents Indexed</div>
125
+ </div>
126
+ <div class="stat-card">
127
+ <div class="stat-number">~2.6s</div>
128
+ <div class="stat-label">Avg Response Time</div>
129
+ </div>
130
+ </div>
131
+ </main>
132
+
133
+ <footer> <!-- Page footer: attribution line -->
134
+ <p>Built for SOCAR Hackathon | Powered by Azure OpenAI &amp; Pinecone</p> <!-- ampersand escaped; renders as "&" -->
135
+ </footer>
136
+ </div>
137
+
138
+ <script src="{{ url_for('static', path='/js/app.js') }}"></script>
139
+ </body>
140
+ </html>