kheopss committed on
Commit
93c9f12
·
verified ·
1 Parent(s): 5fa2648

Upload 4 files

Browse files
public/elements/ImageGallery.jsx ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // components/GlobalImageModal.tsx
2
+ 'use client'
3
+
4
+ import { useEffect, useState } from "react"
5
+ import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogDescription } from "@/components/ui/dialog"
6
+
7
/**
 * Global lightbox: listens for clicks on any <img> in the document and shows
 * the clicked image enlarged in a Dialog. Mount once near the app root.
 */
export default function GlobalImageModal() {
  const [selectedImageSrc, setSelectedImageSrc] = useState<string | null>(null)

  useEffect(() => {
    const handleClick = (e: MouseEvent) => {
      const target = e.target as HTMLElement
      // Only react to real images with a usable src, and ignore clicks on the
      // already-enlarged copy inside the dialog (Radix DialogContent renders
      // with role="dialog" — NOTE(review): confirm against the ui/dialog impl).
      if (target.tagName === 'IMG' && !target.closest('[role="dialog"]')) {
        const img = target as HTMLImageElement
        if (img.src) setSelectedImageSrc(img.src)
      }
    }

    document.addEventListener('click', handleClick)
    // Remove the document-level listener on unmount to avoid leaks.
    return () => document.removeEventListener('click', handleClick)
  }, [])

  return (
    <Dialog open={!!selectedImageSrc} onOpenChange={() => setSelectedImageSrc(null)}>
      <DialogContent className="w-auto max-w-fit max-h-[90vh] flex flex-col items-center justify-center">
        {selectedImageSrc && (
          <img
            src={selectedImageSrc}
            alt="Enlarged view"
            className="max-w-full max-h-[80vh] rounded-lg m-3"
          />
        )}
      </DialogContent>
    </Dialog>
  )
}
public/favicon.ico ADDED
public/global.css ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
/* Images are clickable (the global lightbox opens them), so show a pointer
   and zoom slightly on hover. The scale must live on :hover — a static
   transform on the base rule would leave the transition with nothing to
   animate and keep every image permanently enlarged. */
img {
  cursor: pointer !important;
  transition: transform 0.2s ease;
}

img:hover {
  transform: scale(1.05);
}
vdb.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import hashlib
2
+ import json
3
+ import re
4
+ from pathlib import Path
5
+
6
+ from dotenv import load_dotenv
7
+ from llama_index.core import (QueryBundle)
8
+ from llama_index.core.postprocessor import LLMRerank
9
+ from nest_asyncio import apply
10
+ from openai import OpenAI
11
+ from tqdm import tqdm
12
+
13
+ from llama_index.core import VectorStoreIndex
14
+ from llama_index.embeddings.openai import OpenAIEmbedding
15
+
16
+ from llama_index.core import Document
17
+
18
# Load variables from .env (e.g. OPENAI_API_KEY for the OpenAI client below).
load_dotenv()
20
+
21
def build_documents(sections):
    """Wrap parsed markdown sections into llama_index Documents.

    Each section mapping must provide "title" and "content"; the title is
    attached as metadata so retrieval results can surface it later.
    """
    return [
        Document(
            text=section["content"],
            metadata={"section_title": section["title"]},
        )
        for section in sections
    ]
27
+
28
def create_vector_index(docs):
    """Embed `docs` with OpenAI embeddings and build an in-memory vector index."""
    return VectorStoreIndex.from_documents(docs, embed_model=OpenAIEmbedding())
32
+
33
def split_markdown_by_section(md_path: str):
    """Split a markdown file into chunks, one per top-level ``# `` heading.

    Returns a list of ``{"title", "content"}`` dicts; blank sections are
    skipped. Any text before the first heading is treated as a section whose
    title is its first line (same behavior as the original split).
    """
    raw = Path(md_path).read_text(encoding="utf-8")
    chunks = []
    for part in re.split(r"(?m)^# ", raw):
        if not part.strip():
            continue
        # First line is the heading text; everything after the first newline
        # (possibly nothing) is the section body.
        head, _, rest = part.partition("\n")
        chunks.append({"title": head.strip(), "content": rest.strip()})
    return chunks
44
+
45
+
46
+
47
# Shared OpenAI client used by completion(); picks up credentials from the
# environment populated by load_dotenv() above.
client = OpenAI()

# nest_asyncio.apply(): patch the running event loop so nested asyncio use
# (e.g. inside a notebook or an async host framework) does not raise.
apply()

# Register tqdm's pandas integration (enables .progress_apply on DataFrames).
tqdm.pandas()
52
+
53
+
54
def hash_data(data):
    """Return a stable SHA-256 hex digest for any JSON-serializable value.

    Dict keys are sorted before serialization, so logically equal dicts
    always produce the same digest regardless of insertion order.
    """
    canonical = json.dumps(data, sort_keys=True).encode("utf-8")
    return hashlib.sha256(canonical).hexdigest()
62
+
63
+
64
def get_retrieved_nodes(query, index, vector_top_k=10, reranker_top_n=3, with_reranker=True):
    """Retrieve the top-k nodes for `query` from `index`.

    Vector similarity fetches `vector_top_k` candidates; when `with_reranker`
    is true they are narrowed to `reranker_top_n` by an LLM reranker.
    """
    bundle = QueryBundle(query)
    nodes = index.as_retriever(similarity_top_k=vector_top_k).retrieve(bundle)

    if with_reranker:
        reranker = LLMRerank(choice_batch_size=5, top_n=reranker_top_n)
        nodes = reranker.postprocess_nodes(nodes, bundle)

    return nodes
74
+
75
+
76
def get_all_text(nodes):
    """Concatenate node texts as newline-prefixed bullets joined by spaces."""
    bullets = [f"\n- {node.get_text()}" for node in nodes]
    return " ".join(bullets)
78
+
79
+
80
async def further_retrieve(query, index, messages):
    """Fetch context for `query` from the index and hand off to completion().

    Returns the async generator produced by completion() (not awaited here),
    or None when retrieval/prompt assembly raises — the error is printed and
    swallowed, best-effort style.
    """
    try:
        nodes = get_retrieved_nodes(
            query, index, vector_top_k=10, reranker_top_n=3, with_reranker=False
        )
        docs = get_all_text(nodes)
        return completion(query, docs, messages)
    except Exception as e:
        print(e)
        return None
87
+
88
+
89
async def completion(query, docs, messages):
    """Stream a customer-support answer for `query`, grounded in `docs`.

    Appends two prompt entries to `messages` and streams the model's reply
    token-by-token as an async generator of text chunks.

    NOTE(review): `messages` is mutated in place via extend(), so the two
    prompt entries persist in the caller's history after this call — confirm
    that is intentional.
    """
    messages.extend([
        {
            "role": "system",
            "content": f"""
Given tone and voice guidelines and customer support help documents, act as a customer support bot.
Answer any further questions as if you are customer support bot.
TONE AND VOICE:
promote the society, be gentle, be kind always positive.

DOCUMENT:
{docs}



INSTRUCTIONS:

- Answer the users QUESTION using the DOCUMENT text above.
- Format formula into latex format between $...$ or \[...\]
- Keep your answer ground in the facts of the DOCUMENT or chat history.
- If document has an image markdown ,use it in your answer
- Respond in same language as user Question
- Use Markdown Structure
- DOCUMENT can have images with there descriptions
- if a text is followed by an image dont skip the image
QUESTION:
"""
        },
        {
            # NOTE(review): the user question is sent with role "system";
            # role "user" is the conventional choice — confirm intentional.
            "role": "system",
            "content": query
        }
    ])
    # Local variable shadows the enclosing function name `completion`; works,
    # but a distinct name (e.g. `stream`) would be clearer.
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        stream=True
    )
    # Synchronous iteration over the stream inside an async generator: this
    # blocks the event loop while waiting for chunks — acceptable here, but
    # worth noting for callers running other coroutines concurrently.
    for chunk in completion:
        if chunk.choices[0].delta.content:
            # Skip empty/None deltas (e.g. role-only or final chunks).
            yield chunk.choices[0].delta.content