abakerdp committed on
Commit
4e2a91b
·
verified ·
1 Parent(s): 562b867

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -0
app.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import chromadb
3
+ from chromadb.config import Settings
4
+ from sentence_transformers import SentenceTransformer
5
+ import json
6
+ from pathlib import Path
7
+
8
# Initialize ChromaDB and the embedding model.
# chromadb 0.4+ replaced the Settings(chroma_db_impl=...) configuration with
# PersistentClient and refuses the legacy form, so use the new entry point
# when the installed version provides it and fall back to the legacy
# configuration on older releases.
if hasattr(chromadb, "PersistentClient"):
    chroma_client = chromadb.PersistentClient(path="db")
else:
    chroma_client = chromadb.Client(Settings(
        chroma_db_impl="duckdb+parquet",
        persist_directory="db",
    ))

# Sentence-embedding model used both for indexing documents and for queries.
model = SentenceTransformer('all-MiniLM-L6-v2')
14
+
15
def initialize_database():
    """Load ``documents.json`` and index its entries in the Chroma collection.

    The file is expected to hold a JSON list of objects. Every object must
    provide ``"content"`` and ``"title"``; ``"source"`` and ``"section"`` are
    optional and default to an empty string.

    Returns:
        The ``"knowledge_base"`` collection, created if it does not exist yet.

    Raises:
        FileNotFoundError: If ``documents.json`` is missing.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a document lacks ``"content"`` or ``"title"``.
    """
    # Load documents from JSON file; be explicit about the encoding so the
    # result does not depend on the platform's default locale.
    docs_path = Path("documents.json")
    with open(docs_path, encoding="utf-8") as f:
        documents = json.load(f)

    # Create or get collection. A dedicated call replaces the former bare
    # ``except:`` around get_collection, which also hid unrelated failures.
    collection = chroma_client.get_or_create_collection(name="knowledge_base")

    # Nothing to index: skip encoding and adding empty batches.
    if not documents:
        return collection

    # Add documents to collection
    texts = [doc["content"] for doc in documents]
    metadata = [
        {
            "title": doc["title"],
            "source": doc.get("source", ""),
            "section": doc.get("section", ""),
        }
        for doc in documents
    ]
    embeddings = model.encode(texts).tolist()
    # Ids are the positional index of each document, as strings.
    ids = [str(i) for i in range(len(texts))]

    collection.add(
        documents=texts,
        embeddings=embeddings,
        metadatas=metadata,
        ids=ids,
    )

    return collection
43
+
44
# Build the index once at import time; search_documents reads this
# module-level collection on every query.
collection = initialize_database()
46
+
47
def search_documents(query, top_k=5):
    """Return the ``top_k`` closest documents for ``query`` as display text.

    Args:
        query: The user's question. ``None``, empty, or whitespace-only input
            yields a prompt message instead of a search.
        top_k: Number of results to request. Converted to ``int`` because UI
            sliders may deliver the value as a float.

    Returns:
        A formatted string with one section per hit (title, source, section,
        relevance percentage, and document text), or ``"Please enter a query"``
        when no usable query was given.
    """
    # Guard against None as well as blank input before touching the model.
    if not query or not query.strip():
        return "Please enter a query"

    # Generate embedding for query
    query_embedding = model.encode(query).tolist()

    # Query the collection
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=int(top_k),
        include=["documents", "metadatas", "distances"],
    )

    # Format results
    # NOTE(review): the emoji/bullet/rule glyphs in the strings below look
    # mis-encoded (mojibake); they are kept verbatim here - confirm against
    # the original file before changing them.
    sections = []
    for doc, metadata, distance in zip(
        results["documents"][0],
        results["metadatas"][0],
        results["distances"][0],
    ):
        # The linear mapping only yields 0-100 for distances in [0, 2];
        # clamp so larger distances cannot display a negative percentage.
        relevance = max(0, min(100, round((1 - (distance / 2)) * 100)))
        sections.append(
            f"\n\nπŸ“š {metadata['title']}\n"
            f"πŸ“ {metadata['source']} β€’ {metadata['section']} β€’ Relevance: {relevance}%\n"
            f"───────────────────\n{doc}\n"
        )

    return "".join(sections)
74
+
75
# Create Gradio interface: a question box and a result-count slider feed
# search_documents, whose text output is shown in a read-out textbox.
_question_input = gr.Textbox(
    label="Question",
    placeholder="Enter your question here...",
    lines=2,
)
_result_count_input = gr.Slider(
    label="Number of results",
    minimum=1,
    maximum=10,
    step=1,
    value=5,
)
_results_output = gr.Textbox(
    lines=20,
    label="Search Results",
)

interface = gr.Interface(
    fn=search_documents,
    inputs=[_question_input, _result_count_input],
    outputs=_results_output,
    title="Knowledge Base Search",
    description="Ask questions about your documents and get relevant answers.",
    theme="default",
    allow_flagging="never",
)

# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    interface.launch()