Commit ·
92a9c38
1
Parent(s): b5187d4
Add ChromaDB-backed vector store and wire it into app start-up and hybrid (local + cloud) remedy search
Browse files- app.py +29 -5
- requirements.txt +4 -3
- vector_store.py +61 -0
app.py
CHANGED
|
@@ -31,6 +31,7 @@ from google.genai import types
|
|
| 31 |
from story_generator import create_story_prompt_from_pdf, generate_video_from_prompt
|
| 32 |
from langchain_huggingface import HuggingFaceEndpoint
|
| 33 |
from bigquery_uploader import upload_diagnosis_to_bigquery
|
|
|
|
| 34 |
|
| 35 |
print("✅ All libraries imported successfully.")
|
| 36 |
|
|
@@ -53,6 +54,7 @@ print("Performing initial setup...")
|
|
| 53 |
VISION_MODEL, PROCESSOR = load_vision_model()
|
| 54 |
KB = KnowledgeBase()
|
| 55 |
RETRIEVER = KB # The retriever is now the KB itself
|
|
|
|
| 56 |
|
| 57 |
# Initialize ADK components for Connected Mode
|
| 58 |
adk_components = initialize_adk(VISION_MODEL, PROCESSOR, RETRIEVER)
|
|
@@ -169,17 +171,39 @@ def create_field_mode_ui(user_state):
|
|
| 169 |
report_title = diagnosis
|
| 170 |
cleaned_diagnosis = clean_diagnosis_text(diagnosis)
|
| 171 |
|
| 172 |
-
|
| 173 |
-
|
|
|
|
|
|
|
| 174 |
|
| 175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
|
| 177 |
diagnosis_data = {
|
| 178 |
"ai_diagnosis": report_title,
|
| 179 |
-
"recommended_action":
|
|
|
|
| 180 |
"farmer_id": user_state.get("uid"),
|
| 181 |
-
"
|
|
|
|
| 182 |
"crop_type": "Maize",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
}
|
| 184 |
upload_diagnosis_to_bigquery(diagnosis_data)
|
| 185 |
|
|
|
|
| 31 |
from story_generator import create_story_prompt_from_pdf, generate_video_from_prompt
|
| 32 |
from langchain_huggingface import HuggingFaceEndpoint
|
| 33 |
from bigquery_uploader import upload_diagnosis_to_bigquery
|
| 34 |
+
from vector_store import embed_and_store_documents, search_documents
|
| 35 |
|
| 36 |
print("✅ All libraries imported successfully.")
|
| 37 |
|
|
|
|
| 54 |
VISION_MODEL, PROCESSOR = load_vision_model()
|
| 55 |
KB = KnowledgeBase()
|
| 56 |
RETRIEVER = KB # The retriever is now the KB itself
|
| 57 |
+
embed_and_store_documents() # Initialize and load the vector store
|
| 58 |
|
| 59 |
# Initialize ADK components for Connected Mode
|
| 60 |
adk_components = initialize_adk(VISION_MODEL, PROCESSOR, RETRIEVER)
|
|
|
|
| 171 |
report_title = diagnosis
|
| 172 |
cleaned_diagnosis = clean_diagnosis_text(diagnosis)
|
| 173 |
|
| 174 |
+
# --- Hybrid Search ---
|
| 175 |
+
# 1. Local Vector Store Search
|
| 176 |
+
local_remedy_list = search_documents(cleaned_diagnosis)
|
| 177 |
+
local_remedy = local_remedy_list[0] if local_remedy_list else "No remedy found in local knowledge base."
|
| 178 |
|
| 179 |
+
# 2. BigQuery Search (as fallback or primary)
|
| 180 |
+
search_query = "healthy maize" if "healthy" in cleaned_diagnosis.lower() else "phosphorus" if "phosphorus" in cleaned_diagnosis.lower() else "Wetin My Eye See So"
|
| 181 |
+
cloud_remedy = search_bigquery_for_remedy(search_query)
|
| 182 |
+
|
| 183 |
+
final_response = f"""
|
| 184 |
+
## Diagnosis Report
|
| 185 |
+
**Condition:**
|
| 186 |
+
### {report_title}
|
| 187 |
+
---
|
| 188 |
+
## Suggested Remedy (from Local Knowledge)
|
| 189 |
+
{local_remedy}
|
| 190 |
+
---
|
| 191 |
+
## Suggested Remedy (from Cloud)
|
| 192 |
+
{cloud_remedy}
|
| 193 |
+
"""
|
| 194 |
|
| 195 |
diagnosis_data = {
|
| 196 |
"ai_diagnosis": report_title,
|
| 197 |
+
"recommended_action": local_remedy, # Prioritizing local remedy for logging
|
| 198 |
+
"confidence_score": None, # Placeholder
|
| 199 |
"farmer_id": user_state.get("uid"),
|
| 200 |
+
"gps_latitude": None, # Placeholder
|
| 201 |
+
"gps_longitude": None, # Placeholder
|
| 202 |
"crop_type": "Maize",
|
| 203 |
+
"crop_variety": None, # Placeholder
|
| 204 |
+
"farmer_feedback": feedback,
|
| 205 |
+
"treatment_applied": None, # Placeholder
|
| 206 |
+
"outcome_image_id": None, # Placeholder
|
| 207 |
}
|
| 208 |
upload_diagnosis_to_bigquery(diagnosis_data)
|
| 209 |
|
requirements.txt
CHANGED
|
@@ -11,12 +11,13 @@ langchain-huggingface
|
|
| 11 |
langchain-core
|
| 12 |
# For our RAG knowledge base (vector store and embeddings)
|
| 13 |
sentence-transformers
|
| 14 |
-
|
|
|
|
| 15 |
pymupdf
|
| 16 |
duckduckgo-search
|
| 17 |
langgraph
|
| 18 |
google-genai
|
| 19 |
google-adk
|
| 20 |
-
pypdf
|
| 21 |
google-cloud-bigquery
|
| 22 |
-
requests
|
|
|
|
|
|
| 11 |
langchain-core
|
| 12 |
# For our RAG knowledge base (vector store and embeddings)
|
| 13 |
sentence-transformers
|
| 14 |
+
chromadb
|
| 15 |
+
pypdf
|
| 16 |
pymupdf
|
| 17 |
duckduckgo-search
|
| 18 |
langgraph
|
| 19 |
google-genai
|
| 20 |
google-adk
|
|
|
|
| 21 |
google-cloud-bigquery
|
| 22 |
+
requests
|
| 23 |
+
faiss-cpu
|
vector_store.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import chromadb
from sentence_transformers import SentenceTransformer
import os

# --- Constants ---
# Sentence-transformer checkpoint used for both indexing and querying;
# the two must match so document and query vectors share one space.
MODEL_NAME = "all-MiniLM-L6-v2"
# ChromaDB collection that holds the knowledge-base embeddings.
COLLECTION_NAME = "aura_mind_knowledge"
# Directory scanned for .txt documents to embed.
KNOWLEDGE_BASE_DIR = "knowledge_base_data"

# --- Initialize ChromaDB and Model ---
# NOTE(review): all three initializations run at import time, so importing
# this module loads the embedding model eagerly (and presumably triggers a
# first-time model download — confirm deployment implications).
client = chromadb.PersistentClient(path="chroma_db")  # on-disk store under ./chroma_db
model = SentenceTransformer(MODEL_NAME)
collection = client.get_or_create_collection(name=COLLECTION_NAME)
def embed_and_store_documents():
    """
    Load the local knowledge base into ChromaDB.

    Reads every ``.txt`` file in ``KNOWLEDGE_BASE_DIR``, embeds the file
    contents with the module-level sentence-transformer model, and stores
    the vectors in the module-level ChromaDB collection. Each document's
    id is its filename.

    No-ops when the collection already contains documents, so it is safe
    to call on every application start-up.
    """
    if collection.count() > 0:
        print("✅ Knowledge base is already loaded into ChromaDB.")
        return

    # Fail soft when the knowledge-base directory is absent instead of
    # aborting app start-up with an unhandled FileNotFoundError.
    if not os.path.isdir(KNOWLEDGE_BASE_DIR):
        print(f"⚠️ Knowledge base directory '{KNOWLEDGE_BASE_DIR}' not found; nothing to embed.")
        return

    print("Embedding and storing documents in ChromaDB...")
    documents = []
    ids = []
    # sorted() makes insertion order deterministic — os.listdir() order is
    # arbitrary and varies across platforms/filesystems.
    for filename in sorted(os.listdir(KNOWLEDGE_BASE_DIR)):
        if filename.endswith(".txt"):
            # Explicit encoding: the platform default may not be UTF-8 and
            # the knowledge base may contain non-ASCII text.
            with open(os.path.join(KNOWLEDGE_BASE_DIR, filename), "r", encoding="utf-8") as f:
                documents.append(f.read())
                ids.append(filename)

    if documents:
        embeddings = model.encode(documents).tolist()
        collection.add(
            embeddings=embeddings,
            documents=documents,
            ids=ids,
        )
        print(f"✅ Successfully stored {len(documents)} documents in ChromaDB.")
    else:
        # Make the empty case visible in logs rather than silently doing nothing.
        print(f"⚠️ No .txt documents found in '{KNOWLEDGE_BASE_DIR}'.")
def search_documents(query: str, n_results: int = 1) -> list:
    """
    Return the knowledge-base documents most similar to *query*.

    Args:
        query: Free-text search query; an empty/falsy query short-circuits.
        n_results: Maximum number of documents to return.

    Returns:
        A list of matching document texts, or an empty list when the query
        is empty or nothing matches.
    """
    if not query:
        return []

    # Embed the query with the same model used at indexing time so both
    # vectors live in the same embedding space.
    embedded_query = model.encode([query]).tolist()
    hits = collection.query(
        query_embeddings=embedded_query,
        n_results=n_results,
    )

    matches = hits['documents']
    if not matches:
        return []
    # Chroma nests results per input query; exactly one query was issued.
    return matches[0]