surfiniaburger committed on
Commit
92a9c38
·
1 Parent(s): b5187d4
Files changed (3) hide show
  1. app.py +29 -5
  2. requirements.txt +4 -3
  3. vector_store.py +61 -0
app.py CHANGED
@@ -31,6 +31,7 @@ from google.genai import types
31
  from story_generator import create_story_prompt_from_pdf, generate_video_from_prompt
32
  from langchain_huggingface import HuggingFaceEndpoint
33
  from bigquery_uploader import upload_diagnosis_to_bigquery
 
34
 
35
  print("✅ All libraries imported successfully.")
36
 
@@ -53,6 +54,7 @@ print("Performing initial setup...")
53
  VISION_MODEL, PROCESSOR = load_vision_model()
54
  KB = KnowledgeBase()
55
  RETRIEVER = KB # The retriever is now the KB itself
 
56
 
57
  # Initialize ADK components for Connected Mode
58
  adk_components = initialize_adk(VISION_MODEL, PROCESSOR, RETRIEVER)
@@ -169,17 +171,39 @@ def create_field_mode_ui(user_state):
169
  report_title = diagnosis
170
  cleaned_diagnosis = clean_diagnosis_text(diagnosis)
171
 
172
- search_query = "healthy maize" if "healthy" in cleaned_diagnosis.lower() else "phosphorus" if "phosphorus" in cleaned_diagnosis.lower() else "Wetin My Eye See So"
173
- remedy = search_bigquery_for_remedy(search_query)
 
 
174
 
175
- final_response = f"## Diagnosis Report **Condition:** ### {report_title} --- ## Suggested Remedy {remedy}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
  diagnosis_data = {
178
  "ai_diagnosis": report_title,
179
- "recommended_action": remedy,
 
180
  "farmer_id": user_state.get("uid"),
181
- "farmer_feedback": feedback,
 
182
  "crop_type": "Maize",
 
 
 
 
183
  }
184
  upload_diagnosis_to_bigquery(diagnosis_data)
185
 
 
31
  from story_generator import create_story_prompt_from_pdf, generate_video_from_prompt
32
  from langchain_huggingface import HuggingFaceEndpoint
33
  from bigquery_uploader import upload_diagnosis_to_bigquery
34
+ from vector_store import embed_and_store_documents, search_documents
35
 
36
  print("✅ All libraries imported successfully.")
37
 
 
54
  VISION_MODEL, PROCESSOR = load_vision_model()
55
  KB = KnowledgeBase()
56
  RETRIEVER = KB # The retriever is now the KB itself
57
+ embed_and_store_documents() # Initialize and load the vector store
58
 
59
  # Initialize ADK components for Connected Mode
60
  adk_components = initialize_adk(VISION_MODEL, PROCESSOR, RETRIEVER)
 
171
  report_title = diagnosis
172
  cleaned_diagnosis = clean_diagnosis_text(diagnosis)
173
 
174
+ # --- Hybrid Search ---
175
+ # 1. Local Vector Store Search
176
+ local_remedy_list = search_documents(cleaned_diagnosis)
177
+ local_remedy = local_remedy_list[0] if local_remedy_list else "No remedy found in local knowledge base."
178
 
179
+ # 2. BigQuery Search (as fallback or primary)
180
+ search_query = "healthy maize" if "healthy" in cleaned_diagnosis.lower() else "phosphorus" if "phosphorus" in cleaned_diagnosis.lower() else "Wetin My Eye See So"
181
+ cloud_remedy = search_bigquery_for_remedy(search_query)
182
+
183
+ final_response = f"""
184
+ ## Diagnosis Report
185
+ **Condition:**
186
+ ### {report_title}
187
+ ---
188
+ ## Suggested Remedy (from Local Knowledge)
189
+ {local_remedy}
190
+ ---
191
+ ## Suggested Remedy (from Cloud)
192
+ {cloud_remedy}
193
+ """
194
 
195
  diagnosis_data = {
196
  "ai_diagnosis": report_title,
197
+ "recommended_action": local_remedy, # Prioritizing local remedy for logging
198
+ "confidence_score": None, # Placeholder
199
  "farmer_id": user_state.get("uid"),
200
+ "gps_latitude": None, # Placeholder
201
+ "gps_longitude": None, # Placeholder
202
  "crop_type": "Maize",
203
+ "crop_variety": None, # Placeholder
204
+ "farmer_feedback": feedback,
205
+ "treatment_applied": None, # Placeholder
206
+ "outcome_image_id": None, # Placeholder
207
  }
208
  upload_diagnosis_to_bigquery(diagnosis_data)
209
 
requirements.txt CHANGED
@@ -11,12 +11,13 @@ langchain-huggingface
11
  langchain-core
12
  # For our RAG knowledge base (vector store and embeddings)
13
  sentence-transformers
14
- faiss-cpu
 
15
  pymupdf
16
  duckduckgo-search
17
  langgraph
18
  google-genai
19
  google-adk
20
- pypdf
21
  google-cloud-bigquery
22
- requests
 
 
11
  langchain-core
12
  # For our RAG knowledge base (vector store and embeddings)
13
  sentence-transformers
14
+ chromadb
15
+ pypdf
16
  pymupdf
17
  duckduckgo-search
18
  langgraph
19
  google-genai
20
  google-adk
 
21
  google-cloud-bigquery
22
+ requests
23
+ faiss-cpu
vector_store.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chromadb
2
+ from sentence_transformers import SentenceTransformer
3
+ import os
4
+
5
+ # --- Constants ---
6
+ MODEL_NAME = "all-MiniLM-L6-v2"
7
+ COLLECTION_NAME = "aura_mind_knowledge"
8
+ KNOWLEDGE_BASE_DIR = "knowledge_base_data"
9
+
10
+ # --- Initialize ChromaDB and Model ---
11
+ client = chromadb.PersistentClient(path="chroma_db")
12
+ model = SentenceTransformer(MODEL_NAME)
13
+ collection = client.get_or_create_collection(name=COLLECTION_NAME)
14
+
15
+ def embed_and_store_documents():
16
+ """
17
+ Reads documents from the knowledge base directory, generates embeddings,
18
+ and stores them in ChromaDB.
19
+ """
20
+ if collection.count() > 0:
21
+ print("✅ Knowledge base is already loaded into ChromaDB.")
22
+ return
23
+
24
+ print("Embedding and storing documents in ChromaDB...")
25
+ documents = []
26
+ ids = []
27
+ for filename in os.listdir(KNOWLEDGE_BASE_DIR):
28
+ if filename.endswith(".txt"):
29
+ with open(os.path.join(KNOWLEDGE_BASE_DIR, filename), "r") as f:
30
+ documents.append(f.read())
31
+ ids.append(filename)
32
+
33
+ if documents:
34
+ embeddings = model.encode(documents).tolist()
35
+ collection.add(
36
+ embeddings=embeddings,
37
+ documents=documents,
38
+ ids=ids
39
+ )
40
+ print(f"✅ Successfully stored {len(documents)} documents in ChromaDB.")
41
+
42
+ def search_documents(query: str, n_results: int = 1) -> list:
43
+ """
44
+ Searches for relevant documents in ChromaDB based on a query.
45
+
46
+ Args:
47
+ query: The search query.
48
+ n_results: The number of results to return.
49
+
50
+ Returns:
51
+ A list of relevant documents.
52
+ """
53
+ if not query:
54
+ return []
55
+
56
+ query_embedding = model.encode([query]).tolist()
57
+ results = collection.query(
58
+ query_embeddings=query_embedding,
59
+ n_results=n_results,
60
+ )
61
+ return results['documents'][0] if results['documents'] else []