Spaces:

Gaykar
/

AIService

Sleeping

App Files Files Community

Gaykar committed on Apr 12

Commit

f3b2b2d

1 Parent(s): 621eb6f

kka

Browse files

Files changed (34) hide show

app/ai_agents/__init__.py +0 -0
agents.py → app/ai_agents/agents.py +2 -2
app/core/__init__.py +0 -0
config.py → app/core/config.py +9 -17
app/database/__init__.py +0 -0
database.py → app/database/connection.py +7 -3
graph.py → app/graph.py +2 -2
graph_trial.py → app/graph_trial.py +1 -1
app.py → app/main.py +2 -3
nodes.py → app/nodes/nodes.py +5 -5
schemas.py → app/schemas/schemas.py +0 -0
state.py → app/state/state.py +0 -0
utils.py → app/utils/utils.py +1 -1
{ComplaintData → app/vectordatabase/ComplaintData}/buildingsPlanning.json +0 -0
{ComplaintData → app/vectordatabase/ComplaintData}/buildingsPlanning_langchain_formatted.json +0 -0
{ComplaintData → app/vectordatabase/ComplaintData}/complaint_matching_data.json +0 -0
{ComplaintData → app/vectordatabase/ComplaintData}/electricity.json +0 -0
{ComplaintData → app/vectordatabase/ComplaintData}/electricity_langchain_formatted.json +0 -0
{ComplaintData → app/vectordatabase/ComplaintData}/parkRecreation.json +0 -0
{ComplaintData → app/vectordatabase/ComplaintData}/parkRecreation_langchain_formatted.json +0 -0
{ComplaintData → app/vectordatabase/ComplaintData}/publicHealth.json +0 -0
{ComplaintData → app/vectordatabase/ComplaintData}/publicHealth_langchain_formatted.json +0 -0
{ComplaintData → app/vectordatabase/ComplaintData}/publicSafety.json +0 -0
{ComplaintData → app/vectordatabase/ComplaintData}/publicSafety_langchain_formatted.json +0 -0
{ComplaintData → app/vectordatabase/ComplaintData}/roadsInfrastructure.json +0 -0
{ComplaintData → app/vectordatabase/ComplaintData}/roadsInfrastructure_langchain_formatted.json +0 -0
{ComplaintData → app/vectordatabase/ComplaintData}/sanitationCleanliness.json +0 -0
{ComplaintData → app/vectordatabase/ComplaintData}/sanitationCleanliness_langchain_formatted.json +0 -0
{ComplaintData → app/vectordatabase/ComplaintData}/waterSewage.json +0 -0
{ComplaintData → app/vectordatabase/ComplaintData}/waterSewage_langchain_formatted.json +0 -0
app/vectordatabase/__init__.py +0 -0
app/vectordatabase/matching_data_bm25.pkl +3 -0
vectordatabase.py → app/vectordatabase/pinecone.py +31 -30
app/vectordatabase/priority_bm25.pkl +3 -0

app/ai_agents/__init__.py ADDED Viewed

File without changes

agents.py → app/ai_agents/agents.py RENAMED Viewed

@@ -1,6 +1,6 @@
 from langchain_groq import ChatGroq
-from config import settings
-from schemas import ComplaintClassificationResponse
 import os

 from langchain_groq import ChatGroq
+from app.core.config import settings
+from app.schemas.schemas import ComplaintClassificationResponse
 import os

app/core/__init__.py ADDED Viewed

File without changes

config.py → app/core/config.py RENAMED Viewed

@@ -1,28 +1,20 @@
-import os
 from pathlib import Path
-from typing import Optional
 from pydantic_settings import BaseSettings, SettingsConfigDict
-# Since your .env is in the root (based on your folder structure image)
-BASE_DIR = Path(__file__).resolve().parent
 class Settings(BaseSettings):
-    # Project Metadata
-    PROJECT_NAME: str
-    # API Keys & Secrets
     PINECONE_API_KEY: str
-    PINECONE_ENVIRONMENT: str
-    # Database Configuration
-    DATABASE_URL: str
-    # Pydantic Settings Config
     model_config = SettingsConfigDict(
         env_file=str(BASE_DIR / ".env"),
         env_file_encoding="utf-8",
         extra="ignore"
     )
-# Singleton instance to be imported across the project
 settings = Settings()

 from pathlib import Path
 from pydantic_settings import BaseSettings, SettingsConfigDict
+BASE_DIR = Path(__file__).resolve().parent.parent.parent
 class Settings(BaseSettings):
+    PROJECT_NAME: str = "City AI Sync"
+    GROQ_API_KEY: str
     PINECONE_API_KEY: str
+    DATABASE_URL: str
     model_config = SettingsConfigDict(
         env_file=str(BASE_DIR / ".env"),
         env_file_encoding="utf-8",
         extra="ignore"
     )
 settings = Settings()

app/database/__init__.py ADDED Viewed

File without changes

database.py → app/database/connection.py RENAMED Viewed

@@ -2,13 +2,17 @@ from sqlalchemy.orm import sessionmaker, Session
 from sqlalchemy import create_engine, Column, Integer, String, Float, ForeignKey
 from sqlalchemy.orm import declarative_base, relationship, sessionmaker
 import os
-from dotenv import load_dotenv
 # This loads the variables from your .env file into the system environment
-load_dotenv()
 # Access them using os.getenv()
-DB_URL= os.getenv("DB_URL")
 Base = declarative_base()

 from sqlalchemy import create_engine, Column, Integer, String, Float, ForeignKey
 from sqlalchemy.orm import declarative_base, relationship, sessionmaker
 import os
+from app.core.config import settings
+DB_URL=settings.DATABASE_URL
 # The database URL comes from the shared pydantic settings object, which reads the project's .env file
 # load_dotenv() / os.getenv() are no longer used in this module
 Base = declarative_base()

graph.py → app/graph.py RENAMED Viewed

@@ -1,6 +1,6 @@
-from state import ComplaintState
 from langgraph.graph import StateGraph, START, END
-from nodes import classify_complaint_node, group_duplicate_complaints_node, calculate_priority_node, store_data_node, router
 workflow = StateGraph(ComplaintState)
 # Add Nodes

+from app.state.state import ComplaintState
 from langgraph.graph import StateGraph, START, END
+from app.nodes.nodes import classify_complaint_node, group_duplicate_complaints_node, calculate_priority_node, store_data_node, router
 workflow = StateGraph(ComplaintState)
 # Add Nodes

graph_trial.py → app/graph_trial.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from graph import graph
 from langgraph.checkpoint.memory import MemorySaver
 checkpointer = MemorySaver()

+from app.graph import graph
 from langgraph.checkpoint.memory import MemorySaver
 checkpointer = MemorySaver()

app.py → app/main.py RENAMED Viewed

@@ -2,10 +2,9 @@ import os
 from fastapi import FastAPI, HTTPException, Depends
 from pydantic import BaseModel, Field
 from typing import Optional
 # Importing internal modules
-from config import settings
-from graph import graph # The compiled LangGraph instance
 app = FastAPI(title=settings.PROJECT_NAME)

 from fastapi import FastAPI, HTTPException, Depends
 from pydantic import BaseModel, Field
 from typing import Optional
 # Importing internal modules
+from app.core.config import settings
+from app.graph import graph # The compiled LangGraph instance
 app = FastAPI(title=settings.PROJECT_NAME)

nodes.py → app/nodes/nodes.py RENAMED Viewed

@@ -1,10 +1,10 @@
-from state import ComplaintState
-from database import Complaint, ComplaintUser, get_session
 from typing import Literal
 from sqlalchemy.orm import Session
-from agents import complaint_classifier_agent
-from vectordatabase import matching_retriever,retriever
-from utils import *
 # Assuming these are available in your global environment or config
 # from config import matching_retriever, get_session

+from app.state.state import ComplaintState
+from app.database.connection import Complaint, ComplaintUser, get_session
 from typing import Literal
 from sqlalchemy.orm import Session
+from app.vectordatabase.pinecone import matching_retriever,retriever
+from app.ai_agents.agents import complaint_classifier_agent
+from app.utils.utils import *
 # Assuming these are available in your global environment or config
 # from config import matching_retriever, get_session

schemas.py → app/schemas/schemas.py RENAMED Viewed

File without changes

state.py → app/state/state.py RENAMED Viewed

File without changes

utils.py → app/utils/utils.py RENAMED Viewed

@@ -1,5 +1,5 @@
 from sqlalchemy import and_
-from database import Complaint, ComplaintUser
 import numpy as np

 from sqlalchemy import and_
+from app.database.connection import Complaint, ComplaintUser
 import numpy as np

{ComplaintData → app/vectordatabase/ComplaintData}/buildingsPlanning.json RENAMED Viewed

File without changes

{ComplaintData → app/vectordatabase/ComplaintData}/buildingsPlanning_langchain_formatted.json RENAMED Viewed

File without changes

{ComplaintData → app/vectordatabase/ComplaintData}/complaint_matching_data.json RENAMED Viewed

File without changes

{ComplaintData → app/vectordatabase/ComplaintData}/electricity.json RENAMED Viewed

File without changes

{ComplaintData → app/vectordatabase/ComplaintData}/electricity_langchain_formatted.json RENAMED Viewed

File without changes

{ComplaintData → app/vectordatabase/ComplaintData}/parkRecreation.json RENAMED Viewed

File without changes

{ComplaintData → app/vectordatabase/ComplaintData}/parkRecreation_langchain_formatted.json RENAMED Viewed

File without changes

{ComplaintData → app/vectordatabase/ComplaintData}/publicHealth.json RENAMED Viewed

File without changes

{ComplaintData → app/vectordatabase/ComplaintData}/publicHealth_langchain_formatted.json RENAMED Viewed

File without changes

{ComplaintData → app/vectordatabase/ComplaintData}/publicSafety.json RENAMED Viewed

File without changes

{ComplaintData → app/vectordatabase/ComplaintData}/publicSafety_langchain_formatted.json RENAMED Viewed

File without changes

{ComplaintData → app/vectordatabase/ComplaintData}/roadsInfrastructure.json RENAMED Viewed

File without changes

{ComplaintData → app/vectordatabase/ComplaintData}/roadsInfrastructure_langchain_formatted.json RENAMED Viewed

File without changes

{ComplaintData → app/vectordatabase/ComplaintData}/sanitationCleanliness.json RENAMED Viewed

File without changes

{ComplaintData → app/vectordatabase/ComplaintData}/sanitationCleanliness_langchain_formatted.json RENAMED Viewed

File without changes

{ComplaintData → app/vectordatabase/ComplaintData}/waterSewage.json RENAMED Viewed

File without changes

{ComplaintData → app/vectordatabase/ComplaintData}/waterSewage_langchain_formatted.json RENAMED Viewed

File without changes

app/vectordatabase/__init__.py ADDED Viewed

File without changes

app/vectordatabase/matching_data_bm25.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e110dd07ebf397e44dc33e801131b710ad67fab368e68373f0ad4e925eed7a70
+size 3129

vectordatabase.py → app/vectordatabase/pinecone.py RENAMED Viewed

@@ -6,19 +6,29 @@ from typing import List
 from pathlib import Path
 from pinecone import Pinecone, ServerlessSpec
 from pinecone_text.sparse import BM25Encoder
-from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.retrievers import PineconeHybridSearchRetriever
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
-# 1. Environment & API Setup
-PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
 if not PINECONE_API_KEY:
-    raise ValueError("PINECONE_API_KEY not found. Set it in HF Space Secrets.")
 pc = Pinecone(api_key=PINECONE_API_KEY)
-# 2. Remote Embedding Configuration
 class GeneralRemoteEmbeddings(Embeddings):
     def __init__(self, endpoint: str):
         self.endpoint = endpoint
@@ -35,7 +45,7 @@ class GeneralRemoteEmbeddings(Embeddings):
 embeddings = GeneralRemoteEmbeddings(endpoint="https://gaykar-generalembeddings.hf.space")
-# 3. Index Initialization Helper
 def get_or_create_index(name: str):
     if name not in pc.list_indexes().names():
         pc.create_index(
@@ -46,17 +56,13 @@ def get_or_create_index(name: str):
         )
     return pc.Index(name)
-# Initialize both indices
 index_general = get_or_create_index("complaints-index")
 index_matching = get_or_create_index("user-complaint-matching-index")
-# 4. Data Loading (Linux Compatible Paths)
-BASE_DATA_DIR = Path("ComplaintData")
-PRIORITY_BM25_PKL = Path("priority_bm25.pkl")
-MATCHING_BM25_PKL = Path("matching_data_bm25.pkl")
 def load_docs_from_json(pattern: str):
     docs = []
     for file_path in BASE_DATA_DIR.glob(pattern):
         with open(file_path, "r", encoding="utf-8") as f:
             try:
@@ -70,21 +76,19 @@ def load_docs_from_json(pattern: str):
                 print(f"Error loading {file_path}: {e}")
     return docs
-# --- 5. BM25 & Retriever Setup for Priority Scoring ---
 general_docs = load_docs_from_json("*_langchain_formatted.json")
 bm25_general = BM25Encoder()
 if PRIORITY_BM25_PKL.exists():
-    print("Loading existing Priority BM25 model from pickle...")
     with open(PRIORITY_BM25_PKL, "rb") as f:
         bm25_general = pickle.load(f)
 else:
-    if general_docs:
-        print("Fitting Priority BM25 on general knowledge base...")
-        bm25_general.fit([doc.page_content for doc in general_docs])
-        with open(PRIORITY_BM25_PKL, "wb") as f:
-            pickle.dump(bm25_general, f)
-        print(f"Priority BM25 fitted and saved to {PRIORITY_BM25_PKL}")
 retriever = PineconeHybridSearchRetriever(
     embeddings=embeddings,
@@ -93,21 +97,19 @@ retriever = PineconeHybridSearchRetriever(
     alpha=0.85
 )
-# --- 6. BM25 & Retriever Setup for Duplicate Matching ---
 matching_docs = load_docs_from_json("complaint_matching_data.json")
 bm25_matching = BM25Encoder()
 if MATCHING_BM25_PKL.exists():
-    print("Loading existing Matching BM25 model from pickle...")
     with open(MATCHING_BM25_PKL, "rb") as f:
         bm25_matching = pickle.load(f)
 else:
-    if matching_docs:
-        print("Fitting Matching BM25 on complaint matching data...")
-        bm25_matching.fit([doc.page_content for doc in matching_docs])
-        with open(MATCHING_BM25_PKL, "wb") as f:
-            pickle.dump(bm25_matching, f)
-        print(f"Matching BM25 fitted and saved to {MATCHING_BM25_PKL}")
 matching_retriever = PineconeHybridSearchRetriever(
     embeddings=embeddings,
@@ -115,5 +117,4 @@ matching_retriever = PineconeHybridSearchRetriever(
     index=index_matching,
     top_k=1,
     alpha=0.9
-)

 from pathlib import Path
 from pinecone import Pinecone, ServerlessSpec
 from pinecone_text.sparse import BM25Encoder
 from langchain_community.retrievers import PineconeHybridSearchRetriever
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
+from app.core.config import settings
+# 1. Path Resolution (Fixes the folder structure issue)
+# This finds the absolute path to the directory containing this file
+current_file_path = Path(__file__).resolve()
+VDB_DIR = current_file_path.parent
+BASE_DATA_DIR = VDB_DIR / "ComplaintData"
+# Pickle files will now be stored inside the vectordatabase folder too
+PRIORITY_BM25_PKL = VDB_DIR / "priority_bm25.pkl"
+MATCHING_BM25_PKL = VDB_DIR / "matching_data_bm25.pkl"
+# 2. Environment & API Setup
+PINECONE_API_KEY = settings.PINECONE_API_KEY
 if not PINECONE_API_KEY:
+    raise ValueError("PINECONE_API_KEY not found in settings.")
 pc = Pinecone(api_key=PINECONE_API_KEY)
+# 3. Remote Embedding Configuration
 class GeneralRemoteEmbeddings(Embeddings):
     def __init__(self, endpoint: str):
         self.endpoint = endpoint
 embeddings = GeneralRemoteEmbeddings(endpoint="https://gaykar-generalembeddings.hf.space")
+# 4. Index Initialization Helper
 def get_or_create_index(name: str):
     if name not in pc.list_indexes().names():
         pc.create_index(
         )
     return pc.Index(name)
 index_general = get_or_create_index("complaints-index")
 index_matching = get_or_create_index("user-complaint-matching-index")
+# 5. Data Loading Logic
 def load_docs_from_json(pattern: str):
     docs = []
+    # Search inside the absolute path resolved in step 1
     for file_path in BASE_DATA_DIR.glob(pattern):
         with open(file_path, "r", encoding="utf-8") as f:
             try:
                 print(f"Error loading {file_path}: {e}")
     return docs
+# --- 6. BM25 & Retriever Setup for Priority Scoring ---
 general_docs = load_docs_from_json("*_langchain_formatted.json")
 bm25_general = BM25Encoder()
 if PRIORITY_BM25_PKL.exists():
     with open(PRIORITY_BM25_PKL, "rb") as f:
         bm25_general = pickle.load(f)
 else:
+    # IMPORTANT: Always fit on at least one string to prevent "not fit" error
+    texts = [doc.page_content for doc in general_docs] if general_docs else ["seed text for priority"]
+    bm25_general.fit(texts)
+    with open(PRIORITY_BM25_PKL, "wb") as f:
+        pickle.dump(bm25_general, f)
 retriever = PineconeHybridSearchRetriever(
     embeddings=embeddings,
     alpha=0.85
 )
+# --- 7. BM25 & Retriever Setup for Duplicate Matching ---
 matching_docs = load_docs_from_json("complaint_matching_data.json")
 bm25_matching = BM25Encoder()
 if MATCHING_BM25_PKL.exists():
     with open(MATCHING_BM25_PKL, "rb") as f:
         bm25_matching = pickle.load(f)
 else:
+    # Safety fit for matching retriever
+    texts = [doc.page_content for doc in matching_docs] if matching_docs else ["seed text for matching"]
+    bm25_matching.fit(texts)
+    with open(MATCHING_BM25_PKL, "wb") as f:
+        pickle.dump(bm25_matching, f)
 matching_retriever = PineconeHybridSearchRetriever(
     embeddings=embeddings,
     index=index_matching,
     top_k=1,
     alpha=0.9
+)

app/vectordatabase/priority_bm25.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:42720b8474af5f170e289a6f49422ed74901b5546d1dd7fa6b2121277040eea7
+size 14608