alaselababatunde committed on
Commit
44c115d
·
1 Parent(s): 76fc5ce
Files changed (6) hide show
  1. Dockerfile +0 -45
  2. README.md +0 -12
  3. app.py +0 -80
  4. prepare_data.py +0 -51
  5. requirements.txt +0 -12
  6. vector.py +0 -64
Dockerfile DELETED
@@ -1,45 +0,0 @@
1
# Use lightweight Python image
FROM python:3.10-slim

# System packages needed to build some Python wheels
RUN apt-get update && apt-get install -y \
    build-essential \
    git \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user
RUN useradd -m appuser

# Set work directory
WORKDIR /app

# Copy requirements first (for better caching)
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy project files
COPY . .

# Hugging Face / Torch cache environment variables (one layer instead of three)
ENV HF_HOME=/app/huggingface_cache \
    HF_DATASETS_CACHE=/app/huggingface_cache \
    TORCH_HOME=/app/huggingface_cache

# Create the cache dir and hand the whole app tree to appuser.
# NOTE: the original also ran `chmod -R 777` on the cache dir; that made it
# world-writable and is unnecessary once appuser owns it, so it is dropped.
RUN mkdir -p /app/huggingface_cache \
    && chown -R appuser:appuser /app

# Switch to non-root user
USER appuser

# Hugging Face Spaces expects the app on this port
EXPOSE 7860

# Start FastAPI app on port 7860
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md DELETED
@@ -1,12 +0,0 @@
1
---
title: AgriCopilot
emoji: 📉
colorFrom: purple
colorTo: gray
sdk: docker
pinned: false
license: apache-2.0
short_description: AgriCopilot is an AgenticAI-powered super-app for farmers
---

Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py DELETED
@@ -1,80 +0,0 @@
1
# app.py — FastAPI entry point for the AgriCopilot backend.
import os

from fastapi import FastAPI
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEndpoint

from vector import query_vector

# Application object served by uvicorn (see Dockerfile CMD "app:app").
app = FastAPI(title="AgriCopilot")
9
-
10
- # ==============================
11
- # ROOT HEALTH CHECK
12
- # ==============================
13
# ==============================
# ROOT HEALTH CHECK
# ==============================
@app.get("/")
async def root():
    """Liveness probe: confirms the API process is up and serving."""
    return {"status": "AgriCopilot AI Backend is working perfectly"}
16
-
17
# ==============================
# MODELS PER ENDPOINT
# ==============================
# Each feature pairs a prompt template with its own hosted HF model.

# Prompt templates.
# 1. Crop Doctor (Image/Text)
crop_template = PromptTemplate(
    input_variables=["symptoms"],
    template="You are an agricultural crop doctor. A farmer reports: {symptoms}. Diagnose the most likely disease and suggest treatments in simple farmer-friendly language.",
)
# 2. Multilingual Chat
chat_template = PromptTemplate(
    input_variables=["query"],
    template="You are a multilingual AI assistant for farmers. Answer clearly in the same language as the user. Farmer says: {query}",
)
# 3. Disaster Summarizer
disaster_template = PromptTemplate(
    input_variables=["report"],
    template="You are an AI disaster assistant. Summarize the following report for farmers in simple steps: {report}",
)
# 4. Marketplace Recommendation
market_template = PromptTemplate(
    input_variables=["product"],
    template="You are an agricultural marketplace recommender. Farmer wants to sell or buy: {product}. Suggest possible matches and advice.",
)

# Hosted Hugging Face inference endpoints backing each feature.
crop_llm = HuggingFaceEndpoint(repo_id="facebook/bart-large", task="text2text-generation")
chat_llm = HuggingFaceEndpoint(repo_id="google/mt5-base", task="text2text-generation")
disaster_llm = HuggingFaceEndpoint(repo_id="google/flan-t5-base", task="text2text-generation")
market_llm = HuggingFaceEndpoint(repo_id="tiiuae/falcon-7b-instruct", task="text2text-generation")
53
# ==============================
# ENDPOINTS
# ==============================

@app.post("/crop-doctor")
async def crop_doctor(symptoms: str):
    """Diagnose a crop problem from a farmer-reported symptom description.

    Fix: call the LLM via `.invoke()` — invoking an LLM object directly
    (`crop_llm(prompt)`) is the deprecated `__call__` path in LangChain;
    `invoke` is the supported Runnable interface.
    """
    prompt = crop_template.format(symptoms=symptoms)
    response = crop_llm.invoke(prompt)
    return {"diagnosis": response}
58
-
59
@app.post("/multilingual-chat")
async def multilingual_chat(query: str):
    """Answer a farmer's question in the language it was asked in.

    Fix: `.invoke()` replaces the deprecated direct-call (`chat_llm(prompt)`)
    LangChain LLM interface.
    """
    prompt = chat_template.format(query=query)
    response = chat_llm.invoke(prompt)
    return {"reply": response}
64
-
65
@app.post("/disaster-summarizer")
async def disaster_summarizer(report: str):
    """Summarize a disaster report into simple steps for farmers.

    Fix: `.invoke()` replaces the deprecated direct-call
    (`disaster_llm(prompt)`) LangChain LLM interface.
    """
    prompt = disaster_template.format(report=report)
    response = disaster_llm.invoke(prompt)
    return {"summary": response}
70
-
71
@app.post("/marketplace")
async def marketplace(product: str):
    """Suggest marketplace matches for a product a farmer wants to buy/sell.

    Fix: `.invoke()` replaces the deprecated direct-call
    (`market_llm(prompt)`) LangChain LLM interface.
    """
    prompt = market_template.format(product=product)
    response = market_llm.invoke(prompt)
    return {"recommendation": response}
76
-
77
@app.post("/vector-search")
async def vector_search(query: str):
    """Similarity search over the local FAISS knowledge base (see vector.py)."""
    return {"results": query_vector(query)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
prepare_data.py DELETED
@@ -1,51 +0,0 @@
1
# prepare_data.py
# Download the three source datasets and normalize each one into a CSV with
# a single "text" column under ./datasets, ready for vector.py to index.
import os

import kagglehub
import pandas as pd
from datasets import load_dataset

os.makedirs("datasets", exist_ok=True)

# -----------------------
# 1. PlantVillage (Kaggle)
# -----------------------
print("Downloading PlantVillage dataset...")
pv_path = kagglehub.dataset_download("dittakavinikhita/plant-disease-prediction-disease-and-healthy")

# Copy any metadata CSV shipped with the dataset.
# NOTE(review): every matching CSV is written to the same destination, so if
# the dataset ships several CSVs only the last one survives — confirm intended.
for fname in os.listdir(pv_path):
    if fname.endswith(".csv"):
        src = os.path.join(pv_path, fname)
        dst = "datasets/plant_disease.csv"
        pd.read_csv(src).to_csv(dst, index=False)
        print("✅ Saved PlantVillage ->", dst)

# -----------------------
# 2. AfriQA (Hugging Face)
# -----------------------
print("Downloading AfriQA dataset...")
afriqa = load_dataset("masakhane/afriqa")
afriqa_df = pd.DataFrame(afriqa["train"])

# Collapse question + answer into one retrievable text field.
afriqa_df["text"] = "Q: " + afriqa_df["question"].astype(str) + " A: " + afriqa_df["answer"].astype(str)
afriqa_df[["text"]].to_csv("datasets/afriqa.csv", index=False)
print("✅ Saved AfriQA -> datasets/afriqa.csv")

# -----------------------
# 3. CrisisNLP (Hugging Face)
# -----------------------
print("Downloading CrisisNLP dataset...")
crisis = load_dataset("QCRI/CrisisBench-all-lang")
crisis_df = pd.DataFrame(crisis["train"])

# Prefer the tweet text column; otherwise join every column into one string.
if "tweet_text" in crisis_df.columns:
    crisis_df["text"] = crisis_df["tweet_text"].astype(str)
else:
    crisis_df["text"] = crisis_df.astype(str).agg(" ".join, axis=1)

crisis_df[["text"]].to_csv("datasets/crisis.csv", index=False)
print("✅ Saved CrisisNLP -> datasets/crisis.csv")

print("🎉 All datasets prepared in /datasets")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt DELETED
@@ -1,12 +0,0 @@
1
fastapi
uvicorn
langchain
langchain_community
langchain-huggingface
faiss-cpu
huggingface-hub
sentence-transformers
kagglehub
pandas
datasets
 
 
 
 
 
 
 
 
 
 
 
 
 
vector.py DELETED
@@ -1,64 +0,0 @@
1
# vector.py
# Builds/loads a FAISS index over the prepared CSV datasets and exposes a
# simple similarity-search helper for app.py.
import glob
import os

import pandas as pd
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

# ----------------- CONFIG -----------------
VECTOR_PATH = "faiss_index"
EMBEDDING_MODEL = os.getenv("HF_EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
HF_CACHE_DIR = os.getenv("HF_CACHE_DIR", "/app/huggingface_cache")

# Make sure the model cache directory exists before any download happens.
os.makedirs(HF_CACHE_DIR, exist_ok=True)

# ----------------- EMBEDDINGS -----------------
# Shared embedding model, used both to build and to query the index.
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL,
    cache_folder=HF_CACHE_DIR,
)
21
-
22
# ----------------- VECTOR STORE -----------------
def build_vectorstore():
    """Build FAISS index from CSV datasets."""
    texts = []
    for csv_path in glob.glob("datasets/*.csv"):
        try:
            frame = pd.read_csv(csv_path)
            if "text" in frame.columns:
                texts.extend(frame["text"].dropna().astype(str).tolist())
            else:
                # Fallback: flatten every column of each row into one string.
                for _, row in frame.iterrows():
                    texts.append(" ".join(map(str, row.values)))
            print(f"✅ Loaded {len(frame)} rows from {csv_path}")
        except Exception as e:
            # Best-effort ingest: a bad CSV is reported but never fatal.
            print(f"⚠️ Skipping {csv_path}, error: {e}")

    # Seed document so FAISS.from_texts never receives an empty list.
    if not texts:
        texts = ["AgriCopilot initialized knowledge base."]

    vectorstore = FAISS.from_texts(texts, embeddings)
    vectorstore.save_local(VECTOR_PATH)
    print("🎉 Vectorstore built with", len(texts), "documents")
    return vectorstore
46
-
47
def load_vector_store():
    """Load FAISS index if available, else build new one."""
    if not os.path.exists(VECTOR_PATH):
        return build_vectorstore()
    # NOTE(review): allow_dangerous_deserialization opts into pickle-based
    # loading — acceptable only because the index on disk is one this app
    # wrote itself; never point VECTOR_PATH at untrusted data.
    return FAISS.load_local(
        VECTOR_PATH,
        embeddings,
        allow_dangerous_deserialization=True,
    )
57
-
58
# Index is loaded (or built) once at import time and shared by all queries.
vectorstore = load_vector_store()

# ----------------- QUERY -----------------
def query_vector(query: str, k: int = 3):
    """Perform similarity search on FAISS index."""
    matches = vectorstore.similarity_search(query, k=k)
    return [doc.page_content for doc in matches]