Spaces:
Sleeping
Sleeping
Commit ·
44c115d
1
Parent(s): 76fc5ce
Updated
Browse files- Dockerfile +0 -45
- README.md +0 -12
- app.py +0 -80
- prepare_data.py +0 -51
- requirements.txt +0 -12
- vector.py +0 -64
Dockerfile
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
# Lightweight Python base image for the AgriCopilot FastAPI backend.
FROM python:3.10-slim

# System packages needed to build some Python wheels (git/curl for model pulls).
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        git \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Create non-root runtime user.
RUN useradd -m appuser

# Set work directory.
WORKDIR /app

# Copy requirements first so the dependency layer is cached across code changes.
COPY requirements.txt .

# Install Python dependencies.
RUN pip install --no-cache-dir -r requirements.txt

# Copy project files.
COPY . .

# Hugging Face cache directory, owned by the runtime user.
# FIX: the original `chmod -R 777` made the cache world-writable and was
# redundant — the chown to appuser below is sufficient.
RUN mkdir -p /app/huggingface_cache \
    && chown -R appuser:appuser /app

# Switch to non-root user.
USER appuser

# Hugging Face Spaces only routes traffic to port 7860.
EXPOSE 7860

# Hugging Face / Torch cache environment variables (single layer).
ENV HF_HOME=/app/huggingface_cache \
    HF_DATASETS_CACHE=/app/huggingface_cache \
    TORCH_HOME=/app/huggingface_cache

# Start FastAPI app on port 7860.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
DELETED
|
@@ -1,12 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: AgriCopilot
|
| 3 |
-
emoji: 📉
|
| 4 |
-
colorFrom: purple
|
| 5 |
-
colorTo: gray
|
| 6 |
-
sdk: docker
|
| 7 |
-
pinned: false
|
| 8 |
-
license: apache-2.0
|
| 9 |
-
short_description: AgriCopilot is an Agentic AI-powered super-app for farmers
|
| 10 |
-
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
DELETED
|
@@ -1,80 +0,0 @@
|
|
| 1 |
-
# app.py
# FastAPI backend for AgriCopilot: one prompt template + Hugging Face endpoint
# per feature (crop doctor, multilingual chat, disaster summary, marketplace).
import os

from fastapi import FastAPI
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEndpoint
from vector import query_vector

app = FastAPI(title="AgriCopilot")

# ==============================
# ROOT HEALTH CHECK
# ==============================
@app.get("/")
async def root():
    """Liveness probe for the Space front end."""
    return {"status": "AgriCopilot AI Backend is working perfectly"}

# ==============================
# MODELS PER ENDPOINT
# ==============================

# 1. Crop Doctor (Image/Text)
crop_template = PromptTemplate(
    input_variables=["symptoms"],
    template="You are an agricultural crop doctor. A farmer reports: {symptoms}. Diagnose the most likely disease and suggest treatments in simple farmer-friendly language."
)
crop_llm = HuggingFaceEndpoint(repo_id="facebook/bart-large", task="text2text-generation")

# 2. Multilingual Chat
chat_template = PromptTemplate(
    input_variables=["query"],
    template="You are a multilingual AI assistant for farmers. Answer clearly in the same language as the user. Farmer says: {query}"
)
chat_llm = HuggingFaceEndpoint(repo_id="google/mt5-base", task="text2text-generation")

# 3. Disaster Summarizer
disaster_template = PromptTemplate(
    input_variables=["report"],
    template="You are an AI disaster assistant. Summarize the following report for farmers in simple steps: {report}"
)
disaster_llm = HuggingFaceEndpoint(repo_id="google/flan-t5-base", task="text2text-generation")

# 4. Marketplace Recommendation
market_template = PromptTemplate(
    input_variables=["product"],
    template="You are an agricultural marketplace recommender. Farmer wants to sell or buy: {product}. Suggest possible matches and advice."
)
# FIX: falcon-7b-instruct is a causal (decoder-only) model, so its inference
# task is "text-generation", not "text2text-generation".
market_llm = HuggingFaceEndpoint(repo_id="tiiuae/falcon-7b-instruct", task="text-generation")

# ==============================
# ENDPOINTS
# ==============================
|
| 52 |
-
|
| 53 |
-
@app.post("/crop-doctor")
async def crop_doctor(symptoms: str):
    """Diagnose a likely crop disease from free-text symptoms."""
    prompt = crop_template.format(symptoms=symptoms)
    # FIX: .invoke() replaces the deprecated LLM __call__ API in LangChain.
    response = crop_llm.invoke(prompt)
    return {"diagnosis": response}

@app.post("/multilingual-chat")
async def multilingual_chat(query: str):
    """Answer a farmer's question in the language it was asked in."""
    prompt = chat_template.format(query=query)
    response = chat_llm.invoke(prompt)
    return {"reply": response}

@app.post("/disaster-summarizer")
async def disaster_summarizer(report: str):
    """Summarize a disaster report into simple steps for farmers."""
    prompt = disaster_template.format(report=report)
    response = disaster_llm.invoke(prompt)
    return {"summary": response}

@app.post("/marketplace")
async def marketplace(product: str):
    """Suggest marketplace matches for a product a farmer wants to buy/sell."""
    prompt = market_template.format(product=product)
    response = market_llm.invoke(prompt)
    return {"recommendation": response}

@app.post("/vector-search")
async def vector_search(query: str):
    """Similarity search over the FAISS knowledge base (see vector.py)."""
    results = query_vector(query)
    return {"results": results}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
prepare_data.py
DELETED
|
@@ -1,51 +0,0 @@
|
|
| 1 |
-
# prepare_data.py
# Download the three source datasets and normalize each into a CSV with a
# single "text" column under ./datasets.
import os

import kagglehub
import pandas as pd
from datasets import load_dataset

os.makedirs("datasets", exist_ok=True)

# -----------------------
# 1. PlantVillage (Kaggle)
# -----------------------
print("Downloading PlantVillage dataset...")
pv_path = kagglehub.dataset_download("dittakavinikhita/plant-disease-prediction-disease-and-healthy")

# Pick the metadata CSV if available.
# FIX: the original loop overwrote datasets/plant_disease.csv once per CSV in
# the download and never stopped; take the first match and break.
for file in os.listdir(pv_path):
    if file.endswith(".csv"):
        src = os.path.join(pv_path, file)
        dst = "datasets/plant_disease.csv"
        pd.read_csv(src).to_csv(dst, index=False)
        print("✅ Saved PlantVillage ->", dst)
        break
else:
    print("⚠️ No CSV found in PlantVillage download:", pv_path)

# -----------------------
# 2. AfriQA (Hugging Face)
# -----------------------
print("Downloading AfriQA dataset...")
afriqa = load_dataset("masakhane/afriqa")
afriqa_df = pd.DataFrame(afriqa["train"])

# Merge question + answer into one text column.
# NOTE(review): assumes the train split exposes "question" and "answer"
# columns — confirm against the dataset card.
afriqa_df["text"] = "Q: " + afriqa_df["question"].astype(str) + " A: " + afriqa_df["answer"].astype(str)
afriqa_df[["text"]].to_csv("datasets/afriqa.csv", index=False)
print("✅ Saved AfriQA -> datasets/afriqa.csv")

# -----------------------
# 3. CrisisNLP (Hugging Face)
# -----------------------
print("Downloading CrisisNLP dataset...")
crisis = load_dataset("QCRI/CrisisBench-all-lang")
crisis_df = pd.DataFrame(crisis["train"])

# Pick the tweet text when present; otherwise join all columns per row.
if "tweet_text" in crisis_df.columns:
    crisis_df["text"] = crisis_df["tweet_text"].astype(str)
else:
    crisis_df["text"] = crisis_df.astype(str).agg(" ".join, axis=1)

crisis_df[["text"]].to_csv("datasets/crisis.csv", index=False)
print("✅ Saved CrisisNLP -> datasets/crisis.csv")

print("🎉 All datasets prepared in /datasets")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
DELETED
|
@@ -1,12 +0,0 @@
|
|
| 1 |
-
fastapi
uvicorn
langchain
langchain_community
langchain-huggingface
huggingface-hub
sentence-transformers
faiss-cpu
kagglehub
pandas
datasets
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vector.py
DELETED
|
@@ -1,64 +0,0 @@
|
|
| 1 |
-
# vector.py
# FAISS-backed similarity search over the CSVs produced by prepare_data.py.
import os
import glob

import pandas as pd
from langchain_community.vectorstores import FAISS
# FIX: import HuggingFaceEmbeddings from langchain_huggingface (already a
# project dependency, used in app.py); the langchain_community location is
# deprecated.
from langchain_huggingface import HuggingFaceEmbeddings

# ----------------- CONFIG -----------------
VECTOR_PATH = "faiss_index"
EMBEDDING_MODEL = os.getenv("HF_EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
HF_CACHE_DIR = os.getenv("HF_CACHE_DIR", "/app/huggingface_cache")

# Ensure the model cache dir exists (matches the Dockerfile's HF_HOME).
os.makedirs(HF_CACHE_DIR, exist_ok=True)

# ----------------- EMBEDDINGS -----------------
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL,
    cache_folder=HF_CACHE_DIR,
)
|
| 21 |
-
|
| 22 |
-
# ----------------- VECTOR STORE -----------------
|
| 23 |
-
def build_vectorstore():
    """Build a FAISS index from every CSV under datasets/ and persist it."""
    corpus = []
    for csv_path in glob.glob("datasets/*.csv"):
        try:
            frame = pd.read_csv(csv_path)
            if "text" in frame.columns:
                corpus.extend(frame["text"].dropna().astype(str).tolist())
            else:
                # No dedicated text column: collapse each row into one string.
                corpus.extend(" ".join(map(str, r.values)) for _, r in frame.iterrows())
            print(f"✅ Loaded {len(frame)} rows from {csv_path}")
        except Exception as err:
            print(f"⚠️ Skipping {csv_path}, error: {err}")

    # Never index an empty corpus — seed with a placeholder document.
    if not corpus:
        corpus = ["AgriCopilot initialized knowledge base."]

    store = FAISS.from_texts(corpus, embeddings)
    store.save_local(VECTOR_PATH)
    print("🎉 Vectorstore built with", len(corpus), "documents")
    return store
|
| 46 |
-
|
| 47 |
-
def load_vector_store():
    """Return the persisted FAISS index, building a fresh one when absent."""
    if not os.path.exists(VECTOR_PATH):
        return build_vectorstore()
    # allow_dangerous_deserialization: the pickle was written by this app
    # (save_local above), so deserializing it is trusted.
    return FAISS.load_local(VECTOR_PATH, embeddings, allow_dangerous_deserialization=True)
|
| 57 |
-
|
| 58 |
-
# Loaded once at import time so every request reuses the same index.
vectorstore = load_vector_store()

# ----------------- QUERY -----------------
def query_vector(query: str, k: int = 3):
    """Return the page_content of the k documents most similar to *query*."""
    matches = vectorstore.similarity_search(query, k=k)
    return [match.page_content for match in matches]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|