# EngrKashifKhan's picture
# Update app.py
# b8167e9 verified
# === Imports ===
import gradio as gr
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# === Step 1: Knowledge Base ===
# In-file knowledge base: one self-contained EIA fact per entry. Each entry is
# wrapped in parentheses so the implicit string concatenation cannot silently
# merge two neighbouring entries if a comma is ever dropped.
eia_knowledge = [
    (
        "Environmental Impact Assessments (EIAs) evaluate the environmental "
        "consequences of development projects before implementation."
    ),
    (
        "Common concerns in EIAs include air pollution, noise, biodiversity, "
        "water quality, and community displacement."
    ),
    (
        "Textile manufacturing plants often require waste management plans, "
        "especially for chemical dyes."
    ),
    (
        "Projects near rivers must assess the impact on aquatic ecosystems "
        "and water pollution."
    ),
    (
        "Proper mitigation strategies must be proposed to reduce harmful "
        "environmental effects."
    ),
    (
        "Agricultural areas require special attention to soil quality, "
        "pesticide run-off, and groundwater usage."
    ),
]
# === Step 2: Vector Store + Retriever using cosine similarity ===
class SimpleRetriever:
    """In-memory semantic retriever over a fixed list of sentences.

    Every document is embedded once at construction time with the
    ``all-MiniLM-L6-v2`` sentence-transformer. Queries are embedded with the
    same model and the documents are ranked by cosine similarity.
    """

    def __init__(self, documents):
        """Embed *documents* and keep both the texts and their vectors.

        Args:
            documents: Iterable of strings forming the knowledge base.
        """
        self.model = SentenceTransformer("all-MiniLM-L6-v2")
        # Copy so that later mutation of the caller's list cannot put the
        # stored texts out of step with their embeddings.
        self.sentences = list(documents)
        self.embeddings = self.model.encode(self.sentences)

    @staticmethod
    def _cosine_scores(query_vec, doc_vecs):
        """Return the cosine similarity of *query_vec* to each row of *doc_vecs*."""
        norms = np.linalg.norm(doc_vecs, axis=1) * np.linalg.norm(query_vec)
        # A zero-length vector has a zero dot product with everything, so
        # dividing by 1 yields a similarity of 0 instead of NaN.
        norms[norms == 0.0] = 1.0
        return (doc_vecs @ query_vec) / norms

    def retrieve(self, query, k=3):
        """Return up to *k* stored documents most similar to *query*.

        Args:
            query: Text to search for.
            k: Maximum number of documents to return. Values larger than the
                knowledge base return every document; ``k <= 0`` returns none.

        Returns:
            A list of document strings ordered from most to least similar.
        """
        n_docs = len(self.sentences)
        if k <= 0 or n_docs == 0:
            # Without this guard a slice such as [-0:] would select everything.
            return []

        doc_vecs = np.asarray(self.embeddings, dtype=float).reshape(n_docs, -1)
        # encode([query]) already returns one row per input text. Flatten that
        # single row; wrapping it in another list would add a third dimension.
        query_vec = np.asarray(self.model.encode([query]), dtype=float).reshape(-1)

        similarities = self._cosine_scores(query_vec, doc_vecs)
        top_indices = np.argsort(similarities)[::-1][:k]
        return [self.sentences[i] for i in top_indices]
# === Step 3: Local Generation Model ===
class RAGPipeline:
    """Retrieval-augmented report generator backed by a local causal LM.

    The retriever supplies context passages for a project description; the
    passages and the description are placed into a fixed prompt and a
    ``text-generation`` pipeline produces the report text.
    """

    # Default checkpoint. A small model such as "distilgpt2" can be passed to
    # the constructor instead when running on CPU.
    DEFAULT_MODEL_NAME = "tiiuae/falcon-7b-instruct"

    def __init__(self, retriever, model_name=DEFAULT_MODEL_NAME):
        """Load the tokenizer and model and build the generation pipeline.

        Args:
            retriever: Object exposing ``retrieve(query)`` that returns a list
                of context strings.
            model_name: Checkpoint identifier handed to ``from_pretrained``.
                Defaults to ``DEFAULT_MODEL_NAME``.
        """
        self.retriever = retriever
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name)
        self.generator = pipeline(
            "text-generation", model=self.model, tokenizer=self.tokenizer
        )

    def generate_report(self, project_desc):
        """Generate an EIA report for *project_desc*.

        Args:
            project_desc: Free-text description of the project.

        Returns:
            The generated report with the prompt removed and surrounding
            whitespace stripped.

        Raises:
            ValueError: If *project_desc* is empty or only whitespace.
        """
        if not project_desc or not project_desc.strip():
            # Fail fast instead of spending a full generation on no input.
            raise ValueError("Project description must not be empty.")

        context = "\n".join(self.retriever.retrieve(project_desc))
        prompt = (
            "Write an Environmental Impact Assessment (EIA) report for the following project.\n"
            "Context:\n"
            f"{context}\n"
            "Project:\n"
            f"{project_desc}\n"
            "EIA Report:\n"
        )
        generated = self.generator(
            prompt,
            max_new_tokens=300,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.95,
            repetition_penalty=1.2,
        )[0]["generated_text"]

        # Remove the echoed prompt only when it is really there, so a
        # generator that returns just the continuation is not truncated.
        if generated.startswith(prompt):
            generated = generated[len(prompt):]
        return generated.strip()
# === Step 4: Run App ===
# Build the retriever over the in-file knowledge base, then hand it to the
# generation pipeline. Both constructors load their models, so this work is
# done once, at import time, before the UI below is created.
retriever = SimpleRetriever(eia_knowledge)
rag = RAGPipeline(retriever)
def generate_eia_report(project_description):
    """Gradio callback: return an EIA report, or an error message on failure.

    Args:
        project_description: Text entered by the user.

    Returns:
        The generated report, or a string starting with "❌ Error: " that
        carries the exception message when generation fails.
    """
    import logging  # local import keeps this block self-contained

    try:
        return rag.generate_report(project_description)
    except Exception as e:
        # UI boundary: show the user a readable message, but keep the full
        # traceback in the logs so the failure can still be diagnosed.
        logging.getLogger(__name__).exception("EIA report generation failed")
        return f"❌ Error: {str(e)}"
# Assemble the single-textbox web UI around generate_eia_report and serve it.
description_box = gr.Textbox(lines=4, placeholder="Describe your project...")
demo = gr.Interface(
    fn=generate_eia_report,
    inputs=description_box,
    outputs="text",
    title="🌿 Environmental Impact Assessment Generator (Offline RAG)",
    description=(
        "Enter a project description. This app uses offline RAG "
        "(Retrieval-Augmented Generation) to generate EIA reports without any API."
    ),
)
demo.launch()