"""Streamlit app: crop-disease Q&A over a FAISS vector store of plant biodata.

Retrieves the most relevant rows from a CSV-backed FAISS index (HuggingFace
embeddings), then asks a Hugging Face hosted LLM to answer in simple bullet
points.
"""

import logging
import os
import textwrap

import dotenv
import streamlit as st
import yaml
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import CSVLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Load environment variables from .env file
dotenv.load_dotenv()


def load_config():
    """Load app settings (model names, paths, sampling params) from YAML."""
    with open("yaml-editor-online.yaml", "r") as f:
        return yaml.safe_load(f)


# Securely fetch Hugging Face API key — set HUGGING in your environment/.env
hf_token = os.getenv('HUGGING')

# Load configuration
config = load_config()

logging.basicConfig(level=logging.INFO)

# Embeddings model shared by index creation and querying (must match, or
# stored vectors and query vectors live in different spaces).
instructor_embeddings = HuggingFaceEmbeddings(model_name=config["embedding_model"])


def create_vector_db():
    """Build the FAISS index from plant_biodata.csv and persist it to disk.

    Best-effort: failures are logged, not raised, so the UI can still start.
    """
    try:
        loader = CSVLoader(
            file_path="plant_biodata.csv",
            source_column="Information about the disease",
        )
        data = loader.load()
        # Create FAISS vector store
        vectordb = FAISS.from_documents(documents=data, embedding=instructor_embeddings)
        vectordb.save_local(config["vector_db_path"])
        logging.info("Vector database successfully created and saved.")
    except Exception as e:
        logging.error("Error creating vector database:", exc_info=e)


def get_qa_chain(query):
    """Answer `query` from the FAISS index via a hosted LLM.

    Returns the generated answer as a string, or a human-readable error
    message (never raises).
    """
    try:
        if not os.path.exists(config["vector_db_path"]):
            logging.error(f"FAISS index path does not exist: {config['vector_db_path']}")
            return "Error: No data found."

        vectordb = FAISS.load_local(
            config["vector_db_path"],
            instructor_embeddings,
            # Safe here: we only deserialize an index this app wrote itself.
            allow_dangerous_deserialization=True,
        )
        retriever = vectordb.as_retriever(score_threshold=config["score_threshold"])

        # Retrieve top-k relevant documents
        relevant_docs = retriever.get_relevant_documents(query)[:3]
        if not relevant_docs:
            return "No relevant information found."

        # Summarize the retrieved documents into a bounded context window.
        summarized_context = " ".join(doc.page_content for doc in relevant_docs)
        summarized_context = textwrap.shorten(summarized_context, width=600, placeholder="...")

        # BUG FIX: the template previously said "Given the following context"
        # but had no {context} placeholder — the retrieved documents were
        # computed and then silently discarded. The context is now actually
        # fed to the model.
        prompt_template = """Given the following context and a question, generate a structured answer in bullet points.

CONTEXT: {context}

QUESTION: {query}

Ensure the response is simple, clear, and formatted as bullet points avoiding complex terms.
"""
        prompt = PromptTemplate(
            input_variables=["context", "query"],
            template=prompt_template,
        ).format(context=summarized_context, query=query)

        # Call LLM
        llm = HuggingFaceHub(
            repo_id=config["model_name"],
            model_kwargs={
                "temperature": config["temperature"],
                "max_length": 150,
                "top_p": config["top_p"],
                "top_k": config["top_k"],
            },
            huggingfacehub_api_token=hf_token,
        )
        response = llm(prompt)

        # Hub text-generation models often echo the prompt before the answer;
        # strip it here so callers get only the generated text. This replaces
        # the fragile magic-number slicing (response[250:]) formerly done in
        # main(), which could truncate real content.
        if response.startswith(prompt):
            response = response[len(prompt):]
        return response.strip()
    except Exception as e:
        logging.error("Error getting response:", exc_info=e)
        return "Sorry, there was an error processing your request."


def main():
    """Streamlit UI: one text input plus a button that shows the answer."""
    st.set_page_config(page_title="Crop Disease Assistant", page_icon="🌱", layout="centered")

    # Custom CSS for Dark Mode (currently empty — add rules here if desired).
    dark_theme_css = """
    """
    st.markdown(dark_theme_css, unsafe_allow_html=True)

    st.title("🌾 Crop Disease Assistant")
    st.write("Enter a crop disease-related question, and get simplified information.")

    # Text input for the user query
    query = st.text_input("Enter your query:")

    # Display response on clicking the button
    if st.button("Get Information"):
        if query:
            response = get_qa_chain(query)
            # get_qa_chain now returns only the generated answer, so the
            # debug prints and response[250:] slicing are gone.
            st.markdown(f"**📌 Response:**\n\n{response}")
        else:
            st.write("Please enter a query to get a response.")


if __name__ == "__main__":
    # Lazily build the index on first run so the app works out of the box.
    if not os.path.exists(config["vector_db_path"]):
        logging.info(f"Vector database not found at {config['vector_db_path']}, creating it now.")
        create_vector_db()
    main()