harshinde committed
Commit ff0c102 · verified · 1 Parent(s): a30ae44

Update app.py

Files changed (1): app.py (+25 -20)
app.py CHANGED
@@ -1,32 +1,37 @@
-import os
 import streamlit as st
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.llms import HuggingFaceHub
+from langchain.llms import HuggingFacePipeline
 from langchain.schema import Document
-import requests
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
+from sentence_transformers import SentenceTransformer
+import torch
 from io import BytesIO
 import fitz  # PyMuPDF
-from dotenv import load_dotenv
 
 # Set device based on GPU availability
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-# Load environment variables from .env file
-load_dotenv()
+# Load embeddings with a smaller model and run on CPU
+embedding_model = "all-MiniLM-L6-v2"
+embeddings = HuggingFaceEmbeddings(model_name=embedding_model, model_kwargs={'device': 'cpu'})
 
-# Hugging Face API token should now be loaded from the .env file
-# Explicitly set the Hugging Face API token from the environment variable
-os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACE_API_TOKEN")
+# Set up text generation model with PyTorch-compatible pipeline
+model_name = "google/flan-t5-small"  # Or use a smaller model if needed
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
 
-# Load embeddings with Hugging Face API
-embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
-embeddings = HuggingFaceEmbeddings(model_name=embedding_model)  # Removed api_key parameter
+# Create a text generation pipeline
+generator = pipeline(
+    "text2text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    device=0 if device == "cuda" else -1,
+    model_kwargs={"max_length": 256, "temperature": 0.7}
+)
 
-# Set up the text generation model using Hugging Face Hub
-model_name = "google/flan-t5-small"  # Use a smaller model to reduce response time and cost
-llm = HuggingFaceHub(repo_id=model_name, huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"), model_kwargs={"max_length": 256, "temperature": 0.7})
+llm = HuggingFacePipeline(pipeline=generator)
 
 # Streamlit interface
 def main():
@@ -68,12 +73,12 @@ def main():
     # Concatenate retrieved docs into a single prompt
     prompt = "\n".join([doc.page_content for doc in docs]) + "\n\n" + user_input
 
-    # Generate response using the Hugging Face API
+    # Generate response
     try:
-        response = llm(prompt)
+        response = generator(prompt, max_new_tokens=50, num_return_sequences=1)[0]["generated_text"]
         st.write(response)
-    except requests.exceptions.RequestException as e:
-        st.error(f"Error connecting to Hugging Face API: {e}")
+    except torch.cuda.OutOfMemoryError:
+        st.error("Out of memory. Try using a smaller model or fewer documents.")
 
 if __name__ == "__main__":
-    main()
+    main()
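
For context, the commit replaces the remote HuggingFaceHub LLM (which needed an API token and could fail on network errors) with a local transformers pipeline wrapped in HuggingFacePipeline. Below is a minimal standalone sketch of that new path, assuming transformers and a LangChain version where HuggingFacePipeline still lives under langchain.llms; the prompt string is a hypothetical example, not part of the commit.

# Minimal sketch of the committed local-generation path (not part of the commit).
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

# Same pipeline construction as app.py; device=-1 keeps inference on CPU.
generator = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0 if device == "cuda" else -1,
)

# Wrapped for use in LangChain chains; app.py also calls the raw pipeline directly.
llm = HuggingFacePipeline(pipeline=generator)

# text2text-generation returns a list of dicts keyed by "generated_text".
result = generator("Question: What does FAISS index?", max_new_tokens=50)
print(result[0]["generated_text"])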