Shami96 committed on
Commit
aa235e2
·
verified ·
1 Parent(s): d847f8e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -59
app.py CHANGED
@@ -5,72 +5,82 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain_community.vectorstores import Chroma
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
7
  from langchain_groq import ChatGroq
8
- from langchain.document_loaders import PyPDFLoader
9
 
10
- # --- Hugging Face Hub Setup ---
11
- HF_REPO_ID = "Shami96/7solar-documentation" # Replace with your dataset
12
- HF_PDF_NAME = "7solar_documentation.pdf" # Your PDF filename
 
13
 
14
- # --- Load PDF from Hugging Face Hub ---
15
- def load_pdf_from_hf():
16
- pdf_path = hf_hub_download(
17
- repo_id=HF_REPO_ID,
18
- filename=HF_PDF_NAME,
19
- repo_type="dataset", # Critical for datasets!
20
- token=os.environ.get("HF_TOKEN") # For private repos
21
- )
22
- loader = PyPDFLoader(pdf_path)
23
- return loader.load()
 
 
 
 
 
 
24
 
25
- # --- Split & Embed Docs ---
26
- def create_vector_db():
27
- docs = load_pdf_from_hf()
28
  text_splitter = RecursiveCharacterTextSplitter(
29
- chunk_size=2000,
30
- chunk_overlap=300
31
  )
32
- chunks = text_splitter.split_documents(docs)
33
  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
34
  return Chroma.from_documents(chunks, embeddings)
35
 
36
- # --- RAG Chatbot Logic ---
37
- def get_response(query, history):
38
- # Handle greetings
39
- if query.lower() in ["hi", "hello", "hey"]:
40
- return history + [(query, "Hello! 👋 Ask me about 7Solar's solar packages or services!")]
41
-
42
- # Retrieve relevant doc chunks
43
- matching_docs = vector_db.similarity_search(query, k=5)
44
- if not matching_docs:
45
- return history + [(query, "I couldn't find details. Ask about 7Solar's services!")]
46
-
47
- # Generate LLM response
48
- llm = ChatGroq(
49
- model_name="llama3-70b-8192",
50
- temperature=0.2,
51
- api_key=os.environ.get("GROQ_API_KEY") # Set in Spaces Secrets
52
- )
53
- context = "\n\n".join([doc.page_content for doc in matching_docs])
54
- response = llm.invoke(
55
- f"Answer this query using ONLY the text below:\n\n{context}\n\nQuestion: {query}"
56
- )
57
- return history + [(query, response.content)]
58
-
59
- # --- Initialize Vector DB ---
60
- print("⚙️ Loading document...")
61
- vector_db = create_vector_db()
 
 
 
62
 
63
- # --- Gradio Interface ---
64
- with gr.Blocks() as demo:
65
- gr.Markdown("# ☀️ 7Solar Smart Assistant")
66
- chatbot = gr.Chatbot()
67
- msg = gr.Textbox(label="Ask about solar packages, services, etc.")
68
- msg.submit(get_response, [msg, chatbot], [chatbot])
69
- clear = gr.Button("Clear Chat")
70
- clear.click(lambda: [], None, chatbot, queue=False)
71
 
72
- demo.launch(
73
- server_name="0.0.0.0",
74
- server_port=7860,
75
- #enable_api=True # This explicitly enables the API
76
- )
 
 
 
5
  from langchain_community.vectorstores import Chroma
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
7
  from langchain_groq import ChatGroq
8
+ from langchain_community.document_loaders import PyPDFLoader
9
 
10
# Configuration
HF_REPO_ID = "Shami96/7solar-documentation"
HF_PDF_NAME = "7solar_documentation.pdf"
HF_TOKEN = os.environ.get("HF_TOKEN")  # only required when the dataset repo is private


# Initialize components
def initialize_components():
    """Download the 7Solar documentation PDF and build a Chroma vector store.

    Fetches the PDF from the Hugging Face Hub dataset repo, loads it with
    PyPDFLoader, splits it into overlapping chunks, embeds each chunk with
    a MiniLM sentence-transformer, and indexes everything in Chroma.

    Returns:
        Chroma: in-memory vector store over the document chunks.

    Raises:
        RuntimeError: if the PDF cannot be downloaded or parsed.
    """
    print("⚙️ Initializing components...")

    # Load PDF
    try:
        pdf_path = hf_hub_download(
            repo_id=HF_REPO_ID,
            filename=HF_PDF_NAME,
            repo_type="dataset",  # the PDF lives in a dataset repo, not a model repo
            token=HF_TOKEN,
        )
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
    except Exception as e:
        # Chain with `from e` so the root-cause traceback is preserved
        # (the original `raise RuntimeError(...)` discarded it).
        raise RuntimeError(f"Failed to load PDF: {str(e)}") from e

    # Create vector store
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    chunks = text_splitter.split_documents(documents)
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return Chroma.from_documents(chunks, embeddings)
40
 
41
# Chat function
def respond(message, history):
    """Answer a user message via RAG over the 7Solar documentation.

    Args:
        message: the user's question (plain text).
        history: prior chat turns; unused, but required by gr.ChatInterface.

    Returns:
        str: the assistant's reply, or a human-readable error description.
    """
    global vector_db  # lazily built on the first retrieval query
    try:
        # Handle greetings first: they need no retrieval, so don't pay the
        # slow one-time vector-store initialization (or fail) for a bare "hi".
        # strip() tolerates surrounding whitespace like "hi ".
        if message.lower().strip() in ["hi", "hello", "hey"]:
            return "Hello! I'm your 7Solar assistant. How can I help you today?"

        # Initialize if not already done
        if "vector_db" not in globals():
            vector_db = initialize_components()

        # Search documents
        docs = vector_db.similarity_search(message, k=3)
        if not docs:
            return "I couldn't find relevant information. Please try another question about 7Solar."

        # Generate response (ChatGroq reads GROQ_API_KEY from the environment)
        llm = ChatGroq(
            model_name="llama3-70b-8192",
            temperature=0.3,
        )
        context = "\n\n".join([doc.page_content for doc in docs])
        response = llm.invoke(
            f"Using only this context:\n{context}\n\nQuestion: {message}\nAnswer:"
        )
        return response.content
    except Exception as e:
        # Surface the failure in-chat instead of crashing the Gradio app.
        return f"An error occurred: {str(e)}"
70
 
71
# Gradio chat UI wired to the RAG responder.
_EXAMPLE_QUESTIONS = [
    "What solar packages do you offer?",
    "How does the registration process work?",
]

demo = gr.ChatInterface(
    fn=respond,
    title="☀️ 7Solar Assistant",
    description="Ask me anything about 7Solar's services and documentation",
    examples=_EXAMPLE_QUESTIONS,
    cache_examples=False,
)

if __name__ == "__main__":
    # Print the failure for the Spaces container logs, then re-raise so the
    # process still exits non-zero.
    try:
        demo.launch(server_name="0.0.0.0", server_port=7860)
    except Exception as e:
        print(f"Failed to launch: {str(e)}")
        raise