MBilal-72 committed on
Commit
dc3f770
·
verified ·
1 Parent(s): 832dfed

Update app.py with system prompt

Browse files
Files changed (1) hide show
  1. app.py +241 -79
app.py CHANGED
@@ -1,14 +1,14 @@
1
  import os
2
  import tempfile
3
  import streamlit as st
 
4
 
5
  from langchain_community.document_loaders import PyPDFLoader
6
  from langchain_community.vectorstores import FAISS
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
8
  from langchain.chains import RetrievalQA
9
- from langchain.prompts import PromptTemplate
10
  from langchain.schema import Document
11
- # from langchain_groq import GroqLLM
12
  from langchain_groq import ChatGroq
13
 
14
  # --- Environment Variables ---
@@ -16,11 +16,6 @@ GROQ_API_KEY = os.getenv("GROQ_API_KEY", "your-groq-api-key")
16
  HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "your-huggingface-api-key")
17
 
18
  # --- Initialize Groq LLM ---
19
- # llm = GroqLLM(
20
- # api_key=GROQ_API_KEY,
21
- # model="llama3-8b-8192",
22
- # temperature=0.1
23
- # )
24
  llm = ChatGroq(
25
  api_key=GROQ_API_KEY,
26
  model_name="llama3-8b-8192", # Note: it's `model_name` not `model`
@@ -33,79 +28,246 @@ embedding = HuggingFaceEmbeddings(
33
  cache_folder="./hf_cache",
34
  # huggingfacehub_api_token=HUGGINGFACE_API_KEY
35
  )
36
- # embedding = HuggingFaceEmbeddings(
37
- # model_name="sentence-transformers/all-MiniLM-L6-v2"
38
- # )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  # --- Streamlit UI ---
41
  st.title("📄📥 Chat with PDF or Text using Groq + RAG")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
- # Option to upload PDF
44
- uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
45
-
46
- # Option to paste raw text
47
- pasted_text = st.text_area("Or paste some text below:")
48
-
49
- # User's question
50
- user_query = st.text_input("Ask a question about the content")
51
-
52
- # Submit button
53
- submit_button = st.button("Submit")
54
-
55
- if submit_button:
56
- documents = []
57
-
58
- # Handle uploaded PDF
59
- if uploaded_file:
60
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
61
- tmp_file.write(uploaded_file.read())
62
- tmp_path = tmp_file.name
63
-
64
- loader = PyPDFLoader(tmp_path)
65
- documents = loader.load_and_split()
66
-
67
- # Handle pasted text if no PDF
68
- elif pasted_text.strip():
69
- documents = [Document(page_content=pasted_text)]
70
-
71
- else:
72
- st.warning("Please upload a PDF or paste some text.")
73
- st.stop()
74
-
75
- # Create vector store
76
- vectorstore = FAISS.from_documents(documents, embedding)
77
- retriever = vectorstore.as_retriever()
78
-
79
- # Optional custom prompt
80
- prompt_template = PromptTemplate(
81
- input_variables=["context", "question"],
82
- template="""
83
- You are an AI assistant. Use the following context to answer the question.
84
- Be concise, accurate, and helpful.
85
-
86
- Context: {context}
87
- Question: {question}
88
- Answer:"""
89
- )
90
-
91
- # QA Chain
92
- qa_chain = RetrievalQA.from_chain_type(
93
- llm=llm,
94
- chain_type="stuff",
95
- retriever=retriever,
96
- return_source_documents=True,
97
- chain_type_kwargs={"prompt": prompt_template}
98
- )
99
-
100
- # Run QA
101
- result = qa_chain({"query": user_query})
102
-
103
- # Show result
104
- st.markdown("### 💬 Answer")
105
- st.write(result["result"])
106
-
107
- # Show sources (only if from PDF)
108
- if uploaded_file:
109
- with st.expander("📄 Sources"):
110
- for i, doc in enumerate(result["source_documents"]):
111
- st.write(f"**Page {i+1}** — {doc.metadata.get('source', 'Unknown')}")
 
1
  import os
2
  import tempfile
3
  import streamlit as st
4
+ import json
5
 
6
  from langchain_community.document_loaders import PyPDFLoader
7
  from langchain_community.vectorstores import FAISS
8
  from langchain_community.embeddings import HuggingFaceEmbeddings
9
  from langchain.chains import RetrievalQA
10
+ from langchain.prompts import PromptTemplate, ChatPromptTemplate
11
  from langchain.schema import Document
 
12
  from langchain_groq import ChatGroq
13
 
14
  # --- Environment Variables ---
 
16
  HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "your-huggingface-api-key")
17
 
18
  # --- Initialize Groq LLM ---
 
 
 
 
 
19
  llm = ChatGroq(
20
  api_key=GROQ_API_KEY,
21
  model_name="llama3-8b-8192", # Note: it's `model_name` not `model`
 
28
  cache_folder="./hf_cache",
29
  # huggingfacehub_api_token=HUGGINGFACE_API_KEY
30
  )
31
+
32
+ # --- System Prompt for Content Enhancement ---
33
+ system_prompt = """You are an AI Content Enhancement Specialist. Your purpose is to optimize user-provided text to maximize its effectiveness for large language models (LLMs) in search, question-answering, and conversational AI systems.
34
+
35
+ Evaluate the input text based on the following criteria, assigning a score from 1–10 for each:
36
+
37
+ Clarity: How easily can the content be understood?
38
+
39
+ Structuredness: How well-organized and coherent is the content?
40
+
41
+ LLM Answerability: How easily can an LLM extract precise answers from the content?
42
+
43
+ Identify the most salient keywords.
44
+
45
+ Rewrite the text to improve:
46
+
47
+ Clarity and precision
48
+
49
+ Logical structure and flow
50
+
51
+ Suitability for LLM-based information retrieval
52
+
53
+ Present your analysis and optimized text in the following JSON format:
54
+
55
+ ```json
56
+ {
57
+ "score": {
58
+ "clarity": 8.5,
59
+ "structuredness": 7.0,
60
+ "answerability": 9.0
61
+ },
62
+ "keywords": ["example", "installation", "setup"],
63
+ "optimized_text": "..."
64
+ }
65
+ ```"""
66
+
67
# --- Create Chat Prompt Template for Content Enhancement ---
# ChatPromptTemplate parses each message string as an f-string style template,
# so any `{...}` in it is read as an input variable. system_prompt embeds a
# literal JSON example; left unescaped, those braces are picked up as a bogus
# variable and `invoke({"input": ...})` fails because that variable is never
# supplied. Doubling the braces makes them render as literal `{` / `}` and
# leaves `{input}` as the only real placeholder.
enhancement_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt.replace("{", "{{").replace("}", "}}")),
    ("user", "{input}"),
])
72
 
73
  # --- Streamlit UI ---
74
  st.title("📄📥 Chat with PDF or Text using Groq + RAG")
75
# Sidebar: a title followed by one markdown bullet per advertised feature.
st.sidebar.title("Features")
for feature_bullet in (
    "- Upload PDF files",
    "- Paste raw text",
    "- Content enhancement analysis",
    "- Question answering with RAG",
):
    st.sidebar.markdown(feature_bullet)

# Two tabs: document question answering and content enhancement.
tab_labels = ["📄 Document Chat", "🔧 Content Enhancement"]
tab1, tab2 = st.tabs(tab_labels)
83
+
84
# Prompt used by the "stuff" QA chain; {context} and {question} are filled in
# by RetrievalQA.
_QA_TEMPLATE = (
    "You are an AI assistant. Use the following context to answer the question.\n"
    "Be concise, accurate, and helpful. If the answer is not in the context, say so.\n"
    "\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "Answer:"
)


def _load_pdf_documents(pdf_file):
    """Write the uploaded PDF to a temp file and return its split documents.

    PyPDFLoader needs a filesystem path, so the upload is copied to a
    temporary file first. The file is removed in a ``finally`` so it does not
    leak when parsing raises.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_file.read())
        tmp_path = tmp_file.name
    try:
        return PyPDFLoader(tmp_path).load_and_split()
    finally:
        os.unlink(tmp_path)


def _render_sources(source_documents):
    """Show a preview (first 500 characters) and metadata of each source."""
    with st.expander("📄 Source Documents"):
        for i, doc in enumerate(source_documents):
            st.write(f"**Source {i+1}:**")
            text = doc.page_content
            st.write(text[:500] + "..." if len(text) > 500 else text)
            if hasattr(doc, 'metadata') and doc.metadata:
                st.write(f"*Metadata: {doc.metadata}*")
            st.write("---")


def _answer_question(pdf_file, raw_text, question):
    """Validate the inputs, index the content, run the QA chain and render.

    Validation failures show a warning and return instead of calling
    ``st.stop()``: stopping the script here would also prevent everything
    rendered after this tab (the second tab, sidebar info, footer) from
    appearing. An uploaded PDF takes precedence over pasted text.
    """
    if not question.strip():
        st.warning("Please enter a question.")
        return

    try:
        if pdf_file:
            with st.spinner("Processing PDF..."):
                documents = _load_pdf_documents(pdf_file)
        elif raw_text.strip():
            documents = [Document(page_content=raw_text)]
        else:
            st.warning("Please upload a PDF or paste some text.")
            return

        # A PDF without extractable text yields no documents; there is
        # nothing to index, so report it instead of failing in the vector store.
        if not documents:
            st.warning("No readable text was found in the uploaded PDF.")
            return

        with st.spinner("Creating embeddings..."):
            vectorstore = FAISS.from_documents(documents, embedding)
            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
            chain_type_kwargs={
                "prompt": PromptTemplate(
                    input_variables=["context", "question"],
                    template=_QA_TEMPLATE,
                )
            },
        )

        with st.spinner("Generating answer..."):
            # `.invoke` replaces the deprecated direct call `qa_chain({...})`.
            result = qa_chain.invoke({"query": question})

        st.markdown("### 💬 Answer")
        st.write(result["result"])
        _render_sources(result["source_documents"])
    except Exception as e:
        # UI boundary: surface loader / embedding / LLM failures as a message
        # rather than an unhandled traceback.
        st.error(f"An error occurred: {str(e)}")


with tab1:
    st.header("Document Question Answering")

    # Option to upload PDF
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

    # Option to paste raw text
    pasted_text = st.text_area("Or paste some text below:", height=150)

    # User's question
    user_query = st.text_input("Ask a question about the content")

    # Submit button for QA
    submit_qa_button = st.button("Submit Question", key="qa_submit")

    if submit_qa_button:
        _answer_question(uploaded_file, pasted_text, user_query)
172
+
173
def _extract_json_object(text):
    """Return the outermost ``{...}`` span of *text* parsed as a dict, or None.

    The model may wrap its JSON in prose or a code fence, so the span from the
    first ``{`` to the last ``}`` is parsed. None is returned when there is no
    such span or it is not valid JSON.
    """
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end <= start:
        return None
    try:
        parsed = json.loads(text[start:end + 1])
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) else None


def _render_analysis(analysis_data):
    """Render scores, keywords and the optimized text from the parsed reply."""
    st.markdown("#### Scores (1-10)")
    scores = analysis_data.get('score')
    if not isinstance(scores, dict):
        # Tolerate a reply whose "score" is missing or not an object.
        scores = {}
    metrics = (
        ("Clarity", 'clarity'),
        ("Structure", 'structuredness'),
        ("Answerability", 'answerability'),
    )
    for column, (label, key) in zip(st.columns(3), metrics):
        with column:
            st.metric(label, scores.get(key, 'N/A'))

    keywords = analysis_data.get('keywords', [])
    if keywords:
        st.markdown("#### 🔑 Key Terms")
        if isinstance(keywords, str):
            keywords = [keywords]
        # str() guards against non-string items, which would break join().
        st.write(", ".join(str(keyword) for keyword in keywords))

    optimized_text = analysis_data.get('optimized_text', '')
    if optimized_text:
        st.markdown("#### ✨ Optimized Content")
        st.text_area("Enhanced version:", value=optimized_text, height=200, key="optimized_output")
        # No nested "copy" button here: clicking a button reruns the script,
        # which resets the outer "Analyze & Enhance" button to False, so the
        # nested button's branch would never run and the results would vanish.
        st.caption("Select the text above to copy it.")


def _run_enhancement(text):
    """Send *text* through the enhancement prompt and render the reply.

    An empty input shows a warning and returns instead of calling
    ``st.stop()``, so the elements rendered after this tab still appear.
    """
    if not text.strip():
        st.warning("Please enter some text to analyze.")
        return

    with st.spinner("Analyzing content..."):
        try:
            enhancement_chain = enhancement_prompt | llm
            result = enhancement_chain.invoke({"input": text})
            result_content = result.content if hasattr(result, 'content') else str(result)

            st.markdown("### 📊 Analysis Results")

            analysis_data = _extract_json_object(result_content)
            if analysis_data is None:
                # Fallback: display raw response
                st.markdown("#### Analysis Response")
                st.write(result_content)
            else:
                _render_analysis(analysis_data)
        except Exception as e:
            st.error(f"An error occurred during enhancement: {str(e)}")


with tab2:
    st.header("Content Enhancement Analysis")
    st.markdown("Analyze and optimize your content for better LLM performance.")

    # Text input for enhancement
    enhancement_text = st.text_area("Enter text to analyze and enhance:", height=200, key="enhancement_input")

    # Submit button for enhancement
    submit_enhancement_button = st.button("Analyze & Enhance", key="enhancement_submit")

    if submit_enhancement_button:
        _run_enhancement(enhancement_text)
254
+
255
# --- Sidebar Information ---
with st.sidebar:
    # Configuration section: which environment variables to export.
    for config_line in (
        "---",
        "### 🔧 Configuration",
        "Make sure to set your API keys:",
    ):
        st.markdown(config_line)
    for export_command in (
        "export GROQ_API_KEY='your-key'",
        "export HUGGINGFACE_API_KEY='your-key'",
    ):
        st.code(export_command)

    # About section: the components this app is built from.
    for about_line in (
        "---",
        "### ℹ️ About",
        "This app combines:",
        "- **Groq LLM** for fast inference",
        "- **FAISS** for vector search",
        "- **HuggingFace** embeddings",
        "- **RAG** for accurate answers",
    ):
        st.markdown(about_line)

# --- Footer ---
for footer_line in ("---", "*Built with Streamlit, LangChain, and Groq*"):
    st.markdown(footer_line)