# NOTE: extraction residue removed here -- a "File size: 10,360 Bytes" banner, a
# per-line commit-hash gutter, and a 1-273 line-number gutter copied from the
# hosting page's file viewer. None of it is part of the program.
import os
import tempfile
import streamlit as st
import json
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.schema import Document
from langchain_groq import ChatGroq
# --- Environment Variables ---
# The placeholder defaults only keep the script importable; real keys must be
# supplied through the environment for API calls to succeed.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "your-groq-api-key")
# Read for completeness; the only use visible in this section is the
# commented-out argument in the embeddings setup below.
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "your-huggingface-api-key")


# Streamlit re-executes this script from the top on every widget interaction.
# Caching the heavyweight objects avoids rebuilding the client and reloading
# the embedding model on each rerun.
@st.cache_resource
def _load_llm(api_key):
    """Return the Groq chat model, built once per distinct *api_key*."""
    return ChatGroq(
        api_key=api_key,
        model_name="llama3-8b-8192",  # Note: it's `model_name` not `model`
        temperature=0.1,
    )


@st.cache_resource
def _load_embedding():
    """Return the sentence-transformer embedding model, loaded once."""
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        cache_folder="./hf_cache",
        # huggingfacehub_api_token=HUGGINGFACE_API_KEY
    )


# --- Initialize Groq LLM ---
llm = _load_llm(GROQ_API_KEY)

# --- HuggingFace Embeddings ---
embedding = _load_embedding()
# --- System Prompt for Content Enhancement ---
# Stored as plain, human-readable text: the JSON example uses ordinary single
# braces here and is escaped only at the point where the template is built.
system_prompt = """You are an AI Content Enhancement Specialist. Your purpose is to optimize user-provided text to maximize its effectiveness for large language models (LLMs) in search, question-answering, and conversational AI systems.
Evaluate the input text based on the following criteria, assigning a score from 1-10 for each:
Clarity: How easily can the content be understood?
Structuredness: How well-organized and coherent is the content?
LLM Answerability: How easily can an LLM extract precise answers from the content?
Identify the most salient keywords.
Rewrite the text to improve:
Clarity and precision
Logical structure and flow
Suitability for LLM-based information retrieval
Present your analysis and optimized text in the following JSON format:
```json
{
"score": {
"clarity": 8.5,
"structuredness": 7.0,
"answerability": 9.0
},
"keywords": ["example", "installation", "setup"],
"optimized_text": "..."
}
```"""

# --- Create Chat Prompt Template for Content Enhancement ---
# Message strings given to ChatPromptTemplate are format templates in which
# "{name}" marks an input variable. The literal braces of the JSON example
# must therefore be doubled; otherwise the template expects extra variables
# and invoking it with only "input" fails. The user message keeps its single
# braces because "{input}" really is a variable.
enhancement_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt.replace("{", "{{").replace("}", "}}")),
    ("user", "{input}"),
])
# --- Streamlit UI ---
st.title("ππ₯ Chat with PDF or Text using Groq + RAG")

# Sidebar: a heading followed by one markdown bullet per feature, in order.
st.sidebar.title("Features")
for feature_bullet in (
    "- Upload PDF files",
    "- Paste raw text",
    "- Content enhancement analysis",
    "- Question answering with RAG",
):
    st.sidebar.markdown(feature_bullet)

# Create tabs for different functionalities
tab_labels = ["π Document Chat", "π§ Content Enhancement"]
tab1, tab2 = st.tabs(tab_labels)
# Prompt for the "stuff" QA chain. It declares two input variables:
# {context} and {question}.
_QA_PROMPT_TEMPLATE = PromptTemplate(
    input_variables=["context", "question"],
    template=(
        "You are an AI assistant. Use the following context to answer the question.\n"
        "Be concise, accurate, and helpful. If the answer is not in the context, say so.\n"
        "Context: {context}\n"
        "Question: {question}\n"
        "Answer:"
    ),
)


def _load_pdf_documents(pdf_bytes):
    """Parse raw PDF bytes into split documents.

    PyPDFLoader is given a filesystem path, so the bytes are first written to
    a temporary file. The file is removed even when parsing raises.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_bytes)
        tmp_path = tmp_file.name
    try:
        return PyPDFLoader(tmp_path).load_and_split()
    finally:
        # Clean up temporary file
        os.unlink(tmp_path)


def _answer_question(uploaded_file, pasted_text, user_query):
    """Validate the inputs, index the content, and render answer and sources.

    Invalid input is reported with a warning followed by an early return
    rather than ``st.stop()``, so the rest of the page (second tab, sidebar,
    footer) is still rendered. Exceptions propagate to the caller.
    """
    if not user_query.strip():
        st.warning("Please enter a question.")
        return

    # An uploaded PDF takes precedence over pasted text.
    if uploaded_file:
        with st.spinner("Processing PDF..."):
            # getvalue() returns the full upload regardless of the current
            # stream position, unlike read().
            documents = _load_pdf_documents(uploaded_file.getvalue())
        if not documents:
            # Presumably a scanned/image-only PDF; an empty list must not be
            # handed to the vector store.
            st.warning("No extractable text was found in the uploaded PDF.")
            return
    elif pasted_text.strip():
        documents = [Document(page_content=pasted_text)]
    else:
        st.warning("Please upload a PDF or paste some text.")
        return

    # Create vector store
    with st.spinner("Creating embeddings..."):
        vectorstore = FAISS.from_documents(documents, embedding)
        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    # QA Chain
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": _QA_PROMPT_TEMPLATE},
    )

    # Run QA
    with st.spinner("Generating answer..."):
        result = qa_chain.invoke({"query": user_query})

        # Show result
        st.markdown("### π¬ Answer")
        st.write(result["result"])

        # Show sources
        with st.expander("π Source Documents"):
            for i, doc in enumerate(result["source_documents"]):
                st.write(f"**Source {i+1}:**")
                content = doc.page_content
                # Long chunks are previewed: first 500 characters plus "...".
                preview = content[:500] + "..." if len(content) > 500 else content
                st.write(preview)
                if hasattr(doc, 'metadata') and doc.metadata:
                    st.write(f"*Metadata: {doc.metadata}*")
                st.write("---")


with tab1:
    st.header("Document Question Answering")
    # Option to upload PDF
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    # Option to paste raw text
    pasted_text = st.text_area("Or paste some text below:", height=150)
    # User's question
    user_query = st.text_input("Ask a question about the content")
    # Submit button for QA
    submit_qa_button = st.button("Submit Question", key="qa_submit")
    if submit_qa_button:
        # Single boundary handler: PDF parsing, embedding and generation
        # failures are all reported the same way instead of as a traceback.
        try:
            _answer_question(uploaded_file, pasted_text, user_query)
        except Exception as e:
            st.error(f"An error occurred: {str(e)}")
def _extract_json_object(text):
    """Return the JSON object embedded in *text*, or None if there is none.

    The candidate spans the first "{" through the last "}", which tolerates
    prose or a code fence around the object. None is returned when no such
    span exists, when it is not valid JSON, or when it is not an object.
    """
    start = text.find("{")
    end = text.rfind("}")
    # Compare the raw rfind() result: adding 1 first would hide the -1 that
    # signals "not found".
    if start == -1 or end < start:
        return None
    try:
        parsed = json.loads(text[start:end + 1])
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) else None


def _render_analysis(analysis_data):
    """Render scores, keywords and optimized text from the parsed analysis.

    The data comes from model output, so unexpected value types are
    tolerated rather than trusted to match the requested schema.
    """
    scores = analysis_data.get('score')
    if not isinstance(scores, dict):
        scores = {}

    # Display scores
    st.markdown("#### Scores (1-10)")
    metric_specs = (
        ("Clarity", 'clarity'),
        ("Structure", 'structuredness'),
        ("Answerability", 'answerability'),
    )
    for column, (label, score_key) in zip(st.columns(3), metric_specs):
        with column:
            st.metric(label, scores.get(score_key, 'N/A'))

    # Display keywords
    keywords = analysis_data.get('keywords', [])
    if keywords:
        if isinstance(keywords, str):
            # A bare string would otherwise be joined character by character.
            keywords = [keywords]
        st.markdown("#### π Key Terms")
        st.write(", ".join(str(keyword) for keyword in keywords))

    # Display optimized text
    optimized_text = analysis_data.get('optimized_text', '')
    if optimized_text:
        st.markdown("#### β¨ Optimized Content")
        st.text_area("Enhanced version:", value=str(optimized_text), height=200, key="optimized_output")
        # A button nested under the submit button cannot work: its click
        # reruns the script with the submit button False, discarding these
        # results. Nothing here writes to the clipboard either, so a plain
        # hint is shown instead.
        st.caption("Select the text in the box above to copy it.")


with tab2:
    st.header("Content Enhancement Analysis")
    st.markdown("Analyze and optimize your content for better LLM performance.")
    # Text input for enhancement
    enhancement_text = st.text_area("Enter text to analyze and enhance:", height=200, key="enhancement_input")
    # Submit button for enhancement
    submit_enhancement_button = st.button("Analyze & Enhance", key="enhancement_submit")
    if submit_enhancement_button:
        if not enhancement_text.strip():
            # Warn without st.stop() so the sidebar and footer still render.
            st.warning("Please enter some text to analyze.")
        else:
            with st.spinner("Analyzing content..."):
                try:
                    # Create the enhancement chain
                    enhancement_chain = enhancement_prompt | llm
                    # Run enhancement analysis
                    result = enhancement_chain.invoke({"input": enhancement_text})
                    # Parse the result
                    result_content = result.content if hasattr(result, 'content') else str(result)
                    st.markdown("### π Analysis Results")
                    analysis_data = _extract_json_object(result_content)
                    if analysis_data is None:
                        # Fallback: display raw response
                        st.markdown("#### Analysis Response")
                        st.write(result_content)
                    else:
                        _render_analysis(analysis_data)
                except Exception as e:
                    st.error(f"An error occurred during enhancement: {str(e)}")
# --- Sidebar Information ---
with st.sidebar:
    # Configuration section: one shell snippet per expected environment variable.
    st.markdown("---")
    st.markdown("### π§ Configuration")
    st.markdown("Make sure to set your API keys:")
    for env_var_name in ("GROQ_API_KEY", "HUGGINGFACE_API_KEY"):
        st.code(f"export {env_var_name}='your-key'")

    # About section: intro line followed by one bullet per component.
    st.markdown("---")
    st.markdown("### βΉοΈ About")
    about_lines = (
        "This app combines:",
        "- **Groq LLM** for fast inference",
        "- **FAISS** for vector search",
        "- **HuggingFace** embeddings",
        "- **RAG** for accurate answers",
    )
    for about_line in about_lines:
        st.markdown(about_line)
# --- Footer ---
# Horizontal rule followed by an italic credit line at the bottom of the page.
st.markdown("---")
st.markdown("*Built with Streamlit, LangChain, and Groq*")