import json
import os
import tempfile

import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.schema import Document
from langchain_groq import ChatGroq
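# Run locally with: streamlit run app.py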
# --- Environment Variables ---
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "your-groq-api-key")
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "your-huggingface-api-key")
# --- Initialize Groq LLM ---
llm = ChatGroq(
    api_key=GROQ_API_KEY,
    model_name="llama3-8b-8192",  # ChatGroq accepts either `model_name` or `model`
    temperature=0.1,  # low temperature for focused, near-deterministic answers
)
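# llama3-8b-8192 is Groq's hosted Llama 3 8B; the trailing 8192 refers to its
# 8,192-token context window, which bounds how much retrieved context fits per query.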
# --- HuggingFace Embeddings ---
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    cache_folder="./hf_cache",
    # huggingfacehub_api_token=HUGGINGFACE_API_KEY
)
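# all-MiniLM-L6-v2 is downloaded once into ./hf_cache and then runs locally,
# producing 384-dimensional sentence embeddings; no HuggingFace API key is
# needed for inference, which is why the token argument above stays commented out.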
# --- System Prompt for Content Enhancement ---
system_prompt = """You are an AI Content Enhancement Specialist. Your purpose is to optimize user-provided text to maximize its effectiveness for large language models (LLMs) in search, question-answering, and conversational AI systems.

Evaluate the input text against the following criteria, assigning each a score from 1–10:
- Clarity: How easily can the content be understood?
- Structuredness: How well-organized and coherent is the content?
- LLM Answerability: How easily can an LLM extract precise answers from the content?

Identify the most salient keywords.

Rewrite the text to improve:
- Clarity and precision
- Logical structure and flow
- Suitability for LLM-based information retrieval

Present your analysis and optimized text in the following JSON format:
```json
{
    "score": {
        "clarity": 8.5,
        "structuredness": 7.0,
        "answerability": 9.0
    },
    "keywords": ["example", "installation", "setup"],
    "optimized_text": "..."
}
```"""
# --- Create Chat Prompt Template for Content Enhancement ---
enhancement_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("user", "{input}"),
])
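# "{input}" is a template variable: it is filled in by
# enhancement_chain.invoke({"input": ...}) in the Content Enhancement tab.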
# --- Streamlit UI ---
st.title("📄📥 Chat with PDF or Text using Groq + RAG")
st.sidebar.title("Features")
st.sidebar.markdown("- Upload PDF files")
st.sidebar.markdown("- Paste raw text")
st.sidebar.markdown("- Content enhancement analysis")
st.sidebar.markdown("- Question answering with RAG")
# Create tabs for different functionalities
tab1, tab2 = st.tabs(["📄 Document Chat", "🔧 Content Enhancement"])
with tab1:
    st.header("Document Question Answering")

    # Option to upload PDF
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

    # Option to paste raw text
    pasted_text = st.text_area("Or paste some text below:", height=150)

    # User's question
    user_query = st.text_input("Ask a question about the content")

    # Submit button for QA
    submit_qa_button = st.button("Submit Question", key="qa_submit")

    if submit_qa_button:
        if not user_query.strip():
            st.warning("Please enter a question.")
            st.stop()

        documents = []
        # Handle uploaded PDF
        if uploaded_file:
            with st.spinner("Processing PDF..."):
                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                    tmp_file.write(uploaded_file.read())
                    tmp_path = tmp_file.name
                loader = PyPDFLoader(tmp_path)
                documents = loader.load_and_split()
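                # load_and_split() yields one Document per chunk, splitting the
                # PDF pages with LangChain's default RecursiveCharacterTextSplitter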
                # Clean up temporary file
                os.unlink(tmp_path)
        # Handle pasted text if no PDF
        elif pasted_text.strip():
            documents = [Document(page_content=pasted_text)]
        else:
            st.warning("Please upload a PDF or paste some text.")
            st.stop()
        # Create vector store
        with st.spinner("Creating embeddings..."):
            vectorstore = FAISS.from_documents(documents, embedding)
            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
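            # k=3 keeps only the three chunks most similar to the query,
            # trading some recall for a smaller prompt sent to the LLM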
        # Custom prompt for QA
        qa_prompt_template = PromptTemplate(
            input_variables=["context", "question"],
            template="""You are an AI assistant. Use the following context to answer the question.
Be concise, accurate, and helpful. If the answer is not in the context, say so.

Context: {context}

Question: {question}

Answer:""",
        )
        # QA Chain
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
            chain_type_kwargs={"prompt": qa_prompt_template},
        )
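        # chain_type="stuff" simply concatenates ("stuffs") all retrieved chunks
        # into the {context} slot of the prompt above; with k=3 chunks this
        # should fit comfortably within the model's context window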
        # Run QA
        with st.spinner("Generating answer..."):
            try:
                result = qa_chain.invoke({"query": user_query})

                # Show result
                st.markdown("### 💬 Answer")
                st.write(result["result"])

                # Show sources
                with st.expander("📄 Source Documents"):
                    for i, doc in enumerate(result["source_documents"]):
                        st.write(f"**Source {i+1}:**")
                        st.write(doc.page_content[:500] + "..." if len(doc.page_content) > 500 else doc.page_content)
                        if doc.metadata:
                            st.write(f"*Metadata: {doc.metadata}*")
                        st.write("---")
            except Exception as e:
                st.error(f"An error occurred: {str(e)}")
with tab2:
    st.header("Content Enhancement Analysis")
    st.markdown("Analyze and optimize your content for better LLM performance.")

    # Text input for enhancement
    enhancement_text = st.text_area("Enter text to analyze and enhance:", height=200, key="enhancement_input")

    # Submit button for enhancement
    submit_enhancement_button = st.button("Analyze & Enhance", key="enhancement_submit")

    if submit_enhancement_button:
        if not enhancement_text.strip():
            st.warning("Please enter some text to analyze.")
            st.stop()
with st.spinner("Analyzing content..."):
try:
# Create the enhancement chain
enhancement_chain = enhancement_prompt | llm
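                # LCEL: the | operator pipes the formatted prompt into the LLM,
                # yielding a runnable whose invoke() returns the model's message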
                # Run enhancement analysis
                result = enhancement_chain.invoke({"input": enhancement_text})

                # Parse the result
                result_content = result.content if hasattr(result, "content") else str(result)

                st.markdown("### 📊 Analysis Results")
                # Try to extract JSON from the response
                try:
                    # Find the outermost braces in the response
                    json_start = result_content.find("{")
                    json_end = result_content.rfind("}") + 1
                    # rfind returns -1 when no '}' exists (making json_end 0), so
                    # require json_end > json_start to confirm both braces were found in order
                    if json_start != -1 and json_end > json_start:
                        json_str = result_content[json_start:json_end]
                        analysis_data = json.loads(json_str)
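                        # Slicing from the first '{' to the last '}' tolerates
                        # prose or code fences around the JSON, but assumes the
                        # reply contains a single JSON object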
                        # Display scores
                        st.markdown("#### Scores (1-10)")
                        col1, col2, col3 = st.columns(3)
                        with col1:
                            clarity_score = analysis_data.get("score", {}).get("clarity", "N/A")
                            st.metric("Clarity", clarity_score)
                        with col2:
                            struct_score = analysis_data.get("score", {}).get("structuredness", "N/A")
                            st.metric("Structure", struct_score)
                        with col3:
                            answer_score = analysis_data.get("score", {}).get("answerability", "N/A")
                            st.metric("Answerability", answer_score)

                        # Display keywords
                        keywords = analysis_data.get("keywords", [])
                        if keywords:
                            st.markdown("#### 🔑 Key Terms")
                            st.write(", ".join(keywords))

                        # Display optimized text
                        optimized_text = analysis_data.get("optimized_text", "")
                        if optimized_text:
                            st.markdown("#### ✨ Optimized Content")
                            st.text_area("Enhanced version:", value=optimized_text, height=200, key="optimized_output")
                            # A nested st.button can't work here: clicking it reruns the
                            # script with submit_enhancement_button reset to False, so that
                            # branch would never render. Point at manual copy instead.
                            st.caption("📋 Copy the optimized text directly from the text area above.")
                    else:
                        # Fallback: display raw response
                        st.markdown("#### Analysis Response")
                        st.write(result_content)
                except json.JSONDecodeError:
                    # Fallback: display raw response
                    st.markdown("#### Analysis Response")
                    st.write(result_content)
            except Exception as e:
                st.error(f"An error occurred during enhancement: {str(e)}")
# --- Sidebar Information ---
with st.sidebar:
    st.markdown("---")
    st.markdown("### 🔧 Configuration")
    st.markdown("Make sure to set your API keys:")
    st.code("export GROQ_API_KEY='your-key'")
    st.code("export HUGGINGFACE_API_KEY='your-key'")
    st.markdown("---")
    st.markdown("### ℹ️ About")
    st.markdown("This app combines:")
    st.markdown("- **Groq LLM** for fast inference")
    st.markdown("- **FAISS** for vector search")
    st.markdown("- **HuggingFace** embeddings")
    st.markdown("- **RAG** for accurate answers")
# --- Footer ---
st.markdown("---")
st.markdown("*Built with Streamlit, LangChain, and Groq*")