import os
import tempfile
import json

import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.schema import Document
from langchain_groq import ChatGroq

# --- Environment Variables ---
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "your-groq-api-key")
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "your-huggingface-api-key")

# --- Initialize Groq LLM ---
llm = ChatGroq(
    api_key=GROQ_API_KEY,
    model_name="llama3-8b-8192",  # Note: `model_name` is an accepted alias for `model`
    temperature=0.1,
)

# --- HuggingFace Embeddings ---
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    cache_folder="./hf_cache",
    # huggingfacehub_api_token=HUGGINGFACE_API_KEY
)

# --- System Prompt for Content Enhancement ---
system_prompt = """You are an AI Content Enhancement Specialist. Your purpose is to optimize user-provided text to maximize its effectiveness for large language models (LLMs) in search, question-answering, and conversational AI systems.

Evaluate the input text based on the following criteria, assigning a score from 1-10 for each:
- Clarity: How easily can the content be understood?
- Structuredness: How well-organized and coherent is the content?
- LLM Answerability: How easily can an LLM extract precise answers from the content?

Identify the most salient keywords.

Rewrite the text to improve:
- Clarity and precision
- Logical structure and flow
- Suitability for LLM-based information retrieval

Present your analysis and optimized text in the following JSON format:
```json
{
  "score": {
    "clarity": 8.5,
    "structuredness": 7.0,
    "answerability": 9.0
  },
  "keywords": ["example", "installation", "setup"],
  "optimized_text": "..."
}
```"""

# --- Create Chat Prompt Template for Content Enhancement ---
enhancement_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("user", "{input}"),
])
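# Long pasted texts or dense PDFs can produce chunks that overflow the model's
# context once the "stuff" chain concatenates them. A text splitter keeps
# chunks bounded. This is a minimal optional sketch, not part of the original
# script; the chunk_size/chunk_overlap values are illustrative assumptions.
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # max characters per chunk (assumed, not tuned)
    chunk_overlap=100,  # overlap preserves context across chunk boundaries
)
# Usage: documents = text_splitter.split_documents(documents) before indexing.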
# --- Streamlit UI ---
st.title("📄📥 Chat with PDF or Text using Groq + RAG")

st.sidebar.title("Features")
st.sidebar.markdown("- Upload PDF files")
st.sidebar.markdown("- Paste raw text")
st.sidebar.markdown("- Content enhancement analysis")
st.sidebar.markdown("- Question answering with RAG")

# Create tabs for different functionalities
tab1, tab2 = st.tabs(["📄 Document Chat", "🔧 Content Enhancement"])

with tab1:
    st.header("Document Question Answering")

    # Option to upload PDF
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

    # Option to paste raw text
    pasted_text = st.text_area("Or paste some text below:", height=150)

    # User's question
    user_query = st.text_input("Ask a question about the content")

    # Submit button for QA
    submit_qa_button = st.button("Submit Question", key="qa_submit")

    if submit_qa_button:
        if not user_query.strip():
            st.warning("Please enter a question.")
            st.stop()

        documents = []

        # Handle uploaded PDF
        if uploaded_file:
            with st.spinner("Processing PDF..."):
                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                    tmp_file.write(uploaded_file.read())
                    tmp_path = tmp_file.name

                loader = PyPDFLoader(tmp_path)
                documents = loader.load_and_split()

                # Clean up temporary file
                os.unlink(tmp_path)

        # Handle pasted text if no PDF
        elif pasted_text.strip():
            documents = [Document(page_content=pasted_text)]
        else:
            st.warning("Please upload a PDF or paste some text.")
            st.stop()

        # Create vector store
        with st.spinner("Creating embeddings..."):
            vectorstore = FAISS.from_documents(documents, embedding)
            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

        # Custom prompt for QA
        qa_prompt_template = PromptTemplate(
            input_variables=["context", "question"],
            template="""You are an AI assistant. Use the following context to answer the question.
Be concise, accurate, and helpful. If the answer is not in the context, say so.

Context: {context}

Question: {question}

Answer:""",
        )

        # QA Chain
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
            chain_type_kwargs={"prompt": qa_prompt_template},
        )

        # Run QA
        with st.spinner("Generating answer..."):
            try:
                result = qa_chain.invoke({"query": user_query})

                # Show result
                st.markdown("### 💬 Answer")
                st.write(result["result"])

                # Show sources (truncate long excerpts for readability)
                with st.expander("📄 Source Documents"):
                    for i, doc in enumerate(result["source_documents"]):
                        st.write(f"**Source {i+1}:**")
                        st.write(doc.page_content[:500] + "..."
                                 if len(doc.page_content) > 500 else doc.page_content)
                        if hasattr(doc, 'metadata') and doc.metadata:
                            st.write(f"*Metadata: {doc.metadata}*")
                        st.write("---")
            except Exception as e:
                st.error(f"An error occurred: {str(e)}")
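# Streamlit reruns the whole script on every interaction, so the FAISS index
# above is rebuilt on every question. A cached builder like the sketch below
# (an optional refinement, not in the original; the helper name and the use of
# the raw text as the cache key are assumptions) would avoid the rebuild.
@st.cache_resource(show_spinner=False)
def build_vectorstore(text: str):
    """Embed the given text once and reuse the index across reruns."""
    return FAISS.from_documents([Document(page_content=text)], embedding)
# Usage: vectorstore = build_vectorstore(pasted_text) in place of the
# FAISS.from_documents(...) call inside the button handler.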
with tab2:
    st.header("Content Enhancement Analysis")
    st.markdown("Analyze and optimize your content for better LLM performance.")

    # Text input for enhancement
    enhancement_text = st.text_area("Enter text to analyze and enhance:",
                                    height=200, key="enhancement_input")

    # Submit button for enhancement
    submit_enhancement_button = st.button("Analyze & Enhance", key="enhancement_submit")

    if submit_enhancement_button:
        if not enhancement_text.strip():
            st.warning("Please enter some text to analyze.")
            st.stop()

        with st.spinner("Analyzing content..."):
            try:
                # Create the enhancement chain
                enhancement_chain = enhancement_prompt | llm

                # Run enhancement analysis
                result = enhancement_chain.invoke({"input": enhancement_text})

                # Parse the result
                result_content = result.content if hasattr(result, 'content') else str(result)

                st.markdown("### 📊 Analysis Results")

                # Try to extract JSON from the response
                try:
                    # Find the outermost JSON object in the response
                    json_start = result_content.find('{')
                    json_end = result_content.rfind('}')

                    if json_start != -1 and json_end != -1:
                        json_str = result_content[json_start:json_end + 1]
                        analysis_data = json.loads(json_str)

                        # Display scores
                        st.markdown("#### Scores (1-10)")
                        col1, col2, col3 = st.columns(3)
                        with col1:
                            clarity_score = analysis_data.get('score', {}).get('clarity', 'N/A')
                            st.metric("Clarity", clarity_score)
                        with col2:
                            struct_score = analysis_data.get('score', {}).get('structuredness', 'N/A')
                            st.metric("Structure", struct_score)
                        with col3:
                            answer_score = analysis_data.get('score', {}).get('answerability', 'N/A')
                            st.metric("Answerability", answer_score)

                        # Display keywords
                        keywords = analysis_data.get('keywords', [])
                        if keywords:
                            st.markdown("#### 🔑 Key Terms")
                            st.write(", ".join(keywords))

                        # Display optimized text
                        optimized_text = analysis_data.get('optimized_text', '')
                        if optimized_text:
                            st.markdown("#### ✨ Optimized Content")
                            st.text_area("Enhanced version:", value=optimized_text,
                                         height=200, key="optimized_output")
                            # A server-side button cannot write to the browser
                            # clipboard, so point the user at the text area.
                            st.caption("📋 Select the text above to copy it.")
                    else:
                        # Fallback: display raw response
                        st.markdown("#### Analysis Response")
                        st.write(result_content)
                except json.JSONDecodeError:
                    # Fallback: display raw response
                    st.markdown("#### Analysis Response")
                    st.write(result_content)
            except Exception as e:
                st.error(f"An error occurred during enhancement: {str(e)}")

# --- Sidebar Information ---
with st.sidebar:
    st.markdown("---")
    st.markdown("### 🔧 Configuration")
    st.markdown("Make sure to set your API keys:")
    st.code("export GROQ_API_KEY='your-key'")
    st.code("export HUGGINGFACE_API_KEY='your-key'")

    st.markdown("---")
    st.markdown("### ℹ️ About")
    st.markdown("This app combines:")
    st.markdown("- **Groq LLM** for fast inference")
    st.markdown("- **FAISS** for vector search")
    st.markdown("- **HuggingFace** embeddings")
    st.markdown("- **RAG** for accurate answers")

# --- Footer ---
st.markdown("---")
st.markdown("*Built with Streamlit, LangChain, and Groq*")
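# --- How to run (a sketch: the package list is inferred from the imports
# above, and the filename app.py is illustrative) ---
#   pip install streamlit langchain langchain-community langchain-groq \
#       faiss-cpu sentence-transformers pypdf
#   export GROQ_API_KEY='your-key'
#   streamlit run app.py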