# Streamlit RAG demo: chat with an uploaded PDF or pasted text via Groq + FAISS,
# plus a content-enhancement analysis tab.
# (Previous lines here were Streamlit "Runtime error" banner residue from a paste.)
# Standard library
import json
import os
import tempfile

# Third-party
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.schema import Document
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
# --- Environment Variables ---
# Placeholder defaults keep the module importable without keys configured, but
# any real API call will fail until the env vars are set (see sidebar help).
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "your-groq-api-key")
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "your-huggingface-api-key")


@st.cache_resource
def _get_llm() -> ChatGroq:
    """Build the Groq chat model once per server process.

    Streamlit re-executes this whole script on every interaction; without
    caching, the client object was reconstructed on each rerun.
    """
    return ChatGroq(
        api_key=GROQ_API_KEY,
        model_name="llama3-8b-8192",  # ChatGroq expects `model_name`, not `model`
        temperature=0.1,  # near-deterministic output for QA
    )


@st.cache_resource
def _get_embeddings() -> HuggingFaceEmbeddings:
    """Load the sentence-transformer embedder once.

    Re-instantiating HuggingFaceEmbeddings on every rerun reloads the model
    weights from disk each time; @st.cache_resource keeps one shared instance.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        cache_folder="./hf_cache",  # local cache to avoid repeated downloads
        # huggingfacehub_api_token=HUGGINGFACE_API_KEY  # not needed for local inference
    )


# Module-level names preserved for the rest of the script.
llm = _get_llm()
embedding = _get_embeddings()
# --- System Prompt for Content Enhancement ---
# BUG FIX: the literal JSON braces in the example below are escaped as {{ / }}.
# ChatPromptTemplate parses the system message as an f-string-style template,
# so unescaped `{` / `}` are treated as input variables and raise a formatting
# error as soon as the chain is invoked (the "Runtime error" seen at the top
# of this file). Only `{input}` in the user message is a real variable.
system_prompt = """You are an AI Content Enhancement Specialist. Your purpose is to optimize user-provided text to maximize its effectiveness for large language models (LLMs) in search, question-answering, and conversational AI systems.
Evaluate the input text based on the following criteria, assigning a score from 1β10 for each:
Clarity: How easily can the content be understood?
Structuredness: How well-organized and coherent is the content?
LLM Answerability: How easily can an LLM extract precise answers from the content?
Identify the most salient keywords.
Rewrite the text to improve:
Clarity and precision
Logical structure and flow
Suitability for LLM-based information retrieval
Present your analysis and optimized text in the following JSON format:
```json
{{
  "score": {{
    "clarity": 8.5,
    "structuredness": 7.0,
    "answerability": 9.0
  }},
  "keywords": ["example", "installation", "setup"],
  "optimized_text": "..."
}}
```"""

# --- Create Chat Prompt Template for Content Enhancement ---
enhancement_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("user", "{input}"),
])
# --- Streamlit UI ---
# NOTE(review): emoji in these labels appear mojibake'd (encoding damage in the
# source file); they are preserved byte-for-byte here — confirm intended glyphs.
st.title("ππ₯ Chat with PDF or Text using Groq + RAG")
st.sidebar.title("Features")
for _feature in (
    "- Upload PDF files",
    "- Paste raw text",
    "- Content enhancement analysis",
    "- Question answering with RAG",
):
    st.sidebar.markdown(_feature)

# Two independent workflows, one tab each.
tab1, tab2 = st.tabs(["π Document Chat", "π§ Content Enhancement"])
with tab1:
    st.header("Document Question Answering")

    # Inputs: an uploaded PDF takes precedence over pasted text.
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    pasted_text = st.text_area("Or paste some text below:", height=150)
    user_query = st.text_input("Ask a question about the content")
    submit_qa_button = st.button("Submit Question", key="qa_submit")

    if submit_qa_button:
        if not user_query.strip():
            st.warning("Please enter a question.")
            st.stop()

        documents = []
        if uploaded_file:
            with st.spinner("Processing PDF..."):
                # PyPDFLoader needs a real file path, so spool the upload to disk.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                    tmp_file.write(uploaded_file.read())
                    tmp_path = tmp_file.name
                try:
                    loader = PyPDFLoader(tmp_path)
                    documents = loader.load_and_split()
                finally:
                    # BUG FIX: always remove the temp file, even when PDF
                    # parsing raises (previously it leaked on failure).
                    os.unlink(tmp_path)
        elif pasted_text.strip():
            documents = [Document(page_content=pasted_text)]
        else:
            st.warning("Please upload a PDF or paste some text.")
            st.stop()

        with st.spinner("Creating embeddings..."):
            vectorstore = FAISS.from_documents(documents, embedding)
            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

        # Prompt constraining answers to the retrieved context only.
        qa_prompt_template = PromptTemplate(
            input_variables=["context", "question"],
            template="""You are an AI assistant. Use the following context to answer the question.
Be concise, accurate, and helpful. If the answer is not in the context, say so.
Context: {context}
Question: {question}
Answer:""",
        )

        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",  # stuff all retrieved chunks into one prompt
            retriever=retriever,
            return_source_documents=True,
            chain_type_kwargs={"prompt": qa_prompt_template},
        )

        with st.spinner("Generating answer..."):
            try:
                # `.invoke` replaces the deprecated `qa_chain({...})` __call__ API.
                result = qa_chain.invoke({"query": user_query})

                st.markdown("### π¬ Answer")
                st.write(result["result"])

                with st.expander("π Source Documents"):
                    for i, doc in enumerate(result["source_documents"]):
                        st.write(f"**Source {i+1}:**")
                        preview = doc.page_content
                        if len(preview) > 500:
                            preview = preview[:500] + "..."
                        st.write(preview)
                        if getattr(doc, "metadata", None):
                            st.write(f"*Metadata: {doc.metadata}*")
                        st.write("---")
            except Exception as e:
                st.error(f"An error occurred: {str(e)}")
def _extract_json_payload(text):
    """Return the first {...} JSON object embedded in `text`, or None.

    Scans from the first '{' to the last '}' and attempts to parse that span.
    BUG FIX: the original computed `json_end = text.rfind('}') + 1` and then
    tested `json_end != -1` — rfind's -1 failure sentinel became 0, so the
    check could never fail and a missing '}' silently sliced garbage.
    """
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end == -1 or end < start:
        return None
    try:
        return json.loads(text[start:end + 1])
    except json.JSONDecodeError:
        return None


with tab2:
    st.header("Content Enhancement Analysis")
    st.markdown("Analyze and optimize your content for better LLM performance.")

    enhancement_text = st.text_area(
        "Enter text to analyze and enhance:", height=200, key="enhancement_input"
    )
    submit_enhancement_button = st.button("Analyze & Enhance", key="enhancement_submit")

    if submit_enhancement_button:
        if not enhancement_text.strip():
            st.warning("Please enter some text to analyze.")
            st.stop()

        with st.spinner("Analyzing content..."):
            try:
                # LCEL pipeline: prompt template -> Groq model.
                enhancement_chain = enhancement_prompt | llm
                result = enhancement_chain.invoke({"input": enhancement_text})
                result_content = result.content if hasattr(result, "content") else str(result)

                st.markdown("### π Analysis Results")
                analysis_data = _extract_json_payload(result_content)
                if analysis_data is not None:
                    scores = analysis_data.get("score", {})
                    st.markdown("#### Scores (1-10)")
                    col1, col2, col3 = st.columns(3)
                    with col1:
                        st.metric("Clarity", scores.get("clarity", "N/A"))
                    with col2:
                        st.metric("Structure", scores.get("structuredness", "N/A"))
                    with col3:
                        st.metric("Answerability", scores.get("answerability", "N/A"))

                    keywords = analysis_data.get("keywords", [])
                    if keywords:
                        st.markdown("#### π Key Terms")
                        st.write(", ".join(keywords))

                    optimized_text = analysis_data.get("optimized_text", "")
                    if optimized_text:
                        st.markdown("#### β¨ Optimized Content")
                        st.text_area(
                            "Enhanced version:",
                            value=optimized_text,
                            height=200,
                            key="optimized_output",
                        )
                        if st.button("π Copy Optimized Text"):
                            st.success("Text copied to clipboard! (Note: Manual copy from text area above)")
                else:
                    # Fallback: model did not return parseable JSON.
                    st.markdown("#### Analysis Response")
                    st.write(result_content)
            except Exception as e:
                st.error(f"An error occurred during enhancement: {str(e)}")
# --- Sidebar Information ---
with st.sidebar:
    st.markdown("---")
    st.markdown("### π§ Configuration")
    st.markdown("Make sure to set your API keys:")
    st.code("export GROQ_API_KEY='your-key'")
    st.code("export HUGGINGFACE_API_KEY='your-key'")
    st.markdown("---")
    st.markdown("### βΉοΈ About")
    st.markdown("This app combines:")
    for _bullet in (
        "- **Groq LLM** for fast inference",
        "- **FAISS** for vector search",
        "- **HuggingFace** embeddings",
        "- **RAG** for accurate answers",
    ):
        st.markdown(_bullet)

# --- Footer ---
st.markdown("---")
st.markdown("*Built with Streamlit, LangChain, and Groq*")