Spaces:

MBilal-72
/

GenerativeEngineOptimization

Runtime error

App Files Files Community

MBilal-72 committed on Jul 25, 2025

Commit

dc3f770

verified ·

1 Parent(s): 832dfed

Update app.py with system prompt

Browse files

Files changed (1) hide show

app.py +241 -79

app.py CHANGED Viewed

@@ -1,14 +1,14 @@
 import os
 import tempfile
 import streamlit as st
 from langchain_community.document_loaders import PyPDFLoader
 from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.chains import RetrievalQA
-from langchain.prompts import PromptTemplate
 from langchain.schema import Document
-# from langchain_groq import GroqLLM
 from langchain_groq import ChatGroq
 # --- Environment Variables ---
@@ -16,11 +16,6 @@ GROQ_API_KEY = os.getenv("GROQ_API_KEY", "your-groq-api-key")
 HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "your-huggingface-api-key")
 # --- Initialize Groq LLM ---
-# llm = GroqLLM(
-#     api_key=GROQ_API_KEY,
-#     model="llama3-8b-8192",
-#     temperature=0.1
-# )
 llm = ChatGroq(
     api_key=GROQ_API_KEY,
     model_name="llama3-8b-8192",  # Note: it's `model_name` not `model`
@@ -33,79 +28,246 @@ embedding = HuggingFaceEmbeddings(
     cache_folder="./hf_cache",
     # huggingfacehub_api_token=HUGGINGFACE_API_KEY
 )
-# embedding = HuggingFaceEmbeddings(
-#     model_name="sentence-transformers/all-MiniLM-L6-v2"
-# )
 # --- Streamlit UI ---
 st.title("📄📥 Chat with PDF or Text using Groq + RAG")
-# Option to upload PDF
-uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
-# Option to paste raw text
-pasted_text = st.text_area("Or paste some text below:")
-# User's question
-user_query = st.text_input("Ask a question about the content")
-# Submit button
-submit_button = st.button("Submit")
-if submit_button:
-    documents = []
-    # Handle uploaded PDF
-    if uploaded_file:
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
-            tmp_file.write(uploaded_file.read())
-            tmp_path = tmp_file.name
-        loader = PyPDFLoader(tmp_path)
-        documents = loader.load_and_split()
-    # Handle pasted text if no PDF
-    elif pasted_text.strip():
-        documents = [Document(page_content=pasted_text)]
-    else:
-        st.warning("Please upload a PDF or paste some text.")
-        st.stop()
-    # Create vector store
-    vectorstore = FAISS.from_documents(documents, embedding)
-    retriever = vectorstore.as_retriever()
-    # Optional custom prompt
-    prompt_template = PromptTemplate(
-        input_variables=["context", "question"],
-        template="""
-        You are an AI assistant. Use the following context to answer the question.
-        Be concise, accurate, and helpful.
-        Context: {context}
-        Question: {question}
-        Answer:"""
-    )
-    # QA Chain
-    qa_chain = RetrievalQA.from_chain_type(
-        llm=llm,
-        chain_type="stuff",
-        retriever=retriever,
-        return_source_documents=True,
-        chain_type_kwargs={"prompt": prompt_template}
-    )
-    # Run QA
-    result = qa_chain({"query": user_query})
-    # Show result
-    st.markdown("### 💬 Answer")
-    st.write(result["result"])
-    # Show sources (only if from PDF)
-    if uploaded_file:
-        with st.expander("📄 Sources"):
-            for i, doc in enumerate(result["source_documents"]):
-                st.write(f"**Page {i+1}** — {doc.metadata.get('source', 'Unknown')}")

 import os
 import tempfile
 import streamlit as st
+import json
 from langchain_community.document_loaders import PyPDFLoader
 from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.chains import RetrievalQA
+from langchain.prompts import PromptTemplate, ChatPromptTemplate
 from langchain.schema import Document
 from langchain_groq import ChatGroq
 # --- Environment Variables ---
 HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "your-huggingface-api-key")
 # --- Initialize Groq LLM ---
 llm = ChatGroq(
     api_key=GROQ_API_KEY,
     model_name="llama3-8b-8192",  # Note: it's `model_name` not `model`
     cache_folder="./hf_cache",
     # huggingfacehub_api_token=HUGGINGFACE_API_KEY
 )
+# --- System Prompt for Content Enhancement ---
+# Instructions sent as the system message of the enhancement chain. The model is
+# asked to score the text, list keywords, and return a rewritten version as JSON.
+system_prompt = """You are an AI Content Enhancement Specialist. Your purpose is to optimize user-provided text to maximize its effectiveness for large language models (LLMs) in search, question-answering, and conversational AI systems.
+Evaluate the input text based on the following criteria, assigning a score from 1–10 for each:
+Clarity: How easily can the content be understood?
+Structuredness: How well-organized and coherent is the content?
+LLM Answerability: How easily can an LLM extract precise answers from the content?
+Identify the most salient keywords.
+Rewrite the text to improve:
+Clarity and precision
+Logical structure and flow
+Suitability for LLM-based information retrieval
+Present your analysis and optimized text in the following JSON format:
+```json
+{
+"score": {
+"clarity": 8.5,
+"structuredness": 7.0,
+"answerability": 9.0
+},
+"keywords": ["example", "installation", "setup"],
+"optimized_text": "..."
+}
+```"""
+# --- Create Chat Prompt Template for Content Enhancement ---
+# ChatPromptTemplate treats message strings as f-string style templates, so the
+# literal braces of the JSON example above must be doubled before the text is
+# used as a template; otherwise they are parsed as template syntax rather than
+# plain text. Only "{input}" in the user message remains a real variable.
+escaped_system_prompt = system_prompt.replace("{", "{{").replace("}", "}}")
+enhancement_prompt = ChatPromptTemplate.from_messages([
+    ("system", escaped_system_prompt),
+    ("user", "{input}")
+])
 # --- Streamlit UI ---
 st.title("📄📥 Chat with PDF or Text using Groq + RAG")
+st.sidebar.title("Features")
+# One sidebar bullet per capability, rendered in this order.
+for feature_line in (
+    "- Upload PDF files",
+    "- Paste raw text",
+    "- Content enhancement analysis",
+    "- Question answering with RAG",
+):
+    st.sidebar.markdown(feature_line)
+# Two tabs: document Q&A first, content enhancement second.
+tab_labels = ["📄 Document Chat", "🔧 Content Enhancement"]
+tab1, tab2 = st.tabs(tab_labels)
+with tab1:
+    # Document Q&A tab: index an uploaded PDF (or pasted text) with FAISS and
+    # answer the user's question through a RetrievalQA chain.
+    st.header("Document Question Answering")
+    # Option to upload PDF
+    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
+    # Option to paste raw text
+    pasted_text = st.text_area("Or paste some text below:", height=150)
+    # User's question
+    user_query = st.text_input("Ask a question about the content")
+    # Submit button for QA
+    submit_qa_button = st.button("Submit Question", key="qa_submit")
+    # Input problems are reported with plain branching instead of st.stop():
+    # st.stop() halts the whole script run, which would leave everything defined
+    # after this tab (the enhancement tab, sidebar notes, footer) unrendered.
+    if submit_qa_button and not user_query.strip():
+        st.warning("Please enter a question.")
+    elif submit_qa_button and not uploaded_file and not pasted_text.strip():
+        st.warning("Please upload a PDF or paste some text.")
+    elif submit_qa_button:
+        # Custom prompt for QA
+        qa_prompt_template = PromptTemplate(
+            input_variables=["context", "question"],
+            template="""You are an AI assistant. Use the following context to answer the question.
+            Be concise, accurate, and helpful. If the answer is not in the context, say so.
+            Context: {context}
+            Question: {question}
+            Answer:"""
+        )
+        # An uploaded PDF takes precedence over pasted text.
+        if uploaded_file:
+            with st.spinner("Processing PDF..."):
+                # PyPDFLoader is given a filesystem path, so the upload is
+                # written to a temporary file first.
+                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
+                    tmp_file.write(uploaded_file.read())
+                    tmp_path = tmp_file.name
+                try:
+                    documents = PyPDFLoader(tmp_path).load_and_split()
+                finally:
+                    # Remove the temporary file even when parsing fails.
+                    os.unlink(tmp_path)
+        else:
+            documents = [Document(page_content=pasted_text)]
+        if not documents:
+            # Nothing to index (e.g. a PDF without extractable text).
+            st.warning("No readable text was found in the uploaded PDF.")
+        else:
+            # Create vector store
+            with st.spinner("Creating embeddings..."):
+                vectorstore = FAISS.from_documents(documents, embedding)
+                retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
+            # QA Chain
+            qa_chain = RetrievalQA.from_chain_type(
+                llm=llm,
+                chain_type="stuff",
+                retriever=retriever,
+                return_source_documents=True,
+                chain_type_kwargs={"prompt": qa_prompt_template}
+            )
+            # Run QA
+            with st.spinner("Generating answer..."):
+                try:
+                    # .invoke() replaces the deprecated direct chain call and
+                    # matches how the enhancement chain is run.
+                    result = qa_chain.invoke({"query": user_query})
+                    # Show result
+                    st.markdown("### 💬 Answer")
+                    st.write(result["result"])
+                    # Show sources
+                    with st.expander("📄 Source Documents"):
+                        for source_number, doc in enumerate(result["source_documents"], start=1):
+                            st.write(f"**Source {source_number}:**")
+                            # Long chunks are cut to a 500-character preview.
+                            excerpt = doc.page_content
+                            if len(excerpt) > 500:
+                                excerpt = excerpt[:500] + "..."
+                            st.write(excerpt)
+                            if getattr(doc, "metadata", None):
+                                st.write(f"*Metadata: {doc.metadata}*")
+                            st.write("---")
+                except Exception as e:
+                    # UI boundary: show the failure instead of crashing the app.
+                    st.error(f"An error occurred: {str(e)}")
+with tab2:
+    # Content enhancement tab: send the text through the enhancement prompt and
+    # render the model's JSON analysis (scores, keywords, rewritten text).
+    st.header("Content Enhancement Analysis")
+    st.markdown("Analyze and optimize your content for better LLM performance.")
+    # Text input for enhancement
+    enhancement_text = st.text_area("Enter text to analyze and enhance:", height=200, key="enhancement_input")
+    # Submit button for enhancement
+    submit_enhancement_button = st.button("Analyze & Enhance", key="enhancement_submit")
+    if submit_enhancement_button:
+        if not enhancement_text.strip():
+            # Warn without st.stop(): stopping the script here would prevent the
+            # sidebar notes and footer defined after this tab from rendering.
+            st.warning("Please enter some text to analyze.")
+        else:
+            with st.spinner("Analyzing content..."):
+                try:
+                    # Create the enhancement chain
+                    enhancement_chain = enhancement_prompt | llm
+                    # Run enhancement analysis
+                    result = enhancement_chain.invoke({"input": enhancement_text})
+                    # Parse the result
+                    result_content = result.content if hasattr(result, 'content') else str(result)
+                    st.markdown("### 📊 Analysis Results")
+                    # Try to extract JSON from the response: take the span from
+                    # the first '{' to the last '}'. Both must exist and be in
+                    # order; rfind() returns -1 when no '}' is present.
+                    analysis_data = None
+                    json_start = result_content.find('{')
+                    json_end = result_content.rfind('}')
+                    if json_start != -1 and json_end > json_start:
+                        try:
+                            analysis_data = json.loads(result_content[json_start:json_end + 1])
+                        except json.JSONDecodeError:
+                            analysis_data = None
+                    if isinstance(analysis_data, dict):
+                        # Display scores
+                        scores = analysis_data.get('score')
+                        if not isinstance(scores, dict):
+                            # Model returned an unexpected shape; show N/A values.
+                            scores = {}
+                        st.markdown("#### Scores (1-10)")
+                        col1, col2, col3 = st.columns(3)
+                        with col1:
+                            st.metric("Clarity", scores.get('clarity', 'N/A'))
+                        with col2:
+                            st.metric("Structure", scores.get('structuredness', 'N/A'))
+                        with col3:
+                            st.metric("Answerability", scores.get('answerability', 'N/A'))
+                        # Display keywords
+                        keywords = analysis_data.get('keywords', [])
+                        if isinstance(keywords, str):
+                            keywords = [keywords]
+                        if keywords:
+                            st.markdown("#### 🔑 Key Terms")
+                            # str() guards against non-string items in the model output.
+                            st.write(", ".join(str(keyword) for keyword in keywords))
+                        # Display optimized text
+                        optimized_text = analysis_data.get('optimized_text', '')
+                        if optimized_text:
+                            st.markdown("#### ✨ Optimized Content")
+                            st.text_area("Enhanced version:", value=optimized_text, height=200, key="optimized_output")
+                            # No "copy" button here: a button nested under the
+                            # submit button triggers a rerun in which the submit
+                            # button is no longer pressed, so the results would
+                            # disappear and nothing would be copied anyway.
+                            st.caption("Select the text above to copy it.")
+                    else:
+                        # Fallback: display raw response
+                        st.markdown("#### Analysis Response")
+                        st.write(result_content)
+                except Exception as e:
+                    # UI boundary: show the failure instead of crashing the app.
+                    st.error(f"An error occurred during enhancement: {str(e)}")
+# --- Sidebar Information ---
+with st.sidebar:
+    # Configuration hints: the shell commands that set the two API keys.
+    for config_line in ("---", "### 🔧 Configuration", "Make sure to set your API keys:"):
+        st.markdown(config_line)
+    for export_command in (
+        "export GROQ_API_KEY='your-key'",
+        "export HUGGINGFACE_API_KEY='your-key'",
+    ):
+        st.code(export_command)
+    # About section: the components this app is built from.
+    for about_line in (
+        "---",
+        "### ℹ️ About",
+        "This app combines:",
+        "- **Groq LLM** for fast inference",
+        "- **FAISS** for vector search",
+        "- **HuggingFace** embeddings",
+        "- **RAG** for accurate answers",
+    ):
+        st.markdown(about_line)
+# --- Footer ---
+for footer_line in ("---", "*Built with Streamlit, LangChain, and Groq*"):
+    st.markdown(footer_line)