import json
import os
import tempfile

import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.schema import Document
from langchain_groq import ChatGroq
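# Run locally with: streamlit run app.py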
# --- Environment Variables ---
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "your-groq-api-key")
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "your-huggingface-api-key")
# --- Initialize Groq LLM ---
llm = ChatGroq(
    api_key=GROQ_API_KEY,
    model_name="llama3-8b-8192",  # ChatGroq accepts either `model_name` or `model`
    temperature=0.1,  # low temperature for focused, near-deterministic answers
)
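# llama3-8b-8192 is Groq's hosted Llama 3 8B; the trailing 8192 refers to its
# 8,192-token context window, which bounds how much retrieved context fits per query.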
# --- HuggingFace Embeddings ---
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    cache_folder="./hf_cache",
    # huggingfacehub_api_token=HUGGINGFACE_API_KEY
)
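# all-MiniLM-L6-v2 is downloaded once into ./hf_cache and then runs locally,
# producing 384-dimensional sentence embeddings; no HuggingFace API key is
# needed for inference, which is why the token argument above stays commented out.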
# --- System Prompt for Content Enhancement ---
system_prompt = """You are an AI Content Enhancement Specialist. Your purpose is to optimize user-provided text to maximize its effectiveness for large language models (LLMs) in search, question-answering, and conversational AI systems.

Evaluate the input text against the following criteria, assigning each a score from 1–10:
- Clarity: How easily can the content be understood?
- Structuredness: How well-organized and coherent is the content?
- LLM Answerability: How easily can an LLM extract precise answers from the content?

Identify the most salient keywords.

Rewrite the text to improve:
- Clarity and precision
- Logical structure and flow
- Suitability for LLM-based information retrieval

Present your analysis and optimized text in the following JSON format:
```json
{
    "score": {
        "clarity": 8.5,
        "structuredness": 7.0,
        "answerability": 9.0
    },
    "keywords": ["example", "installation", "setup"],
    "optimized_text": "..."
}
```"""
# --- Create Chat Prompt Template for Content Enhancement ---
enhancement_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("user", "{input}"),
])
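# "{input}" is a template variable: it is filled in by
# enhancement_chain.invoke({"input": ...}) in the Content Enhancement tab.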
# --- Streamlit UI ---
st.title("📄📥 Chat with PDF or Text using Groq + RAG")
st.sidebar.title("Features")
st.sidebar.markdown("- Upload PDF files")
st.sidebar.markdown("- Paste raw text")
st.sidebar.markdown("- Content enhancement analysis")
st.sidebar.markdown("- Question answering with RAG")
# Create tabs for different functionalities
tab1, tab2 = st.tabs(["📄 Document Chat", "🔧 Content Enhancement"])
with tab1:
    st.header("Document Question Answering")

    # Option to upload PDF
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

    # Option to paste raw text
    pasted_text = st.text_area("Or paste some text below:", height=150)

    # User's question
    user_query = st.text_input("Ask a question about the content")

    # Submit button for QA
    submit_qa_button = st.button("Submit Question", key="qa_submit")

    if submit_qa_button:
        if not user_query.strip():
            st.warning("Please enter a question.")
            st.stop()

        documents = []
        # Handle uploaded PDF
        if uploaded_file:
            with st.spinner("Processing PDF..."):
                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                    tmp_file.write(uploaded_file.read())
                    tmp_path = tmp_file.name
                loader = PyPDFLoader(tmp_path)
                documents = loader.load_and_split()
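                # load_and_split() yields one Document per chunk, splitting the
                # PDF pages with LangChain's default RecursiveCharacterTextSplitter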
                # Clean up temporary file
                os.unlink(tmp_path)
        # Handle pasted text if no PDF
        elif pasted_text.strip():
            documents = [Document(page_content=pasted_text)]
        else:
            st.warning("Please upload a PDF or paste some text.")
            st.stop()
        # Create vector store
        with st.spinner("Creating embeddings..."):
            vectorstore = FAISS.from_documents(documents, embedding)
            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
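            # k=3 keeps only the three chunks most similar to the query,
            # trading some recall for a smaller prompt sent to the LLM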
        # Custom prompt for QA
        qa_prompt_template = PromptTemplate(
            input_variables=["context", "question"],
            template="""You are an AI assistant. Use the following context to answer the question.
Be concise, accurate, and helpful. If the answer is not in the context, say so.

Context: {context}

Question: {question}

Answer:""",
        )
        # QA Chain
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
            chain_type_kwargs={"prompt": qa_prompt_template},
        )
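        # chain_type="stuff" simply concatenates ("stuffs") all retrieved chunks
        # into the {context} slot of the prompt above; with k=3 chunks this
        # should fit comfortably within the model's context window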
        # Run QA
        with st.spinner("Generating answer..."):
            try:
                result = qa_chain.invoke({"query": user_query})

                # Show result
                st.markdown("### 💬 Answer")
                st.write(result["result"])

                # Show sources
                with st.expander("📄 Source Documents"):
                    for i, doc in enumerate(result["source_documents"]):
                        st.write(f"**Source {i+1}:**")
                        st.write(doc.page_content[:500] + "..." if len(doc.page_content) > 500 else doc.page_content)
                        if doc.metadata:
                            st.write(f"*Metadata: {doc.metadata}*")
                        st.write("---")
            except Exception as e:
                st.error(f"An error occurred: {str(e)}")
with tab2:
    st.header("Content Enhancement Analysis")
    st.markdown("Analyze and optimize your content for better LLM performance.")

    # Text input for enhancement
    enhancement_text = st.text_area("Enter text to analyze and enhance:", height=200, key="enhancement_input")

    # Submit button for enhancement
    submit_enhancement_button = st.button("Analyze & Enhance", key="enhancement_submit")

    if submit_enhancement_button:
        if not enhancement_text.strip():
            st.warning("Please enter some text to analyze.")
            st.stop()
with st.spinner("Analyzing content..."):
try:
# Create the enhancement chain
enhancement_chain = enhancement_prompt | llm
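                # LCEL: the | operator pipes the formatted prompt into the LLM,
                # yielding a runnable whose invoke() returns the model's message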
                # Run enhancement analysis
                result = enhancement_chain.invoke({"input": enhancement_text})

                # Parse the result
                result_content = result.content if hasattr(result, "content") else str(result)

                st.markdown("### 📊 Analysis Results")
                # Try to extract JSON from the response
                try:
                    # Find the outermost braces in the response
                    json_start = result_content.find("{")
                    json_end = result_content.rfind("}") + 1
                    # rfind returns -1 when no '}' exists (making json_end 0), so
                    # require json_end > json_start to confirm both braces were found in order
                    if json_start != -1 and json_end > json_start:
                        json_str = result_content[json_start:json_end]
                        analysis_data = json.loads(json_str)
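                        # Slicing from the first '{' to the last '}' tolerates
                        # prose or code fences around the JSON, but assumes the
                        # reply contains a single JSON object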
                        # Display scores
                        st.markdown("#### Scores (1-10)")
                        col1, col2, col3 = st.columns(3)
                        with col1:
                            clarity_score = analysis_data.get("score", {}).get("clarity", "N/A")
                            st.metric("Clarity", clarity_score)
                        with col2:
                            struct_score = analysis_data.get("score", {}).get("structuredness", "N/A")
                            st.metric("Structure", struct_score)
                        with col3:
                            answer_score = analysis_data.get("score", {}).get("answerability", "N/A")
                            st.metric("Answerability", answer_score)

                        # Display keywords
                        keywords = analysis_data.get("keywords", [])
                        if keywords:
                            st.markdown("#### 🔑 Key Terms")
                            st.write(", ".join(keywords))

                        # Display optimized text
                        optimized_text = analysis_data.get("optimized_text", "")
                        if optimized_text:
                            st.markdown("#### ✨ Optimized Content")
                            st.text_area("Enhanced version:", value=optimized_text, height=200, key="optimized_output")
                            # A nested st.button can't work here: clicking it reruns the
                            # script with submit_enhancement_button reset to False, so that
                            # branch would never render. Point at manual copy instead.
                            st.caption("📋 Copy the optimized text directly from the text area above.")
                    else:
                        # Fallback: display raw response
                        st.markdown("#### Analysis Response")
                        st.write(result_content)
                except json.JSONDecodeError:
                    # Fallback: display raw response
                    st.markdown("#### Analysis Response")
                    st.write(result_content)
            except Exception as e:
                st.error(f"An error occurred during enhancement: {str(e)}")
# --- Sidebar Information ---
with st.sidebar:
    st.markdown("---")
    st.markdown("### 🔧 Configuration")
    st.markdown("Make sure to set your API keys:")
    st.code("export GROQ_API_KEY='your-key'")
    st.code("export HUGGINGFACE_API_KEY='your-key'")
    st.markdown("---")
    st.markdown("### ℹ️ About")
    st.markdown("This app combines:")
    st.markdown("- **Groq LLM** for fast inference")
    st.markdown("- **FAISS** for vector search")
    st.markdown("- **HuggingFace** embeddings")
    st.markdown("- **RAG** for accurate answers")
# --- Footer ---
st.markdown("---")
st.markdown("*Built with Streamlit, LangChain, and Groq*")