|
|
import os |
|
|
import tempfile |
|
|
import streamlit as st |
|
|
import json |
|
|
|
|
|
from langchain_community.document_loaders import PyPDFLoader |
|
|
from langchain_community.vectorstores import FAISS |
|
|
from langchain_community.embeddings import HuggingFaceEmbeddings |
|
|
from langchain.chains import RetrievalQA |
|
|
from langchain.prompts import PromptTemplate, ChatPromptTemplate |
|
|
from langchain.schema import Document |
|
|
from langchain_groq import ChatGroq |
|
|
|
|
|
|
|
|
# --- Configuration ---------------------------------------------------------
# API keys are read from the environment.  The literal fallbacks are
# placeholders and will not authenticate against either service.
# NOTE(review): HUGGINGFACE_API_KEY is never referenced later in this file;
# the embedding model below is downloaded anonymously — confirm it is needed.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "your-groq-api-key")


HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "your-huggingface-api-key")


# Groq-hosted Llama 3 chat model used by both tabs; a low temperature keeps
# answers close to deterministic.
llm = ChatGroq(
    api_key=GROQ_API_KEY,
    model_name="llama3-8b-8192",
    temperature=0.1
)


# Sentence-transformer embedding model that backs the FAISS index; weights
# are cached under ./hf_cache so they are only downloaded once.
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    cache_folder="./hf_cache",
)
|
|
|
|
|
|
|
|
# System prompt for the content-enhancement chain (tab 2).  It instructs the
# model to score the input, extract keywords, rewrite the text, and answer in
# a strict JSON envelope that the tab-2 UI parses with json.loads below.
# FIX: "1β10" was a mojibake-garbled range; restored to plain "1-10".
system_prompt = """You are an AI Content Enhancement Specialist. Your purpose is to optimize user-provided text to maximize its effectiveness for large language models (LLMs) in search, question-answering, and conversational AI systems.

Evaluate the input text based on the following criteria, assigning a score from 1-10 for each:

Clarity: How easily can the content be understood?

Structuredness: How well-organized and coherent is the content?

LLM Answerability: How easily can an LLM extract precise answers from the content?

Identify the most salient keywords.

Rewrite the text to improve:

Clarity and precision

Logical structure and flow

Suitability for LLM-based information retrieval

Present your analysis and optimized text in the following JSON format:

```json
{
  "score": {
    "clarity": 8.5,
    "structuredness": 7.0,
    "answerability": 9.0
  },
  "keywords": ["example", "installation", "setup"],
  "optimized_text": "..."
}
```"""
|
|
|
|
|
|
|
|
# Two-message chat template: the fixed system prompt followed by the user's
# raw text, which is substituted for the {input} placeholder at invoke time.
_enhancement_messages = [
    ("system", system_prompt),
    ("user", "{input}"),
]
enhancement_prompt = ChatPromptTemplate.from_messages(_enhancement_messages)
|
|
|
|
|
|
|
|
# Page title plus a sidebar feature list, then the two top-level tabs.
st.title("ππ₯ Chat with PDF or Text using Groq + RAG")
st.sidebar.title("Features")
for _feature in (
    "- Upload PDF files",
    "- Paste raw text",
    "- Content enhancement analysis",
    "- Question answering with RAG",
):
    st.sidebar.markdown(_feature)

# tab1: document Q&A over an uploaded PDF / pasted text.
# tab2: content-enhancement analysis of arbitrary text.
tab1, tab2 = st.tabs(["π Document Chat", "π§ Content Enhancement"])
|
|
|
|
|
with tab1:
    # --- Document question answering (RAG) --------------------------------
    st.header("Document Question Answering")

    # Inputs: a PDF upload OR pasted raw text, plus the user's question.
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    pasted_text = st.text_area("Or paste some text below:", height=150)
    user_query = st.text_input("Ask a question about the content")
    submit_qa_button = st.button("Submit Question", key="qa_submit")

    if submit_qa_button:
        if not user_query.strip():
            st.warning("Please enter a question.")
            st.stop()

        documents = []

        if uploaded_file:
            with st.spinner("Processing PDF..."):
                # PyPDFLoader needs a real path on disk, so spill the upload
                # into a temporary file first.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                    tmp_file.write(uploaded_file.read())
                    tmp_path = tmp_file.name

                # FIX: delete the temp file even if loading/splitting raises;
                # previously an exception leaked the file on disk.
                try:
                    loader = PyPDFLoader(tmp_path)
                    documents = loader.load_and_split()
                finally:
                    os.unlink(tmp_path)

        elif pasted_text.strip():
            # Wrap pasted text in a single LangChain Document.
            documents = [Document(page_content=pasted_text)]

        else:
            st.warning("Please upload a PDF or paste some text.")
            st.stop()

        with st.spinner("Creating embeddings..."):
            # Build an in-memory FAISS index and retrieve the top-3 chunks
            # per query.
            vectorstore = FAISS.from_documents(documents, embedding)
            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

        # Prompt used by the "stuff" chain: retrieved context + the question.
        qa_prompt_template = PromptTemplate(
            input_variables=["context", "question"],
            template="""You are an AI assistant. Use the following context to answer the question.
Be concise, accurate, and helpful. If the answer is not in the context, say so.

Context: {context}
Question: {question}
Answer:"""
        )

        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
            chain_type_kwargs={"prompt": qa_prompt_template}
        )

        with st.spinner("Generating answer..."):
            try:
                # FIX: use .invoke() — calling the chain directly
                # (qa_chain({...})) is the deprecated Chain.__call__ API.
                result = qa_chain.invoke({"query": user_query})

                st.markdown("### π¬ Answer")
                st.write(result["result"])

                with st.expander("π Source Documents"):
                    for i, doc in enumerate(result["source_documents"]):
                        st.write(f"**Source {i+1}:**")
                        # Trim long chunks so the expander stays readable.
                        st.write(doc.page_content[:500] + "..." if len(doc.page_content) > 500 else doc.page_content)
                        if hasattr(doc, 'metadata') and doc.metadata:
                            st.write(f"*Metadata: {doc.metadata}*")
                        st.write("---")

            except Exception as e:
                # Surface retrieval/LLM failures in the UI instead of crashing.
                st.error(f"An error occurred: {str(e)}")
|
|
|
|
|
with tab2:
    # --- Content enhancement analysis -------------------------------------
    st.header("Content Enhancement Analysis")
    st.markdown("Analyze and optimize your content for better LLM performance.")

    enhancement_text = st.text_area("Enter text to analyze and enhance:", height=200, key="enhancement_input")
    submit_enhancement_button = st.button("Analyze & Enhance", key="enhancement_submit")

    if submit_enhancement_button:
        if not enhancement_text.strip():
            st.warning("Please enter some text to analyze.")
            st.stop()

        with st.spinner("Analyzing content..."):
            try:
                # Compose prompt | model and run it on the raw user text.
                enhancement_chain = enhancement_prompt | llm
                result = enhancement_chain.invoke({"input": enhancement_text})

                # ChatGroq returns a message object; fall back to str() just
                # in case a plain value comes back.
                result_content = result.content if hasattr(result, 'content') else str(result)

                st.markdown("### π Analysis Results")

                try:
                    # The model is asked for JSON but may wrap it in prose or
                    # code fences, so extract the outermost {...} span.
                    json_start = result_content.find('{')
                    json_end = result_content.rfind('}')

                    # FIX: validate rfind() BEFORE adding 1.  Previously
                    # json_end was rfind('}') + 1, so a missing '}' yielded 0,
                    # the `!= -1` guard always passed, and an empty slice was
                    # fed to json.loads.
                    if json_start != -1 and json_end != -1:
                        json_str = result_content[json_start:json_end + 1]
                        analysis_data = json.loads(json_str)

                        st.markdown("#### Scores (1-10)")
                        col1, col2, col3 = st.columns(3)

                        with col1:
                            clarity_score = analysis_data.get('score', {}).get('clarity', 'N/A')
                            st.metric("Clarity", clarity_score)

                        with col2:
                            struct_score = analysis_data.get('score', {}).get('structuredness', 'N/A')
                            st.metric("Structure", struct_score)

                        with col3:
                            answer_score = analysis_data.get('score', {}).get('answerability', 'N/A')
                            st.metric("Answerability", answer_score)

                        keywords = analysis_data.get('keywords', [])
                        if keywords:
                            st.markdown("#### π Key Terms")
                            st.write(", ".join(keywords))

                        optimized_text = analysis_data.get('optimized_text', '')
                        if optimized_text:
                            st.markdown("#### β¨ Optimized Content")
                            st.text_area("Enhanced version:", value=optimized_text, height=200, key="optimized_output")

                            # NOTE(review): a button rendered inside another
                            # button's "clicked" branch disappears on the next
                            # Streamlit rerun, so this can never report a real
                            # click — consider session_state if this matters.
                            if st.button("π Copy Optimized Text"):
                                st.success("Text copied to clipboard! (Note: Manual copy from text area above)")
                    else:
                        # No JSON object found — show the raw model output.
                        st.markdown("#### Analysis Response")
                        st.write(result_content)

                except json.JSONDecodeError:
                    # Extracted span was not valid JSON — show the raw output.
                    st.markdown("#### Analysis Response")
                    st.write(result_content)

            except Exception as e:
                st.error(f"An error occurred during enhancement: {str(e)}")
|
|
|
|
|
|
|
|
with st.sidebar:
    # Configuration help: how to export the two API keys.
    st.markdown("---")
    st.markdown("### π§ Configuration")
    st.markdown("Make sure to set your API keys:")
    for _cmd in (
        "export GROQ_API_KEY='your-key'",
        "export HUGGINGFACE_API_KEY='your-key'",
    ):
        st.code(_cmd)

    # About section: the pieces this app is built from.
    st.markdown("---")
    st.markdown("### βΉοΈ About")
    st.markdown("This app combines:")
    for _item in (
        "- **Groq LLM** for fast inference",
        "- **FAISS** for vector search",
        "- **HuggingFace** embeddings",
        "- **RAG** for accurate answers",
    ):
        st.markdown(_item)

    st.markdown("---")
    st.markdown("*Built with Streamlit, LangChain, and Groq*")