MBilal-72 committed on
Commit
b47cd08
·
verified ·
1 Parent(s): dc3f770

update app.py with link analyzer

Browse files
Files changed (1) hide show
  1. app.py +341 -59
app.py CHANGED
@@ -2,6 +2,12 @@ import os
2
  import tempfile
3
  import streamlit as st
4
  import json
 
 
 
 
 
 
5
 
6
  from langchain_community.document_loaders import PyPDFLoader
7
  from langchain_community.vectorstores import FAISS
@@ -18,7 +24,7 @@ HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "your-huggingface-api-key
18
  # --- Initialize Groq LLM ---
19
  llm = ChatGroq(
20
  api_key=GROQ_API_KEY,
21
- model_name="llama3-8b-8192", # Note: it's `model_name` not `model`
22
  temperature=0.1
23
  )
24
 
@@ -26,7 +32,6 @@ llm = ChatGroq(
26
  embedding = HuggingFaceEmbeddings(
27
  model_name="sentence-transformers/all-MiniLM-L6-v2",
28
  cache_folder="./hf_cache",
29
- # huggingfacehub_api_token=HUGGINGFACE_API_KEY
30
  )
31
 
32
  # --- System Prompt for Content Enhancement ---
@@ -64,6 +69,140 @@ Present your analysis and optimized text in the following JSON format:
64
  }
65
  ```"""
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  # --- Create Chat Prompt Template for Content Enhancement ---
68
  enhancement_prompt = ChatPromptTemplate.from_messages([
69
  ("system", system_prompt),
@@ -71,29 +210,25 @@ enhancement_prompt = ChatPromptTemplate.from_messages([
71
  ])
72
 
73
  # --- Streamlit UI ---
74
- st.title("πŸ“„πŸ“₯ Chat with PDF or Text using Groq + RAG")
75
- st.sidebar.title("Features")
76
- st.sidebar.markdown("- Upload PDF files")
77
- st.sidebar.markdown("- Paste raw text")
78
- st.sidebar.markdown("- Content enhancement analysis")
79
- st.sidebar.markdown("- Question answering with RAG")
 
 
 
80
 
81
- # Create tabs for different functionalities
82
- tab1, tab2 = st.tabs(["πŸ“„ Document Chat", "πŸ”§ Content Enhancement"])
83
 
84
  with tab1:
85
  st.header("Document Question Answering")
86
 
87
- # Option to upload PDF
88
  uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
89
-
90
- # Option to paste raw text
91
  pasted_text = st.text_area("Or paste some text below:", height=150)
92
-
93
- # User's question
94
  user_query = st.text_input("Ask a question about the content")
95
-
96
- # Submit button for QA
97
  submit_qa_button = st.button("Submit Question", key="qa_submit")
98
 
99
  if submit_qa_button:
@@ -103,7 +238,6 @@ with tab1:
103
 
104
  documents = []
105
 
106
- # Handle uploaded PDF
107
  if uploaded_file:
108
  with st.spinner("Processing PDF..."):
109
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
@@ -112,24 +246,18 @@ with tab1:
112
 
113
  loader = PyPDFLoader(tmp_path)
114
  documents = loader.load_and_split()
115
-
116
- # Clean up temporary file
117
  os.unlink(tmp_path)
118
 
119
- # Handle pasted text if no PDF
120
  elif pasted_text.strip():
121
  documents = [Document(page_content=pasted_text)]
122
-
123
  else:
124
  st.warning("Please upload a PDF or paste some text.")
125
  st.stop()
126
 
127
- # Create vector store
128
  with st.spinner("Creating embeddings..."):
129
  vectorstore = FAISS.from_documents(documents, embedding)
130
  retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
131
 
132
- # Custom prompt for QA
133
  qa_prompt_template = PromptTemplate(
134
  input_variables=["context", "question"],
135
  template="""You are an AI assistant. Use the following context to answer the question.
@@ -140,7 +268,6 @@ with tab1:
140
  Answer:"""
141
  )
142
 
143
- # QA Chain
144
  qa_chain = RetrievalQA.from_chain_type(
145
  llm=llm,
146
  chain_type="stuff",
@@ -149,16 +276,12 @@ with tab1:
149
  chain_type_kwargs={"prompt": qa_prompt_template}
150
  )
151
 
152
- # Run QA
153
  with st.spinner("Generating answer..."):
154
  try:
155
  result = qa_chain({"query": user_query})
156
-
157
- # Show result
158
  st.markdown("### πŸ’¬ Answer")
159
  st.write(result["result"])
160
 
161
- # Show sources
162
  with st.expander("πŸ“„ Source Documents"):
163
  for i, doc in enumerate(result["source_documents"]):
164
  st.write(f"**Source {i+1}:**")
@@ -172,12 +295,7 @@ with tab1:
172
 
173
  with tab2:
174
  st.header("Content Enhancement Analysis")
175
- st.markdown("Analyze and optimize your content for better LLM performance.")
176
-
177
- # Text input for enhancement
178
  enhancement_text = st.text_area("Enter text to analyze and enhance:", height=200, key="enhancement_input")
179
-
180
- # Submit button for enhancement
181
  submit_enhancement_button = st.button("Analyze & Enhance", key="enhancement_submit")
182
 
183
  if submit_enhancement_button:
@@ -187,20 +305,13 @@ with tab2:
187
 
188
  with st.spinner("Analyzing content..."):
189
  try:
190
- # Create the enhancement chain
191
  enhancement_chain = enhancement_prompt | llm
192
-
193
- # Run enhancement analysis
194
  result = enhancement_chain.invoke({"input": enhancement_text})
195
-
196
- # Parse the result
197
  result_content = result.content if hasattr(result, 'content') else str(result)
198
 
199
  st.markdown("### πŸ“Š Analysis Results")
200
 
201
- # Try to extract JSON from the response
202
  try:
203
- # Find JSON in the response
204
  json_start = result_content.find('{')
205
  json_end = result_content.rfind('}') + 1
206
 
@@ -208,7 +319,6 @@ with tab2:
208
  json_str = result_content[json_start:json_end]
209
  analysis_data = json.loads(json_str)
210
 
211
- # Display scores
212
  st.markdown("#### Scores (1-10)")
213
  col1, col2, col3 = st.columns(3)
214
 
@@ -224,50 +334,222 @@ with tab2:
224
  answer_score = analysis_data.get('score', {}).get('answerability', 'N/A')
225
  st.metric("Answerability", answer_score)
226
 
227
- # Display keywords
228
  keywords = analysis_data.get('keywords', [])
229
  if keywords:
230
  st.markdown("#### πŸ”‘ Key Terms")
231
  st.write(", ".join(keywords))
232
 
233
- # Display optimized text
234
  optimized_text = analysis_data.get('optimized_text', '')
235
  if optimized_text:
236
  st.markdown("#### ✨ Optimized Content")
237
  st.text_area("Enhanced version:", value=optimized_text, height=200, key="optimized_output")
238
-
239
- # Option to copy optimized text
240
- if st.button("πŸ“‹ Copy Optimized Text"):
241
- st.success("Text copied to clipboard! (Note: Manual copy from text area above)")
242
  else:
243
- # Fallback: display raw response
244
  st.markdown("#### Analysis Response")
245
  st.write(result_content)
246
 
247
  except json.JSONDecodeError:
248
- # Fallback: display raw response
249
  st.markdown("#### Analysis Response")
250
  st.write(result_content)
251
 
252
  except Exception as e:
253
  st.error(f"An error occurred during enhancement: {str(e)}")
254
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  # --- Sidebar Information ---
256
  with st.sidebar:
257
  st.markdown("---")
258
  st.markdown("### πŸ”§ Configuration")
259
- st.markdown("Make sure to set your API keys:")
260
  st.code("export GROQ_API_KEY='your-key'")
261
- st.code("export HUGGINGFACE_API_KEY='your-key'")
 
 
 
 
 
 
262
 
263
  st.markdown("---")
264
  st.markdown("### ℹ️ About")
265
- st.markdown("This app combines:")
266
- st.markdown("- **Groq LLM** for fast inference")
267
- st.markdown("- **FAISS** for vector search")
268
- st.markdown("- **HuggingFace** embeddings")
269
- st.markdown("- **RAG** for accurate answers")
270
 
271
- # --- Footer ---
272
  st.markdown("---")
273
- st.markdown("*Built with Streamlit, LangChain, and Groq*")
 
2
  import tempfile
3
  import streamlit as st
4
  import json
5
+ import requests
6
+ from bs4 import BeautifulSoup
7
+ from urllib.parse import urljoin, urlparse
8
+ import time
9
+ from typing import List, Dict, Any
10
+ import pandas as pd
11
 
12
  from langchain_community.document_loaders import PyPDFLoader
13
  from langchain_community.vectorstores import FAISS
 
24
  # --- Initialize Groq LLM ---
25
  llm = ChatGroq(
26
  api_key=GROQ_API_KEY,
27
+ model_name="llama3-8b-8192",
28
  temperature=0.1
29
  )
30
 
 
32
  embedding = HuggingFaceEmbeddings(
33
  model_name="sentence-transformers/all-MiniLM-L6-v2",
34
  cache_folder="./hf_cache",
 
35
  )
36
 
37
  # --- System Prompt for Content Enhancement ---
 
69
  }
70
  ```"""
71
 
72
+ # --- GEO Analysis System Prompt ---
73
+ geo_analysis_prompt = """You are a Generative Engine Optimizer (GEO) specialist. Analyze the provided website content for its effectiveness in AI-powered search engines and LLM systems.
74
+
75
+ Evaluate the content based on these GEO criteria (score 1-10 each):
76
+
77
+ 1. **AI Search Visibility**: How likely is this content to be surfaced by AI search engines?
78
+ 2. **Query Intent Matching**: How well does the content match common user queries?
79
+ 3. **Factual Accuracy & Authority**: How trustworthy and authoritative is the information?
80
+ 4. **Conversational Readiness**: How suitable is the content for AI chat responses?
81
+ 5. **Semantic Richness**: How well does the content use relevant semantic keywords?
82
+ 6. **Context Completeness**: Does the content provide complete, self-contained answers?
83
+ 7. **Citation Worthiness**: How likely are AI systems to cite this content?
84
+ 8. **Multi-Query Coverage**: Does the content answer multiple related questions?
85
+
86
+ Also identify:
87
+ - Primary topics and entities
88
+ - Missing information gaps
89
+ - Optimization opportunities
90
+ - Specific enhancement recommendations
91
+
92
+ Format your response as JSON:
93
+
94
+ ```json
95
+ {
96
+ "geo_scores": {
97
+ "ai_search_visibility": 7.5,
98
+ "query_intent_matching": 8.0,
99
+ "factual_accuracy": 9.0,
100
+ "conversational_readiness": 6.5,
101
+ "semantic_richness": 7.0,
102
+ "context_completeness": 8.5,
103
+ "citation_worthiness": 7.8,
104
+ "multi_query_coverage": 6.0
105
+ },
106
+ "overall_geo_score": 7.5,
107
+ "primary_topics": ["topic1", "topic2"],
108
+ "entities": ["entity1", "entity2"],
109
+ "missing_gaps": ["gap1", "gap2"],
110
+ "optimization_opportunities": [
111
+ {
112
+ "type": "semantic_enhancement",
113
+ "description": "Add more related terms",
114
+ "priority": "high"
115
+ }
116
+ ],
117
+ "recommendations": [
118
+ "Specific actionable recommendation 1",
119
+ "Specific actionable recommendation 2"
120
+ ]
121
+ }
122
+ ```"""
123
+
124
+ # --- Website Scraping Functions ---
125
def _parse_page(page_url: str, html: bytes, base_url: str):
    """Parse one HTML document into a page record and its same-site links.

    Args:
        page_url: URL that was requested; stored in the record's ``url`` field.
        html: Raw response body.
        base_url: Final URL after redirects; relative links are resolved
            against it and only links on its host are returned.

    Returns:
        A ``(page, links)`` tuple: ``page`` is the dict consumed by the GEO
        analysis UI, ``links`` is a list of absolute, fragment-free URLs.
    """
    soup = BeautifulSoup(html, 'html.parser')

    # Collect links BEFORE the boilerplate elements are removed: most internal
    # navigation lives in <nav>/<header>/<footer>.
    site = urlparse(base_url).netloc
    links = []
    for anchor in soup.find_all('a', href=True):
        target = urlparse(urljoin(base_url, anchor['href']))
        if target.scheme in ('http', 'https') and target.netloc == site:
            links.append(target._replace(fragment='').geturl())

    # Drop non-content elements so they do not pollute the extracted text.
    for element in soup(["script", "style", "nav", "footer", "header"]):
        element.decompose()

    main_content = (
        soup.find('main')
        or soup.find('article')
        or soup.find('div', class_='content')
        or soup.body
    )
    source = main_content if main_content else soup
    # split() with no argument collapses every whitespace run (newlines, tabs,
    # repeated spaces), not just '\n'.
    cleaned_text = ' '.join(source.get_text(separator=' ', strip=True).split())

    title_tag = soup.find('title')
    title = title_tag.get_text() if title_tag else "No Title"
    meta_desc = soup.find('meta', attrs={'name': 'description'})
    # A description tag without a content attribute yields None; fall back.
    description = (meta_desc.get('content') if meta_desc else None) or "No Description"

    headings = [
        {'level': level, 'text': heading.get_text(strip=True)}
        for level in range(1, 7)
        for heading in soup.find_all(f'h{level}')
    ]

    page = {
        'url': page_url,
        'title': title,
        'description': description,
        'content': cleaned_text[:10000],  # Limit content length sent to the LLM
        'headings': headings,
        'word_count': len(cleaned_text.split()),
    }
    return page, links


def extract_website_content(url: str, max_pages: int = 5) -> List[Dict[str, Any]]:
    """Extract content from up to ``max_pages`` pages of a website.

    Starts at ``url`` and follows same-host links breadth-first until
    ``max_pages`` pages have been extracted or no unvisited links remain.

    Args:
        url: Absolute URL of the first page to fetch.
        max_pages: Maximum number of pages to return; values below 1 are
            treated as 1 so the seed page is always attempted.

    Returns:
        One dict per extracted page with the keys ``url``, ``title``,
        ``description``, ``content`` (truncated to 10000 characters),
        ``headings`` and ``word_count``. Empty if nothing could be extracted.
        Failures are reported through ``st.error`` and never raised.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    page_limit = max(1, max_pages)

    pages = []
    queue = [url]
    seen = {url}

    while queue and len(pages) < page_limit:
        page_url = queue.pop(0)
        try:
            response = requests.get(page_url, headers=headers, timeout=10)
            response.raise_for_status()

            # Followed links may point at PDFs, images, etc.; skip those
            # quietly. The seed URL is always parsed, as before.
            content_type = response.headers.get('Content-Type', 'text/html')
            if page_url != url and 'html' not in content_type.lower():
                continue

            page, links = _parse_page(page_url, response.content, response.url)
        except Exception as e:
            # UI boundary: one unreachable or malformed page must not abort
            # the crawl or crash the app.
            st.error(f"Error scraping {page_url}: {str(e)}")
            continue

        pages.append(page)
        for link in links:
            if link not in seen:
                seen.add(link)
                queue.append(link)

    return pages
179
+
180
def analyze_page_geo_score(content: str, title: str, llm) -> Dict[str, Any]:
    """Ask the LLM for a GEO analysis of a single page.

    Args:
        content: Extracted page text.
        title: Page title.
        llm: Chat model that can be piped after a ``ChatPromptTemplate``.

    Returns:
        The JSON object parsed from the model's reply, or a dict with a single
        ``"error"`` key if the call failed or no JSON object could be found.
        Never raises.
    """
    try:
        # The instructions contain literal JSON braces and the page text is
        # arbitrary, so neither may be embedded in the template itself, where
        # "{...}" would be parsed as a template variable. Both are passed as
        # values, which are substituted verbatim.
        geo_prompt = ChatPromptTemplate.from_messages([
            ("system", "{geo_instructions}"),
            ("user", "Title: {title}\n\nContent: {content}"),
        ])

        chain = geo_prompt | llm
        result = chain.invoke({
            "geo_instructions": geo_analysis_prompt,
            "title": title,
            "content": content,
        })

        result_content = result.content if hasattr(result, 'content') else str(result)

        # Take the outermost {...} span; the model may wrap the JSON in prose
        # or a code fence.
        json_start = result_content.find('{')
        json_end = result_content.rfind('}') + 1

        # rfind() + 1 is 0 (not -1) when no '}' exists, so compare positions.
        if json_start == -1 or json_end <= json_start:
            return {"error": "Could not parse GEO analysis"}

        return json.loads(result_content[json_start:json_end])

    except Exception as e:
        return {"error": f"Analysis failed: {str(e)}"}
205
+
206
  # --- Create Chat Prompt Template for Content Enhancement ---
207
  enhancement_prompt = ChatPromptTemplate.from_messages([
208
  ("system", system_prompt),
 
210
  ])
211
 
212
  # --- Streamlit UI ---
213
+ st.set_page_config(page_title="AI Content Optimizer", page_icon="πŸš€", layout="wide")
214
+ st.title("πŸš€ AI Content Optimizer & GEO Analyzer")
215
+
216
+ # Sidebar
217
+ st.sidebar.title("πŸ› οΈ Tools")
218
+ st.sidebar.markdown("- πŸ“„ Document Q&A")
219
+ st.sidebar.markdown("- πŸ”§ Content Enhancement")
220
+ st.sidebar.markdown("- 🌐 Website GEO Analysis")
221
+ st.sidebar.markdown("- πŸ“Š SEO-like Scoring")
222
 
223
+ # Create tabs
224
+ tab1, tab2, tab3 = st.tabs(["πŸ“„ Document Chat", "πŸ”§ Content Enhancement", "🌐 Website GEO Analysis"])
225
 
226
  with tab1:
227
  st.header("Document Question Answering")
228
 
 
229
  uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
 
 
230
  pasted_text = st.text_area("Or paste some text below:", height=150)
 
 
231
  user_query = st.text_input("Ask a question about the content")
 
 
232
  submit_qa_button = st.button("Submit Question", key="qa_submit")
233
 
234
  if submit_qa_button:
 
238
 
239
  documents = []
240
 
 
241
  if uploaded_file:
242
  with st.spinner("Processing PDF..."):
243
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
 
246
 
247
  loader = PyPDFLoader(tmp_path)
248
  documents = loader.load_and_split()
 
 
249
  os.unlink(tmp_path)
250
 
 
251
  elif pasted_text.strip():
252
  documents = [Document(page_content=pasted_text)]
 
253
  else:
254
  st.warning("Please upload a PDF or paste some text.")
255
  st.stop()
256
 
 
257
  with st.spinner("Creating embeddings..."):
258
  vectorstore = FAISS.from_documents(documents, embedding)
259
  retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
260
 
 
261
  qa_prompt_template = PromptTemplate(
262
  input_variables=["context", "question"],
263
  template="""You are an AI assistant. Use the following context to answer the question.
 
268
  Answer:"""
269
  )
270
 
 
271
  qa_chain = RetrievalQA.from_chain_type(
272
  llm=llm,
273
  chain_type="stuff",
 
276
  chain_type_kwargs={"prompt": qa_prompt_template}
277
  )
278
 
 
279
  with st.spinner("Generating answer..."):
280
  try:
281
  result = qa_chain({"query": user_query})
 
 
282
  st.markdown("### πŸ’¬ Answer")
283
  st.write(result["result"])
284
 
 
285
  with st.expander("πŸ“„ Source Documents"):
286
  for i, doc in enumerate(result["source_documents"]):
287
  st.write(f"**Source {i+1}:**")
 
295
 
296
  with tab2:
297
  st.header("Content Enhancement Analysis")
 
 
 
298
  enhancement_text = st.text_area("Enter text to analyze and enhance:", height=200, key="enhancement_input")
 
 
299
  submit_enhancement_button = st.button("Analyze & Enhance", key="enhancement_submit")
300
 
301
  if submit_enhancement_button:
 
305
 
306
  with st.spinner("Analyzing content..."):
307
  try:
 
308
  enhancement_chain = enhancement_prompt | llm
 
 
309
  result = enhancement_chain.invoke({"input": enhancement_text})
 
 
310
  result_content = result.content if hasattr(result, 'content') else str(result)
311
 
312
  st.markdown("### πŸ“Š Analysis Results")
313
 
 
314
  try:
 
315
  json_start = result_content.find('{')
316
  json_end = result_content.rfind('}') + 1
317
 
 
319
  json_str = result_content[json_start:json_end]
320
  analysis_data = json.loads(json_str)
321
 
 
322
  st.markdown("#### Scores (1-10)")
323
  col1, col2, col3 = st.columns(3)
324
 
 
334
  answer_score = analysis_data.get('score', {}).get('answerability', 'N/A')
335
  st.metric("Answerability", answer_score)
336
 
 
337
  keywords = analysis_data.get('keywords', [])
338
  if keywords:
339
  st.markdown("#### πŸ”‘ Key Terms")
340
  st.write(", ".join(keywords))
341
 
 
342
  optimized_text = analysis_data.get('optimized_text', '')
343
  if optimized_text:
344
  st.markdown("#### ✨ Optimized Content")
345
  st.text_area("Enhanced version:", value=optimized_text, height=200, key="optimized_output")
 
 
 
 
346
  else:
 
347
  st.markdown("#### Analysis Response")
348
  st.write(result_content)
349
 
350
  except json.JSONDecodeError:
 
351
  st.markdown("#### Analysis Response")
352
  st.write(result_content)
353
 
354
  except Exception as e:
355
  st.error(f"An error occurred during enhancement: {str(e)}")
356
 
357
def _average_geo_scores(analyses):
    """Average every numeric GEO metric across the analyses.

    Missing metrics and non-numeric values (the scores come from LLM output)
    are skipped instead of raising.
    """
    collected = {}
    for analysis in analyses:
        scores = analysis.get('geo_scores')
        if not isinstance(scores, dict):
            continue
        for key, value in scores.items():
            try:
                number = float(value)
            except (TypeError, ValueError):
                continue
            collected.setdefault(key, []).append(number)
    return {key: sum(values) / len(values) for key, values in collected.items()}


# --- Website GEO Analysis tab: scrape a site, score each page, summarize ---
with tab3:
    st.header("🌐 Website GEO Analysis")
    st.markdown("Analyze any website for Generative Engine Optimization (GEO) - how well it performs with AI search engines.")

    col1, col2 = st.columns([2, 1])

    with col1:
        website_url = st.text_input("Enter website URL:", placeholder="https://example.com")

    with col2:
        max_pages = st.selectbox("Pages to analyze:", [1, 3, 5], index=0)

    analyze_website_button = st.button("🔍 Analyze Website", key="website_analyze")

    if analyze_website_button:
        # Strip first so stray whitespace does not end up inside the URL.
        website_url = website_url.strip()
        if not website_url:
            st.warning("Please enter a website URL.")
            st.stop()

        # Add https:// if not present
        if not website_url.startswith(('http://', 'https://')):
            website_url = 'https://' + website_url

        with st.spinner(f"Analyzing website: {website_url}"):
            try:
                pages_data = extract_website_content(website_url, max_pages)

                if not pages_data:
                    st.error("Could not extract content from the website.")
                    st.stop()

                st.success(f"Successfully extracted content from {len(pages_data)} page(s)")

                # Analyze each page; failed pages are reported and skipped.
                all_analyses = []
                for i, page_data in enumerate(pages_data):
                    with st.spinner(f"Analyzing page {i+1}/{len(pages_data)}..."):
                        analysis = analyze_page_geo_score(
                            page_data['content'],
                            page_data['title'],
                            llm
                        )

                    if 'error' not in analysis:
                        analysis['page_data'] = page_data
                        all_analyses.append(analysis)
                    else:
                        st.warning(f"Could not analyze page {i+1}: {analysis['error']}")

                if all_analyses:
                    st.markdown("## 📊 GEO Analysis Results")

                    avg_scores = _average_geo_scores(all_analyses)
                    overall_avg = sum(avg_scores.values()) / len(avg_scores) if avg_scores else 0

                    st.markdown("### 🎯 Overall GEO Scores")

                    # Main score, shown relative to a 7.0 baseline.
                    col1, col2, col3 = st.columns([1, 2, 1])
                    with col2:
                        st.metric("Overall GEO Score", f"{overall_avg:.1f}/10",
                                  delta=f"{overall_avg - 7.0:.1f}")

                    # Individual scores, four per row.
                    st.markdown("### 📈 Detailed Metrics")
                    metric_rows = [
                        [
                            ("AI Search Visibility", "ai_search_visibility"),
                            ("Query Intent Match", "query_intent_matching"),
                            ("Factual Accuracy", "factual_accuracy"),
                            ("Conversational Ready", "conversational_readiness"),
                        ],
                        [
                            ("Semantic Richness", "semantic_richness"),
                            ("Context Complete", "context_completeness"),
                            ("Citation Worthy", "citation_worthiness"),
                            ("Multi-Query Cover", "multi_query_coverage"),
                        ],
                    ]
                    for row in metric_rows:
                        for column, (display_name, key) in zip(st.columns(4), row):
                            with column:
                                st.metric(display_name, f"{avg_scores.get(key, 0):.1f}")

                    st.markdown("### 💡 Optimization Recommendations")

                    all_recommendations = []
                    all_opportunities = []
                    for analysis in all_analyses:
                        recommendations = analysis.get('recommendations', [])
                        if isinstance(recommendations, list):
                            all_recommendations.extend(str(rec) for rec in recommendations)
                        opportunities = analysis.get('optimization_opportunities', [])
                        if isinstance(opportunities, list):
                            # Keep only well-formed entries; the list is LLM output.
                            all_opportunities.extend(
                                opp for opp in opportunities if isinstance(opp, dict)
                            )

                    # Remove duplicates while keeping first-seen order, so the
                    # top five shown are stable between runs.
                    unique_recommendations = list(dict.fromkeys(all_recommendations))

                    for i, rec in enumerate(unique_recommendations[:5], 1):
                        st.write(f"**{i}.** {rec}")

                    if all_opportunities:
                        st.markdown("### 🚀 Priority Optimizations")

                        high_priority = [opp for opp in all_opportunities if opp.get('priority') == 'high']
                        medium_priority = [opp for opp in all_opportunities if opp.get('priority') == 'medium']

                        if high_priority:
                            st.markdown("#### 🔴 High Priority")
                            for opp in high_priority[:3]:
                                st.write(f"**{opp.get('type', 'Optimization')}**: {opp.get('description', 'No description')}")

                        if medium_priority:
                            st.markdown("#### 🟡 Medium Priority")
                            for opp in medium_priority[:3]:
                                st.write(f"**{opp.get('type', 'Optimization')}**: {opp.get('description', 'No description')}")

                    with st.expander("📋 Detailed Page Analysis"):
                        for i, analysis in enumerate(all_analyses):
                            page_data = analysis.get('page_data', {})
                            st.markdown(f"#### Page {i+1}: {page_data.get('title', 'Unknown Title')}")
                            st.write(f"**URL**: {page_data.get('url', 'Unknown')}")
                            st.write(f"**Word Count**: {page_data.get('word_count', 0)}")

                            if 'primary_topics' in analysis:
                                st.write(f"**Topics**: {', '.join(map(str, analysis['primary_topics']))}")

                            if 'entities' in analysis:
                                st.write(f"**Entities**: {', '.join(map(str, analysis['entities']))}")

                            st.write("---")

                    st.markdown("### 📥 Export Results")

                    report_data = {
                        'website_url': website_url,
                        'analysis_date': time.strftime('%Y-%m-%d %H:%M:%S'),
                        'overall_score': overall_avg,
                        'individual_scores': avg_scores,
                        'recommendations': unique_recommendations,
                        'pages_analyzed': len(all_analyses)
                    }

                    # A plain st.button here would never show the report:
                    # clicking it reruns the script with the outer "Analyze"
                    # button reset to False, so this branch is not reached.
                    # The report is built up front and offered as a download,
                    # which works with the data already on the page.
                    st.download_button(
                        "📊 Generate Report",
                        data=json.dumps(report_data, indent=2, ensure_ascii=False),
                        file_name="geo_report.json",
                        mime="application/json",
                        key="geo_report_download",
                    )
                    with st.expander("Report JSON"):
                        st.json(report_data)

                else:
                    st.error("Could not analyze any pages from the website.")

            except Exception as e:
                st.error(f"An error occurred during website analysis: {str(e)}")
531
+
532
  # --- Sidebar Information ---
533
  with st.sidebar:
534
  st.markdown("---")
535
  st.markdown("### πŸ”§ Configuration")
536
+ st.markdown("Set your API keys:")
537
  st.code("export GROQ_API_KEY='your-key'")
538
+
539
+ st.markdown("---")
540
+ st.markdown("### πŸ“– GEO Metrics Explained")
541
+ st.markdown("**AI Search Visibility**: Likelihood of appearing in AI search results")
542
+ st.markdown("**Query Intent Matching**: How well content matches user queries")
543
+ st.markdown("**Conversational Readiness**: Suitability for AI chat responses")
544
+ st.markdown("**Citation Worthiness**: Probability of being cited by AI")
545
 
546
  st.markdown("---")
547
  st.markdown("### ℹ️ About")
548
+ st.markdown("This tool analyzes websites for:")
549
+ st.markdown("- πŸ€– AI search optimization")
550
+ st.markdown("- πŸ’¬ LLM compatibility")
551
+ st.markdown("- πŸ“Š GEO scoring")
552
+ st.markdown("- 🎯 Content recommendations")
553
 
 
554
  st.markdown("---")
555
+ st.markdown("*πŸš€ AI Content Optimizer - Built with Streamlit, LangChain, and Groq*")