Spaces:

rrayhka
/

summary-ppw

Sleeping

App Files Files Community

summary-ppw / app.py

rrayhka

Update app.py

a0ba20b verified over 1 year ago

raw

history blame contribute delete

4.73 kB

	from flask import Flask, request, render_template, jsonify
	import pandas as pd
	import requests
	import os
	import re
	import networkx as nx
	from nltk.tokenize import word_tokenize, sent_tokenize
	from nltk.corpus import stopwords
	from bs4 import BeautifulSoup
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity
	import matplotlib.pyplot as plt
	import nltk

	# Inisialisasi NLTK
	nltk.download("stopwords")
	nltk.download("punkt")
	nltk.download('punkt_tab')

	# Inisialisasi Flask
	app = Flask(__name__)

	# Fungsi untuk scraping berita
	def scrape_news(url):
	isi = []
	judul = []

	headers = {
	"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
	}
	try:
	response = requests.get(url, headers=headers)
	response.raise_for_status()
	article_full = BeautifulSoup(response.content, "html.parser")
	judul_artikel = article_full.find("h1", class_="mb-4 text-32 font-extrabold")
	if judul_artikel:
	judul_artikel = judul_artikel.text.strip()
	else:
	judul_artikel = "Judul tidak ditemukan"
	artikel_element = article_full.find("div", class_="detail-text")
	if artikel_element:
	artikel_teks = [p.get_text(strip=True) for p in artikel_element.find_all("p")]
	artikel_content = "\n".join(artikel_teks)
	else:
	artikel_content = "Konten artikel tidak ditemukan"
	isi.append(artikel_content)
	judul.append(judul_artikel)
	except requests.exceptions.RequestException as e:
	judul.append("Error")
	isi.append(f"Gagal mengambil data: {e}")

	return pd.DataFrame({"judul": judul, "isi": isi})

	# Fungsi preprocessing
	def preprocess_text(content):
	content = content.lower()
	content = re.sub(r"[0-9]\|[/(){}\[\]\\|@,;_]\|[^a-z .]+", " ", content)
	content = re.sub(r"\s+", " ", content).strip()
	tokens = word_tokenize(content)
	stopword = set(stopwords.words("indonesian"))
	tokens = [word for word in tokens if word not in stopword]
	return " ".join(tokens)

	# Fungsi untuk membuat ringkasan dan visualisasi graf
	def summarize_and_visualize(content):
	kalimat = sent_tokenize(content)
	preprocessed_text = preprocess_text(content)
	kalimat_preprocessing = sent_tokenize(preprocessed_text)

	# TF-IDF dan cosine similarity
	tfidf_vectorizer = TfidfVectorizer()
	tfidf_matrix = tfidf_vectorizer.fit_transform(kalimat_preprocessing)
	cossim_prep = cosine_similarity(tfidf_matrix, tfidf_matrix)

	# Analisis jaringan dengan NetworkX
	G = nx.DiGraph()
	for i in range(len(cossim_prep)):
	G.add_node(i)
	for j in range(len(cossim_prep)):
	if cossim_prep[i][j] > 0.1 and i != j:
	G.add_edge(i, j)

	# Hitung closeness centrality dan buat ringkasan
	closeness_scores = nx.closeness_centrality(G)
	sorted_closeness = sorted(closeness_scores.items(), key=lambda x: x[1], reverse=True)
	ringkasan = " ".join(kalimat[node] for node, _ in sorted_closeness[:3])

	# Visualisasi graf
	plt.figure(figsize=(10, 8))
	pos = nx.spring_layout(G, k=2)
	nx.draw_networkx_nodes(G, pos, node_size=500, node_color="b")
	nx.draw_networkx_edges(G, pos, edge_color="red", arrows=True)
	nx.draw_networkx_labels(G, pos, font_size=10)
	plt.title("Graph Representation of Sentence Similarity")
	# Periksa apakah file graph.png sudah ada
	graph_path = "static/graph.png"
	if os.path.exists(graph_path):
	os.remove(graph_path) # Hapus file jika sudah ada

	# Simpan graf sebagai file baru
	plt.savefig(graph_path)
	plt.close()

	return ringkasan

	# Route utama untuk scraping dan analisis
	@app.route("/", methods=["GET", "POST"])
	def index():
	if request.method == "POST":
	url = request.form.get("url")
	if url:
	# Scraping berita
	df = scrape_news(url)
	if not df.empty:
	content = df["isi"].iloc[0]
	title = df["judul"].iloc[0]

	# Preprocessing, summarizing, and visualizing
	ringkasan = summarize_and_visualize(content)
	return render_template("result.html", title=title, content=content, summary=ringkasan, graph_url="static/graph.png")
	else:
	return render_template("summary.html", error="Gagal mengambil data dari URL.")
	else:
	return render_template("summary.html", error="URL tidak boleh kosong.")
	return render_template("summary.html")

	# Menjalankan aplikasi Flask
	if __name__ == "__main__":
	app.run(debug=True, port=5002)