nihalm-collab committed on
Commit
047ef2a
·
verified ·
1 Parent(s): 5b94ebd

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 📚 KitapYurdu Yorum Asistanı Chatbot
3
+ - Hugging Face Spaces veya Lokal ortamda çalışacak
4
+ """
5
+
6
+ import os
7
+ import streamlit as st
8
+ from datasets import load_dataset
9
+ import chromadb
10
+ from chromadb.config import Settings
11
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
12
+ from langchain.vectorstores import Chroma
13
+ from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
14
+ from langchain.chains import RetrievalQA
15
+ from dotenv import load_dotenv
16
+
17
# --- 1. Environment variables
# Local runs: read settings from a .env file when one exists in the
# working directory.
if os.path.exists(".env"):
    load_dotenv()

# Credentials for the Hugging Face Hub and the Gemini API; each is None
# when the corresponding variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
24
+
25
# --- 2. Streamlit page header
# Configure the page first, then render the visible title.
st.set_page_config(page_title="📖 KitapYurdu Chatbot")
st.title("📖 KitapYurdu Yorum Asistanı (Gemini 2.0 Flash)")
28
+
29
# --- 3. Dataset loading
@st.cache_data
def load_kitapyurdu_dataset():
    """Return the ``train`` split of ``alibayram/kitapyurdu_yorumlar``.

    The request is authenticated with the module-level ``HF_TOKEN``. The
    function is wrapped in ``st.cache_data``, so the result is cached by
    Streamlit between calls.
    """
    return load_dataset(
        "alibayram/kitapyurdu_yorumlar",
        split="train",
        token=HF_TOKEN,
    )
34
+
35
# Tell the user a load is in progress, fetch the dataset through the cached
# loader, then confirm that it is available.
st.write("📡 Veri seti yükleniyor...")
dataset = load_kitapyurdu_dataset()
st.success("✅ Veri seti yüklendi!")
38
+
39
# --- 4./5. Split the reviews into chunks and index them in ChromaDB
PERSIST_DIR = "chroma_db"
MAX_REVIEWS = 500  # only the first reviews are indexed, as a sample

# Stop with a readable message instead of letting the Gemini client fail
# later with an opaque authentication error.
if not GEMINI_API_KEY:
    st.error("❌ GEMINI_API_KEY ortam değişkeni tanımlı değil.")
    st.stop()

os.makedirs(PERSIST_DIR, exist_ok=True)


@st.cache_resource
def _build_vectorstore(_dataset, api_key):
    """Return the Chroma store holding the embedded review chunks.

    Streamlit re-executes the whole script on every interaction. Without
    caching, each rerun would embed the same chunks again and append them
    to the persisted collection, so retrieval would return duplicates.
    ``st.cache_resource`` builds the store once per process, and an
    already-populated collection in ``PERSIST_DIR`` is reused as-is.
    Delete that directory to force a rebuild.

    Args:
        _dataset: Review dataset with a ``"yorum"`` column. The leading
            underscore tells Streamlit not to hash it for the cache key.
        api_key: Gemini API key used by the embedding model.

    Returns:
        The populated ``Chroma`` vector store.

    Raises:
        ValueError: If the selected reviews contain no text to index.
    """
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        google_api_key=api_key,
    )
    store = Chroma(
        persist_directory=PERSIST_DIR,
        embedding_function=embeddings,
    )

    # A previous run already indexed the reviews: reuse instead of appending.
    if store.get(limit=1)["ids"]:
        return store

    # Slice rows first so only the sample is read, not the whole column.
    reviews = _dataset[:MAX_REVIEWS]["yorum"]
    # Skip missing/empty reviews; str.join would raise on None.
    corpus = " ".join(str(review) for review in reviews if review)

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text(corpus)
    if not chunks:
        raise ValueError("no review text available to index")

    store.add_texts(chunks)
    return store


vectorstore = _build_vectorstore(dataset, GEMINI_API_KEY)
57
+
58
# --- 6. RAG pipeline
# Gemini chat model used to write the answer.
llm = ChatGoogleGenerativeAI(
    temperature=0.2,
    google_api_key=GEMINI_API_KEY,
    model="gemini-2.0-flash",
)

# Retriever over the vector store, limited to 3 results per question.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=3))

# Question-answering chain that combines the retriever with the model.
qa_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    llm=llm,
)
70
+
71
# --- 7. User interface
st.markdown("### 💬 Kitaplar hakkında soru sor:")
# Strip so that a whitespace-only entry does not trigger a model call.
user_query = st.text_input("Örnek: 'En çok beğenilen kitap hangisi?'", "").strip()

if user_query:
    try:
        with st.spinner("Yanıt hazırlanıyor..."):
            # `invoke` replaces the deprecated `Chain.run`; RetrievalQA reads
            # the question from "query" and returns the answer under "result".
            response = qa_chain.invoke({"query": user_query})["result"]
    except Exception as exc:
        # Top-level UI boundary: show API/network failures as a message
        # instead of a raw traceback.
        st.error(f"❌ Yanıt alınamadı: {exc}")
    else:
        st.markdown("### 🧠 Yanıt:")
        st.write(response)