from sentence_transformers import SentenceTransformer
from PyPDF2 import PdfReader
import tiktoken
import groq
#import asyncio
#from groq import AsyncGroq
import faiss
import numpy as np
import gradio as gr
import json
import os
import pickle
# == Create the models folder ==
# Ensure the directory used by save_embeddings()/load_embeddings() exists
# before anything tries to write into it.
os.makedirs("models", exist_ok=True)
# == Load API Key dari File ==
def load_api_key():
    """Read and return the Groq API key stored in the local ``config.json``."""
    with open("config.json", "r") as cfg_file:
        return json.load(cfg_file)["GROQ_API_KEY"]
# Loaded once at import time; the Groq client below uses this key.
GROQ_API_KEY = load_api_key()
# == Ekstraksi Teks dari PDF ==
def extract_text_from_pdf(pdf_file: str) -> str:
    """Return the concatenated text of every page in *pdf_file*."""
    collected = []
    with open(pdf_file, 'rb') as handle:
        for page in PdfReader(handle).pages:
            # extract_text() can return None for image-only pages; treat as "".
            collected.append(page.extract_text() or "")
    return " ".join(collected)
# == Chunking Teks ==
def chunk_text(text: str, max_tokens: int = 512) -> list:
    """Split *text* into chunks of at most *max_tokens* tokens.

    Tokenization uses tiktoken's cl100k_base encoding; each chunk is decoded
    back to a plain string.
    """
    encoder = tiktoken.get_encoding("cl100k_base")
    token_ids = encoder.encode(text)
    return [
        encoder.decode(token_ids[start:start + max_tokens])
        for start in range(0, len(token_ids), max_tokens)
    ]
# == Embedding with SentenceTransformer ==
# Shared embedding model loaded once at import; all-MiniLM-L6-v2 produces
# 384-dimensional vectors, matching the FAISS dimension `d` declared below.
model = SentenceTransformer('all-MiniLM-L6-v2') # Global model
def get_embedding(text: str):
    """Encode *text* with the global model and return a float32 numpy vector."""
    vector = model.encode(text)
    return np.array(vector, dtype=np.float32)
# == Setup FAISS ==
d = 384 # Embedding dimension; must match the SentenceTransformer model above
# Exact (brute-force) L2 index — fine for small document collections.
index = faiss.IndexFlatL2(d)
# Raw chunk texts, kept parallel to the index: row i of the index corresponds
# to text_chunks[i].
text_chunks = []
def add_to_db(text_chunks_local):
    """Embed every chunk and add the vectors to the global FAISS index.

    Also replaces the global ``text_chunks`` list so search results can be
    mapped back to their source text.
    """
    global text_chunks
    text_chunks = text_chunks_local
    vectors = [get_embedding(chunk) for chunk in text_chunks_local]
    matrix = np.array(vectors, dtype=np.float32).reshape(-1, d)
    index.add(matrix)
def search_db(query, k=5):
    """Return up to *k* stored chunks most similar to *query*.

    Embeds the query, searches the global FAISS index, and maps the hit
    indices back to the parallel ``text_chunks`` list. Returns a one-element
    placeholder message list when the index is still empty.
    """
    if index.ntotal == 0:
        return ["Database masih kosong, silakan tambahkan data."]
    query_embedding = np.array([get_embedding(query)], dtype=np.float32).reshape(1, -1)
    distances, indices = index.search(query_embedding, k)
    # FAISS pads `indices` with -1 when fewer than k vectors are stored.
    # The original `i < len(text_chunks)` check let -1 through, and Python's
    # negative indexing then silently returned the LAST chunk as a bogus hit.
    return [text_chunks[i] for i in indices[0] if 0 <= i < len(text_chunks)]
def save_to_faiss(index_path="vector_index.faiss"):
    """Persist the global FAISS index to disk at *index_path*."""
    faiss.write_index(index, index_path)
def load_faiss(index_path="vector_index.faiss"):
    """Replace the global FAISS index with the one stored at *index_path*."""
    global index
    index = faiss.read_index(index_path)
def save_embeddings(embeddings_path="models/embeddings.pkl"):
    """Pickle the global FAISS index to *embeddings_path*.

    NOTE(review): despite the name, this pickles the index object itself,
    not a raw embedding matrix — confirm the installed faiss build supports
    pickling, otherwise prefer save_to_faiss().
    """
    with open(embeddings_path, "wb") as handle:
        pickle.dump(index, handle)
def load_embeddings(embeddings_path="models/embeddings.pkl"):
    """Restore the global FAISS index from the pickle at *embeddings_path*.

    NOTE(review): pickle.load executes arbitrary code from the file — only
    ever load files this application wrote itself.
    """
    global index
    with open(embeddings_path, "rb") as handle:
        index = pickle.load(handle)
# == LLaMA integration via the Groq API ==
# Synchronous client shared by all query_llama() calls; an async variant was
# tried and left commented out (see the commented imports at the top).
client = groq.Client(api_key=GROQ_API_KEY)
#client = AsyncGroq(api_key=GROQ_API_KEY)
def query_llama(prompt):
    """Send *prompt* to the Groq-hosted llama3-8b model and return its reply.

    The reply text is stripped of surrounding whitespace.
    """
    chat_messages = [{"role": "user", "content": prompt}]
    response = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=chat_messages,
        max_tokens=512,
    )
    return response.choices[0].message.content.strip()
# == Main Workflow ==
if __name__ == '__main__':
    # Ingest the catalogue PDF, embed it, and persist the vector store.
    pdf_text = extract_text_from_pdf('arafa-produk-2025.pdf')
    # NOTE(review): 1024-token chunks override the chunk_text default of 512 —
    # confirm chunks this long still embed well with all-MiniLM-L6-v2.
    text_chunks = chunk_text(pdf_text, max_tokens=1024)
    add_to_db(text_chunks)
    save_to_faiss()
    save_embeddings()
    # Console smoke test: one retrieval-augmented question about the document.
    retrieved_chunks = search_db("Apa isi dokumen ini?")
    context = "\n".join(retrieved_chunks)
    prompt = f"Gunakan informasi berikut untuk menjawab:\n{context}\n\nPertanyaan: Apa isi dokumen ini?"
    answer = query_llama(prompt)
    print(answer)
    print("Gradio Version:", gr. __version__)
    # == Chatbot Interface ==
    def chatbot_interface(user_query):
        """Answer *user_query* via RAG: retrieve similar chunks, then ask the LLM."""
        retrieved_chunks = search_db(user_query)
        context = "\n".join(retrieved_chunks)
        prompt = f"Gunakan informasi berikut untuk menjawab:\n{context}\n\nPertanyaan: {user_query}"
        answer = query_llama(prompt)
        return answer
    # Launch the Gradio web UI (blocks until the server is stopped).
    iface = gr.Interface(fn=chatbot_interface, inputs="text", outputs="text", title="ARAFATEA RAG ex Clone")
    iface.launch()