import gradio as gr
import os
import re
import requests
import numpy as np
import torch
from sklearn.neighbors import NearestNeighbors
from sentence_transformers import SentenceTransformer
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
# --- CONFIGURATION ---
# Hugging Face Inference API token, read from the environment (Space secret)
# so it is never hard-coded. Empty string if unset.
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
# Hosted chat model queried over the serverless Inference API.
HF_MODEL = "HuggingFaceH4/zephyr-7b-beta" # Change this if needed
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
# Default auth header sent with every API request.
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
# Lecture-note source files that form the retrieval knowledge base.
FILES = ["main1.txt", "main2.txt", "main3.txt", "main4.txt", "main5.txt", "main6.txt"]
# Local sentence-embedding model used for chunk/query vectors.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# On-disk cache so embeddings are computed only once per corpus.
EMBEDDING_CACHE_FILE = "embeddings.npy"
CHUNKS_CACHE_FILE = "chunks.npy"
# --- FUNCTIONS ---
def test_model_connection():
    """Ping the HF Inference API endpoint and print status/body for debugging.

    Best-effort diagnostic: never raises, only logs. Fix: a non-JSON body
    (e.g. an HTML error page while the model is loading) previously raised
    inside ``.json()`` and was misreported as a connection failure.
    """
    try:
        print("🔍 Testing Hugging Face model availability...")
        test_response = requests.get(HF_API_URL, headers=headers, timeout=10)
        print("Status Code:", test_response.status_code)
        try:
            print("Response JSON:", test_response.json())
        except ValueError:
            # The request itself succeeded; show the raw body instead of
            # pretending the connection failed.
            print("Response Text:", test_response.text)
    except Exception as e:
        print("❌ Connection Test Failed:", e)
def load_text_files(file_list):
    """Concatenate the contents of every readable file in *file_list*.

    Each file's text is prefixed with a newline separator; unreadable files
    are logged and skipped (best-effort). Returns the stripped result, or an
    empty string when nothing could be read.
    """
    parts = []
    for path in file_list:
        try:
            with open(path, "r", encoding="utf-8") as handle:
                parts.append("\n" + handle.read())
        except Exception as exc:
            # Skip unreadable files rather than aborting the whole load.
            print(f"Error reading {path}: {exc}")
    return "".join(parts).strip()
def chunk_text(text, max_chunk_length=500):
    """Split *text* into sentence-aligned chunks of roughly *max_chunk_length* chars.

    Sentences are detected by ``.!?`` followed by whitespace; consecutive
    sentences are packed into one chunk until the length budget is exceeded.
    A single sentence longer than the budget becomes its own oversized chunk.

    Fixes over the original: empty/whitespace input now returns ``[]``
    (previously ``[""]``), and an over-long first sentence no longer
    produces a leading empty chunk.
    """
    sentences = re.split(r'(?<=[.!?])\s+', text)
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        if len(current_chunk) + len(sentence) <= max_chunk_length:
            current_chunk += " " + sentence
        else:
            # Only flush non-empty chunks (guards the over-long-sentence case).
            if current_chunk.strip():
                chunks.append(current_chunk.strip())
            current_chunk = sentence
    trailing = current_chunk.strip()
    if trailing:
        chunks.append(trailing)
    return chunks
def embed_texts(texts):
    """Encode *texts* into L2-normalized numpy vectors using the module-level model."""
    vectors = model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
    return vectors
def save_cache(embeddings, chunks):
    """Persist embeddings and chunk texts to the module-level cache paths."""
    # np.save needs an array object, so the chunk list is wrapped first.
    chunk_array = np.array(chunks)
    np.save(EMBEDDING_CACHE_FILE, embeddings)
    np.save(CHUNKS_CACHE_FILE, chunk_array)
def load_cache():
    """Return ``(embeddings, chunks)`` from the disk cache, or ``(None, None)``.

    Both cache files must exist; otherwise the caller rebuilds from scratch.
    """
    # Guard clause: bail out early when either cache file is missing.
    if not (os.path.exists(EMBEDDING_CACHE_FILE) and os.path.exists(CHUNKS_CACHE_FILE)):
        return None, None
    embeddings = np.load(EMBEDDING_CACHE_FILE, allow_pickle=True)
    chunks = np.load(CHUNKS_CACHE_FILE, allow_pickle=True).tolist()
    print("✅ Loaded cached embeddings and chunks.")
    return embeddings, chunks
def retrieve_chunks(query, top_k=5):
    """Return the *top_k* corpus chunks most similar to *query*.

    Embeds the query and looks it up in the module-level NearestNeighbors
    index (cosine metric) over all chunk embeddings.
    """
    query_vector = embed_texts([query])
    _, neighbor_ids = nn_model.kneighbors(query_vector, n_neighbors=top_k)
    return [chunks[idx] for idx in neighbor_ids[0]]
def build_prompt(question):
    """Assemble the full RAG prompt: system rules, retrieved context, question."""
    passages = retrieve_chunks(question)
    context_block = "\n".join(passages)
    system_instruction = """You are an AI-supported financial expert. Your role is to answer questions strictly within the context of the university lecture "Financial Markets" (Universität Duisburg-Essen).
Important instructions:
1. Base your answers primarily on the provided lecture excerpts ("lecture_slides").
2. If an answer is not directly covered by the lecture content, you may elaborate — but **only if you are absolutely certain**. Avoid making up information.
3. If you are unsure, reply politely:
"Entschuldigung. Leider kenne ich die Antwort auf diese Frage nicht."
4. If a formula is relevant, show the **exact formula** and explain it in **simple terms**.
5. Do not give vague or speculative answers — it's better to skip a question than guess.
6. **Never generate your own questions. Only respond to the given question.**
7. **Always respond in German.**
8. Make your answers clear, fact-based, and well-structured.
"""
    # Context is fenced with explicit markers so the model can tell lecture
    # material apart from the user's question.
    return f"""{system_instruction}
Vorlesungsinhalte:
{context_block}
--- Ende der Vorlesungsinhalte ---
Frage des Nutzers (bitte nur diese beantworten): {question}
Antwort:"""
def respond(message, history):
    """Chat handler for ``gr.ChatInterface``: answer *message* via the HF API.

    Builds a RAG prompt, POSTs it to the Inference API, and strips the echoed
    prompt from ``generated_text``. On any failure the user gets a polite
    error string instead of an exception.

    Fixes over the original: ``response`` is pre-bound so the error path can
    never hit an unbound local; the bare ``except:`` is narrowed to
    ``except Exception``; and a non-dict first element in the API's list
    response no longer crashes the membership test.
    """
    response = None  # pre-bound so the except-branch can safely log the raw body
    try:
        prompt = build_prompt(message)
        payload = {
            "inputs": prompt,
            "parameters": {
                "temperature": 0.2,
                "max_new_tokens": 400,
                "stop": ["Frage:", "Question:", "User:", "Frage des Nutzers"]
            },
        }
        response = requests.post(HF_API_URL, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        output = response.json()
        # The API echoes the prompt inside "generated_text"; keep only the
        # newly generated continuation.
        if (isinstance(output, list) and output
                and isinstance(output[0], dict) and "generated_text" in output[0]):
            generated_text = output[0]["generated_text"]
            answer = generated_text[len(prompt):].strip()
        else:
            print("❗️HF API returned unexpected format:", output)
            answer = "❌ Modell hat keine gültige Antwort geliefert. Bitte später erneut versuchen."
    except Exception as e:
        print("API Error:", e)
        if response is not None:
            print("Raw HF response:", response.text)
        answer = "❌ Error contacting the model. Please check your token, timeout, or model availability."
    if history is None:
        history = []
    # NOTE(review): gr.ChatInterface manages history from the return value;
    # this append mirrors the original behavior but looks redundant — confirm
    # before removing.
    history.append({"role": "assistant", "content": answer})
    return answer
# --- INIT SECTION ---
# Load the embedding model, then obtain chunk embeddings either from the
# on-disk cache or by processing the lecture text files from scratch.
print("🔄 Initializing embedding model...")
model = SentenceTransformer(EMBEDDING_MODEL)

chunk_embeddings, chunks = load_cache()
if chunk_embeddings is None or chunks is None:
    print("🛠 No cache found. Processing text...")
    corpus = load_text_files(FILES)
    chunks = chunk_text(corpus)
    chunk_embeddings = embed_texts(chunks)
    save_cache(chunk_embeddings, chunks)
    print("✅ Embeddings and chunks cached.")

# Cosine-distance nearest-neighbor index over all chunk embeddings,
# used by retrieve_chunks() at query time.
nn_model = NearestNeighbors(metric="cosine")
nn_model.fit(chunk_embeddings)
# --- GRADIO INTERFACE ---
# Chat UI wired to respond(); message-style chatbot keeps role/content dicts.
demo = gr.ChatInterface(
    fn=respond,
    title="📚 RAG Chatbot Finanzmärkte",
    description="Stelle Fragen basierend auf den hochgeladenen Vorlesungstexten.",
    chatbot=gr.Chatbot(type="messages"),
)

if __name__ == "__main__":
    # Log API reachability before serving, then start the app.
    test_model_connection()
    demo.launch(debug=True)