Spaces:

Victoria31
/

LehrChat

Sleeping

Victoria31 committed on Apr 25, 2025

Commit

29ab5d0

verified ·

1 Parent(s): 3c1ecf3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,7 +11,7 @@ import requests
 HF_MODEL = "HuggingFaceH4/zephyr-7b-beta"
 HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
 headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN', '').strip()}"}
-FILES = [f"Main{i}.txt" for i in range(1, 7)]
 CHUNK_SIZE = 500
 # Load and process files
@@ -20,9 +20,13 @@ def process_text_files(file_list, chunk_size=CHUNK_SIZE):
     for file in file_list:
         try:
             with open(file, encoding="utf-8") as f:
-                plain = f.read()
-            plain_text = re.sub(r"\\s+", " ", plain_text).strip()
             if not plain_text:
                 raise ValueError("Empty text file.")
@@ -31,7 +35,9 @@ def process_text_files(file_list, chunk_size=CHUNK_SIZE):
             combined_chunks.extend(chunks)
         except Exception as e:
             print(f"Fehler beim Verarbeiten von {file}: {e}")
     return combined_chunks
 # Embedding + Indexing
 def embed_texts(text_list):

 HF_MODEL = "HuggingFaceH4/zephyr-7b-beta"
 HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
 headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN', '').strip()}"}
+FILES = [f"Main{i}.txt" for i in range(1,1)]
 CHUNK_SIZE = 500
 # Load and process files
     for file in file_list:
         try:
             with open(file, encoding="utf-8") as f:
+                content = f.read()
+        except Exception as e:
+            print(f"Fehler beim Lesen von {file}: {e}")
+            continue
+        try:
+            plain_text = re.sub(r"\s+", " ", content).strip()
             if not plain_text:
                 raise ValueError("Empty text file.")
             combined_chunks.extend(chunks)
         except Exception as e:
             print(f"Fehler beim Verarbeiten von {file}: {e}")
     return combined_chunks
 # Embedding + Indexing
 def embed_texts(text_list):