Victoria31 committed on
Commit
29ab5d0
·
verified ·
1 Parent(s): 3c1ecf3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -3
app.py CHANGED
@@ -11,7 +11,7 @@ import requests
11
  HF_MODEL = "HuggingFaceH4/zephyr-7b-beta"
12
  HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
13
  headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN', '').strip()}"}
14
- FILES = [f"Main{i}.txt" for i in range(1, 7)]
15
  CHUNK_SIZE = 500
16
 
17
  # Load and process files
@@ -20,9 +20,13 @@ def process_text_files(file_list, chunk_size=CHUNK_SIZE):
20
  for file in file_list:
21
  try:
22
  with open(file, encoding="utf-8") as f:
23
- plain = f.read()
 
 
 
24
 
25
- plain_text = re.sub(r"\\s+", " ", plain_text).strip()
 
26
 
27
  if not plain_text:
28
  raise ValueError("Empty text file.")
@@ -31,7 +35,9 @@ def process_text_files(file_list, chunk_size=CHUNK_SIZE):
31
  combined_chunks.extend(chunks)
32
  except Exception as e:
33
  print(f"Fehler beim Verarbeiten von {file}: {e}")
 
34
  return combined_chunks
 
35
 
36
  # Embedding + Indexing
37
  def embed_texts(text_list):
 
11
  HF_MODEL = "HuggingFaceH4/zephyr-7b-beta"
12
  HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
13
  headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN', '').strip()}"}
14
+ FILES = [f"Main{i}.txt" for i in range(1,1)]
15
  CHUNK_SIZE = 500
16
 
17
  # Load and process files
 
20
  for file in file_list:
21
  try:
22
  with open(file, encoding="utf-8") as f:
23
+ content = f.read()
24
+ except Exception as e:
25
+ print(f"Fehler beim Lesen von {file}: {e}")
26
+ continue
27
 
28
+ try:
29
+ plain_text = re.sub(r"\s+", " ", content).strip()
30
 
31
  if not plain_text:
32
  raise ValueError("Empty text file.")
 
35
  combined_chunks.extend(chunks)
36
  except Exception as e:
37
  print(f"Fehler beim Verarbeiten von {file}: {e}")
38
+
39
  return combined_chunks
40
+
41
 
42
  # Embedding + Indexing
43
  def embed_texts(text_list):