Jagukumar committed on
Commit
e03340e
·
verified ·
1 Parent(s): 1b10704

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -42
app.py CHANGED
@@ -1,42 +1,52 @@
1
- from processing import extract_text, preprocess_text_generalized, get_embeddings_from_huggingface
2
- import gradio as gr
3
- import numpy as np
4
-
5
- def process_file(file_path):
6
- try:
7
- # Step 1: Extract text
8
- extracted_text = extract_text(file_path)
9
-
10
- # Step 2: Preprocess text
11
- cleaned_text = preprocess_text_generalized(extracted_text)
12
-
13
- # Step 3: Generate embeddings
14
- embeddings = get_embeddings_from_huggingface(cleaned_text)
15
-
16
- # Step 4: Save embeddings to a temporary file
17
- temp_file_path = "embeddings.npy"
18
- np.save(temp_file_path, embeddings)
19
-
20
- # Return the top 10 embeddings and the file path for download
21
- top_10_embeddings = embeddings[:10].tolist()
22
- return f"Top 10 Embeddings: {top_10_embeddings}", temp_file_path
23
- except Exception as e:
24
- return str(e), None
25
-
26
- # Define Gradio Interface
27
- interface = gr.Interface(
28
- fn=process_file,
29
- inputs=gr.File(label="Upload a file (CSV, PDF, JSON)", type="filepath"),
30
- outputs=[
31
- gr.Textbox(label="Top 10 Embeddings"),
32
- gr.File(label="Download Full Embeddings"),
33
- ],
34
- title="Embedding Converter Using Hugging Face Model",
35
- description=(
36
- "Upload a file (CSV, PDF, or JSON) to generate embeddings using "
37
- "Hugging Face models. View the top 10 embeddings and download entire embedding file."
38
- ),
39
- )
40
-
41
- if __name__ == "__main__":
42
- interface.launch()
 
 
 
 
 
 
 
 
 
 
 
1
+ from processing import extract_text, preprocess_text_generalized, get_embeddings_from_huggingface
2
+ import gradio as gr
3
+ import numpy as np
4
+ import spacy
5
+ import os
6
+
7
def _load_spacy_model(name="en_core_web_sm"):
    """Return the spaCy pipeline *name*, installing it first if it is missing.

    ``spacy.load`` is tried first. If it raises ``OSError`` the model is
    downloaded with the interpreter that is running this app and the load is
    retried once.

    Args:
        name: Name of the spaCy model package to load.

    Returns:
        The object returned by ``spacy.load(name)``.

    Raises:
        subprocess.CalledProcessError: If the download command exits non-zero.
        OSError: If the model still cannot be loaded after the download.
    """
    import importlib
    import subprocess
    import sys

    try:
        return spacy.load(name)
    except OSError:
        # sys.executable (rather than a bare "python" resolved through the
        # shell) installs the model into the environment this process imports
        # from. check=True reports a failed download right here instead of
        # letting it resurface as a second, less informative OSError.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", name],
            check=True,
        )
        # The package was installed after interpreter start-up, so the import
        # system's cached directory listings may not include it yet.
        importlib.invalidate_caches()
        return spacy.load(name)


# NOTE(review): `nlp` is not referenced anywhere else in the visible file;
# presumably the load is kept for its side effect of making sure the model is
# installed - confirm against the `processing` module.
nlp = _load_spacy_model()
13
+
14
+
15
def process_file(file_path):
    """Turn an uploaded file into embeddings plus a downloadable ``.npy`` file.

    The file goes through ``extract_text``, ``preprocess_text_generalized``
    and ``get_embeddings_from_huggingface`` in that order, and the result is
    written with ``np.save``.

    Args:
        file_path: Path of the uploaded file, as supplied by the Gradio
            ``File`` input.

    Returns:
        A ``(message, path)`` tuple. On success ``message`` shows the first
        ten entries of the embeddings and ``path`` points at the saved
        ``embeddings.npy``. On any failure ``message`` is the exception text
        and ``path`` is ``None``.
    """
    import os
    import tempfile

    try:
        # Step 1: Extract text
        raw_text = extract_text(file_path)

        # Step 2: Preprocess text
        cleaned_text = preprocess_text_generalized(raw_text)

        # Step 3: Generate embeddings
        # np.save converts its argument to an array anyway; doing it up front
        # also makes the slice / ``tolist`` below work if the helper returns a
        # plain list instead of an ndarray.
        embeddings = np.asarray(get_embeddings_from_huggingface(cleaned_text))

        # Step 4: Save embeddings to a temporary file
        # Every call gets its own directory so that concurrent requests cannot
        # overwrite each other's output; the download keeps the same file name.
        # NOTE(review): these directories are not removed here - confirm the
        # host cleans its temp dir, or add a cleanup step.
        output_path = os.path.join(
            tempfile.mkdtemp(prefix="embeddings_"), "embeddings.npy"
        )
        np.save(output_path, embeddings)

        # Return the top 10 embeddings and the file path for download
        preview = embeddings[:10].tolist()
        return f"Top 10 Embeddings: {preview}", output_path
    except Exception as exc:
        # UI boundary: report the failure in the text output rather than
        # letting the exception propagate out of the handler.
        return str(exc), None
35
+
36
# Gradio UI: a single file upload goes in; a text preview and a downloadable
# file come out (the two values returned by process_file, in that order).
_UPLOAD_INPUT = gr.File(label="Upload a file (CSV, PDF, JSON)", type="filepath")
_RESULT_OUTPUTS = [
    gr.Textbox(label="Top 10 Embeddings"),
    gr.File(label="Download Full Embeddings"),
]
_DESCRIPTION = (
    "Upload a file (CSV, PDF, or JSON) to generate embeddings using "
    "Hugging Face models. View the top 10 embeddings and download entire embedding file."
)

interface = gr.Interface(
    fn=process_file,
    inputs=_UPLOAD_INPUT,
    outputs=_RESULT_OUTPUTS,
    title="Embedding Converter Using Hugging Face Model",
    description=_DESCRIPTION,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()