tregu0458 committed on
Commit
45a8b80
·
verified ·
1 Parent(s): 09131b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -3
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
  from langchain_community.document_loaders import YoutubeLoader
 
3
  from fastapi import FastAPI
4
 
5
  def process_youtube_url(url, language):
@@ -12,12 +13,18 @@ def process_youtube_url(url, language):
12
  docs = loader.load()
13
  text = str(docs)
14
  char_count = len(text)
15
- return text, [], "", char_count
 
 
 
 
 
 
16
  except Exception as e:
17
  error_msg = str(e)
18
  available_languages = extract_available_languages(error_msg)
19
  recommended_language = extract_recommended_language(error_msg)
20
- return f"Error: {error_msg}", available_languages, recommended_language, 0
21
 
22
  def extract_available_languages(error_msg):
23
  languages = []
@@ -47,7 +54,7 @@ iface = gr.Interface(
47
  gr.Dropdown(label="Language",value="ja",choices=["en","en-US", "ja", "fr","de","it"],allow_custom_value=True),
48
  ],
49
  outputs=[
50
- gr.Textbox(label="Loaded Documents / Error Message",show_copy_button=True),
51
  gr.Dropdown(label="Available Languages", allow_custom_value=True),
52
  gr.Textbox(label="Recommended Language"),
53
  gr.Number(label="Character Count")
 
1
  import gradio as gr
2
  from langchain_community.document_loaders import YoutubeLoader
3
+ from langchain.text_splitter import TokenTextSplitter
4
  from fastapi import FastAPI
5
 
6
  def process_youtube_url(url, language):
 
13
  docs = loader.load()
14
  text = str(docs)
15
  char_count = len(text)
16
+
17
+ text_splitter = TokenTextSplitter(chunk_size=32000, chunk_overlap=0)
18
+ chunks = text_splitter.split_text(text)
19
+
20
+ output_textboxes = [gr.Textbox(label=f"Chunk {i+1}", value=chunk, show_copy_button=True) for i, chunk in enumerate(chunks)]
21
+
22
+ return output_textboxes, [], "", char_count
23
  except Exception as e:
24
  error_msg = str(e)
25
  available_languages = extract_available_languages(error_msg)
26
  recommended_language = extract_recommended_language(error_msg)
27
+ return [gr.Textbox(label="Error", value=error_msg, show_copy_button=True)], available_languages, recommended_language, 0
28
 
29
  def extract_available_languages(error_msg):
30
  languages = []
 
54
  gr.Dropdown(label="Language",value="ja",choices=["en","en-US", "ja", "fr","de","it"],allow_custom_value=True),
55
  ],
56
  outputs=[
57
+ gr.Column(label="Loaded Documents / Error Message"),
58
  gr.Dropdown(label="Available Languages", allow_custom_value=True),
59
  gr.Textbox(label="Recommended Language"),
60
  gr.Number(label="Character Count")