Spaces:
Runtime error
Runtime error
| # import os | |
| # from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| # import gradio as gr | |
| # # Ensure data directory exists | |
| # os.makedirs("data", exist_ok=True) | |
| # # Function to process uploaded files | |
| # def process_files(italy_file, france_file): | |
| # docs = [] | |
| # text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50) | |
| # # Save uploaded files | |
| # file_map = { | |
| # "data/italy.txt": italy_file, | |
| # "data/france.txt": france_file | |
| # } | |
| # for save_path, file_obj in file_map.items(): | |
| # if file_obj is not None: | |
| # with open(save_path, "wb") as f: | |
| # f.write(file_obj.read()) | |
| # # Process files | |
| # for filename in file_map: | |
| # try: | |
| # with open(filename, "r", encoding="utf-8") as f: | |
| # content = f.read() | |
| # split_docs = text_splitter.create_documents([content]) | |
| # docs.extend(split_docs) | |
| # except FileNotFoundError: | |
| # return f"β File not found: {filename}", [] | |
| # except Exception as e: | |
| # return f"⚠️ Error: {e}", [] | |
| # output = [doc.page_content for doc in docs] | |
| # return f"β Processed {len(output)} chunks.", output[:10] # Show first 10 chunks | |
| # # Gradio Interface | |
| # interface = gr.Interface( | |
| # fn=process_files, | |
| # inputs=[ | |
| # gr.File(label="Upload italy.txt"), | |
| # gr.File(label="Upload france.txt") | |
| # ], | |
| # outputs=[ | |
| # gr.Textbox(label="Status"), | |
| # gr.Textbox(label="First 10 Chunks (Preview)", lines=20) | |
| # ], | |
| # title="LangChain Text Splitter App", | |
| # description="Upload italy.txt and france.txt to split their contents into 300-character chunks using LangChain." | |
| # ) | |
| # if __name__ == "__main__": | |
| # interface.launch() | |
| import os | |
| import gradio as gr | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| import shutil | |
# Create the "data" directory that process_files copies uploads into.
# exist_ok=True makes this a no-op when the directory is already present.
os.makedirs("data", exist_ok=True)
def _upload_path(upload):
    """Return the filesystem path behind an uploaded-file value.

    Accepts a plain path (``str`` or ``os.PathLike``) as well as an object
    that exposes its path through a ``.name`` attribute, which is the only
    form the original code handled.
    """
    # NOTE(review): which of the two forms gr.File passes to the callback
    # presumably depends on the installed Gradio version / ``type`` setting;
    # supporting both avoids an AttributeError on plain strings.
    if isinstance(upload, (str, os.PathLike)):
        return os.fspath(upload)
    return upload.name


def process_files(italy_file, france_file):
    """Copy the uploaded text files into ``data/`` and split them into chunks.

    Args:
        italy_file: Upload for Italy (path string or object with ``.name``),
            or ``None`` when nothing was uploaded. Copied to
            ``data/italy.txt``.
        france_file: Same as ``italy_file``; copied to ``data/france.txt``.

    Returns:
        A ``(status, preview)`` tuple. ``status`` is a human-readable message;
        ``preview`` is a list with the text of the first 10 chunks, or an
        empty list when nothing was processed or an error occurred.
    """
    # NOTE(review): the leading "β" in the status strings looks like a
    # mis-encoded emoji; it is kept unchanged here -- confirm the intended
    # glyph before altering user-facing text.
    uploads = {
        "data/italy.txt": italy_file,
        "data/france.txt": france_file,
    }

    saved_paths = []
    try:
        # Do not rely on module import having created the directory.
        os.makedirs("data", exist_ok=True)
        for destination, upload in uploads.items():
            if upload is None:
                continue
            shutil.copy(_upload_path(upload), destination)
            saved_paths.append(destination)
    except Exception as e:  # UI boundary: report the failure, don't crash
        return f"β Error while saving files: {e}", []

    if not saved_paths:
        # Reporting "0 chunks" as a success would be misleading.
        return "No files uploaded. Please upload at least one .txt file.", []

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
    docs = []
    try:
        for path in saved_paths:
            with open(path, "r", encoding="utf-8") as f:
                content = f.read()
            docs.extend(text_splitter.create_documents([content]))
    except Exception as e:  # UI boundary: e.g. unreadable or non-UTF-8 file
        return f"β Error while reading files: {e}", []

    preview = [doc.page_content for doc in docs[:10]]
    return f"β Successfully split into {len(docs)} chunks.", preview
# Gradio UI: two file uploads feed process_files; its status message and
# chunk preview are shown in two textboxes. The app is launched as soon as
# this module is executed.
upload_inputs = [
    gr.File(label="Upload italy.txt"),
    gr.File(label="Upload france.txt"),
]
result_outputs = [
    gr.Textbox(label="Status"),
    gr.Textbox(label="First 10 Chunks (Preview)", lines=20),
]
demo = gr.Interface(
    fn=process_files,
    inputs=upload_inputs,
    outputs=result_outputs,
    title="Text Chunking App (LangChain)",
    description="Upload two .txt files (Italy & France). They will be split into 300-character chunks.",
)
demo.launch()