asad231's picture
Update app.py
5c58f4c verified
# import os
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# import gradio as gr
# # Ensure data directory exists
# os.makedirs("data", exist_ok=True)
# # Function to process uploaded files
# def process_files(italy_file, france_file):
# docs = []
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
# # Save uploaded files
# file_map = {
# "data/italy.txt": italy_file,
# "data/france.txt": france_file
# }
# for save_path, file_obj in file_map.items():
# if file_obj is not None:
# with open(save_path, "wb") as f:
# f.write(file_obj.read())
# # Process files
# for filename in file_map:
# try:
# with open(filename, "r", encoding="utf-8") as f:
# content = f.read()
# split_docs = text_splitter.create_documents([content])
# docs.extend(split_docs)
# except FileNotFoundError:
# return f"❌ File not found: {filename}", []
# except Exception as e:
# return f"⚠️ Error: {e}", []
# output = [doc.page_content for doc in docs]
# return f"✅ Processed {len(output)} chunks.", output[:10] # Show first 10 chunks
# # Gradio Interface
# interface = gr.Interface(
# fn=process_files,
# inputs=[
# gr.File(label="Upload italy.txt"),
# gr.File(label="Upload france.txt")
# ],
# outputs=[
# gr.Textbox(label="Status"),
# gr.Textbox(label="First 10 Chunks (Preview)", lines=20)
# ],
# title="LangChain Text Splitter App",
# description="Upload italy.txt and france.txt to split their contents into 300-character chunks using LangChain."
# )
# if __name__ == "__main__":
# interface.launch()
import os
import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
import shutil
# Make sure the upload directory is present before any request is handled;
# exist_ok keeps restarts from failing when it is already there.
upload_dir = "data"
os.makedirs(upload_dir, exist_ok=True)
def _upload_source_path(upload):
    """Return the filesystem path behind a Gradio file upload.

    Depending on the Gradio version and the ``gr.File`` ``type`` setting, the
    callback receives either a plain path string or a temp-file wrapper that
    exposes the path as ``.name``; both forms are accepted here.
    """
    return getattr(upload, "name", upload)


def process_files(italy_file, france_file):
    """Save the uploaded text files under ``data/`` and split them into chunks.

    Args:
        italy_file: Upload for the Italy text (path string or object with a
            ``.name`` path attribute), or ``None`` when nothing was uploaded.
        france_file: Same as ``italy_file``, for the France text.

    Returns:
        A ``(status, preview)`` tuple. ``status`` is a human-readable message;
        ``preview`` is a list with the text of at most the first 10 chunks and
        is empty when nothing was processed or an error occurred.
    """
    # Destination path -> upload; insertion order keeps Italy before France.
    uploads = {
        "data/italy.txt": italy_file,
        "data/france.txt": france_file,
    }

    saved_paths = []
    try:
        for destination, upload in uploads.items():
            if upload is None:
                continue
            shutil.copy(_upload_source_path(upload), destination)
            saved_paths.append(destination)
    except Exception as e:
        return f"❌ Error while saving files: {e}", []

    # Nothing uploaded: say so instead of reporting a "successful" empty split.
    if not saved_paths:
        return "⚠️ No files uploaded. Please upload at least one .txt file.", []

    # Built only once there is something to split.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
    docs = []
    try:
        for path in saved_paths:
            with open(path, "r", encoding="utf-8") as f:
                content = f.read()
            docs.extend(text_splitter.create_documents([content]))
    except Exception as e:
        return f"❌ Error while reading files: {e}", []

    preview = [doc.page_content for doc in docs[:10]]
    return f"✅ Successfully split into {len(docs)} chunks.", preview
# Gradio UI: two file uploads in, a status line and a chunk preview out.
upload_inputs = [
    gr.File(label="Upload italy.txt"),
    gr.File(label="Upload france.txt"),
]
result_outputs = [
    gr.Textbox(label="Status"),
    gr.Textbox(label="First 10 Chunks (Preview)", lines=20),
]
demo = gr.Interface(
    fn=process_files,
    inputs=upload_inputs,
    outputs=result_outputs,
    title="Text Chunking App (LangChain)",
    description="Upload two .txt files (Italy & France). They will be split into 300-character chunks.",
)
# The app is launched as soon as this module is executed.
demo.launch()