madankn79 committed on
Commit
967e77c
·
1 Parent(s): 2ddda43

testing again

Browse files
Files changed (4) hide show
  1. .idea/.gitignore +13 -0
  2. app.py +130 -0
  3. requirements.txt +24 -0
  4. xindus_dataset.csv +24 -0
.idea/.gitignore ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
9
+ /aws.xml
10
+ /misc.xml
11
+ /modules.xml
12
+ /vcs.xml
13
+ /xindus_t5base.iml
app.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import pandas as pd
4
+ from datasets import Dataset
5
+ from transformers import (
6
+ T5Tokenizer,
7
+ T5ForConditionalGeneration,
8
+ Trainer,
9
+ TrainingArguments
10
+ )
11
+ from huggingface_hub import login
12
+ from spaces import GPU # Required for ZeroGPU Spaces
13
+
14
+ import transformers
15
+ print("🔥 Transformers version:", transformers.__version__)
16
+
17
+ import torch
18
# Device handle used by train_model(); this app pins its model to the CPU.
device = torch.device("cpu")

# Authenticate with the Hugging Face Hub only when a token is configured.
# Calling login() with token=None falls back to an interactive prompt, which
# cannot be answered in a headless Space.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(token=hf_token)  # Using environment variable
else:
    print("⚠️ HF_TOKEN is not set; skipping Hub login (pushing to the Hub may fail).")

# Disable CUDA and set environment variables for Spaces with Stateless GPU
os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Ensure no GPUs are used
os.environ["ACCELERATE_DISABLE"] = "true"  # Disable Accelerate for safety

# Hub repository that the fine-tuned model is pushed to.
model_name = "madankn/xindus_t5base"

# Load the base tokenizer and model once at import time; preprocess() and
# load_model() read these module-level objects.
tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("t5-base")
model.to("cpu")  # Ensure model stays on CPU in main process
31
+
32
def preprocess(example):
    """Tokenize source texts and target summaries for T5 fine-tuning.

    Args:
        example: Mapping with "text" and "summary" entries. Each entry may be
            a single string or a list of strings (the batched form passed by
            ``Dataset.map(..., batched=True)``).

    Returns:
        A dict with "input_ids" and "attention_mask" for the prefixed source
        text (padded/truncated to 512 tokens) and "labels" for the summary
        (padded/truncated to 128 tokens, padding positions set to -100).
    """
    texts = example["text"]
    # Use the same task prefix that summarize() prepends at inference time,
    # so training and inference inputs have the same form.
    if isinstance(texts, str):
        texts = "summarize: " + texts
    else:
        texts = ["summarize: " + item for item in texts]

    inputs = tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=512,
    )
    labels = tokenizer(
        example["summary"],
        padding="max_length",
        truncation=True,
        max_length=128,
    )

    # Replace padding ids in the labels with -100 so the loss ignores them;
    # otherwise the model is trained to predict padding tokens.
    pad_id = tokenizer.pad_token_id
    label_ids = labels["input_ids"]
    if label_ids and isinstance(label_ids[0], list):
        # Batched call: one id sequence per example.
        label_ids = [
            [-100 if token_id == pad_id else token_id for token_id in sequence]
            for sequence in label_ids
        ]
    else:
        # Single example: one flat id sequence.
        label_ids = [-100 if token_id == pad_id else token_id for token_id in label_ids]

    return {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "labels": label_ids,
    }
50
+
51
+ # Load fine-tuned model for inference
52
def load_model():
    """Return the ``(model, tokenizer)`` pair to use for inference.

    The module-level ``tokenizer`` and ``model`` are initialised with the
    base checkpoint at import time, so a plain ``is None`` check would never
    pick up the fine-tuned weights. This function therefore loads
    ``./fine_tuned_t5`` whenever that checkpoint exists and has not been
    loaded yet, or has changed since the last load (detected through the
    modification time of its ``config.json``). When no fine-tuned checkpoint
    exists, the already-loaded module-level objects are returned unchanged.

    Returns:
        Tuple ``(model, tokenizer)``.
    """
    global tokenizer, model

    fine_tuned_dir = "./fine_tuned_t5"
    marker = os.path.join(fine_tuned_dir, "config.json")

    checkpoint_saved = os.path.isfile(marker)
    stamp = os.path.getmtime(marker) if checkpoint_saved else None
    # The last loaded checkpoint's timestamp is remembered on the function.
    stale = checkpoint_saved and getattr(load_model, "_loaded_stamp", None) != stamp

    if stale or tokenizer is None or model is None:
        tokenizer = T5Tokenizer.from_pretrained(fine_tuned_dir)
        model = T5ForConditionalGeneration.from_pretrained(fine_tuned_dir)
        model.to("cpu")
        load_model._loaded_stamp = stamp
    return model, tokenizer
59
+
60
def train_model():
    """Fine-tune ``t5-base`` on ``xindus_dataset.csv`` and push it to the Hub.

    The CSV is cleaned (rows missing a text or summary are dropped and
    surrounding whitespace is stripped), split 90/10 into train/test sets,
    tokenized with ``preprocess`` and trained for one epoch on the CPU. The
    resulting model and tokenizer are saved to ``./fine_tuned_t5`` and pushed
    to the ``model_name`` repository.

    Returns:
        A status message containing the Hub URL of the pushed model.

    Raises:
        ValueError: If the CSV has no "text" or "summary" column.
    """
    # Reload model/tokenizer inside the training function to avoid stale GPU bindings
    tokenizer = T5Tokenizer.from_pretrained("t5-base")
    model = T5ForConditionalGeneration.from_pretrained("t5-base").to(device)

    df = pd.read_csv("xindus_dataset.csv")
    df = df.rename(columns={"text_column_name": "text", "summary_column_name": "summary"})

    required = ["text", "summary"]
    missing = [column for column in required if column not in df.columns]
    if missing:
        raise ValueError(f"xindus_dataset.csv is missing required column(s): {missing}")

    # Drop incomplete rows and stray whitespace (e.g. a space after the comma
    # separator) so the tokenizer only ever sees clean strings.
    df = df.dropna(subset=required)
    for column in required:
        df[column] = df[column].astype(str).str.strip()
    # Restore a plain 0..n-1 index so no extra index column reaches the Dataset.
    df = df.reset_index(drop=True)

    dataset = Dataset.from_pandas(df).train_test_split(test_size=0.1)

    tokenized_datasets = dataset.map(
        preprocess,
        batched=True,
        remove_columns=dataset["train"].column_names,
    )

    training_args = TrainingArguments(
        output_dir="./results",
        logging_dir="./logs",
        logging_steps=50,
        save_steps=200,
        num_train_epochs=1,
        per_device_train_batch_size=2,
        per_device_eval_batch_size=2,
        weight_decay=0.01,
        learning_rate=2e-5,
        save_total_limit=1,
        push_to_hub=True,
        hub_model_id=model_name,
        hub_strategy="every_save",
        use_cpu=True,  # Critical: disable all CUDA use (replaces deprecated no_cuda)
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["test"],
        tokenizer=tokenizer,
    )

    trainer.train()

    # Keep a local copy for load_model() and publish the final weights.
    model.save_pretrained("./fine_tuned_t5")
    tokenizer.save_pretrained("./fine_tuned_t5")
    model.push_to_hub(model_name)
    tokenizer.push_to_hub(model_name)

    return f"✅ Training complete and pushed to: https://huggingface.co/{model_name}"
107
+
108
# Summarize function, wrapped with the ZeroGPU @GPU decorator
@GPU
def summarize(text):
    """Generate a short summary of ``text`` with the current model.

    Args:
        text: Input text to summarize.

    Returns:
        The decoded summary (generation is capped at 50 tokens), or an empty
        string when ``text`` is ``None``, empty or only whitespace.
    """
    # Nothing to summarize: avoid running generation on the bare task prefix.
    if text is None or not text.strip():
        return ""

    model, tokenizer = load_model()
    encoded = tokenizer(
        "summarize: " + text,
        return_tensors="pt",
        truncation=True,
        max_length=512,  # same input limit as used for training in preprocess()
    )
    output_ids = model.generate(
        encoded.input_ids,
        attention_mask=encoded.attention_mask,
        max_length=50,
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
115
+
116
+ # Gradio UI
117
# Training interface: takes no inputs and displays the text returned by
# train_model.
train_button = gr.Interface(
    fn=train_model,
    inputs=[],
    outputs="text",
    title="Train T5 on Xindus Data",
)

# Summarization interface: one text input, one text output.
summarize_interface = gr.Interface(
    fn=summarize,
    inputs="text",
    outputs="text",
    title="Summarize with Fine-Tuned T5",
)
119
+
120
+ # Combine interfaces
121
def combined_interface():
    """Build a tabbed Gradio app from the two interfaces and launch it.

    The "Training" tab renders ``train_button`` and the "Summarization" tab
    renders ``summarize_interface``.
    """
    tabs = (
        ("Training", train_button),
        ("Summarization", summarize_interface),
    )
    with gr.Blocks() as app:
        for label, interface in tabs:
            with gr.Tab(label):
                interface.render()
    app.launch()


# Launch app
combined_interface()
requirements.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core libraries
2
+ transformers>=4.38.0
3
+ datasets>=2.18.0
4
+ pandas>=1.5.3
5
+
6
+ # For tokenization and model loading
7
+ sentencepiece>=0.1.99
8
+ protobuf<4.0.0 # Avoid some compatibility issues with tokenizers
9
+
10
+ # Gradio interface
11
+ gradio>=4.14.0
12
+
13
+ # Hugging Face Hub access
14
+ huggingface_hub>=0.20.0
15
+
16
+ # For GPU / inference in Spaces (ZeroGPU, if needed)
17
+ spaces==0.22.0 # or latest version if you're using @GPU
18
+
19
+ # Optional logging and evaluation
20
+ scikit-learn>=1.2.2
21
+
22
+ # Misc
23
+ numpy>=1.23.0
24
+ accelerate>=0.21.0
xindus_dataset.csv ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ text,summary
2
+ BL2194 Muesli Brown with Asphalt Black Diamond Work with Cotton Thread Heavy Embroidered Bandhgala Designer Indo-Western Blazer,Designer Cotton Blazer Embroidered Brown Black Diamond Work
3
+ 7083 Mischka Gray and Mandys Brown Printed with Bright White Sleeves Super Soft Premium Cotton Designer Shirt,Premium Cotton Shirt Designer Gray Mandys Brown White Sleeves
4
+ BL2158 Gunmetal Navy Blue with Jade Black Chintz Textured Cross Buttoned Bandhgala Designer Blazer,Designer Blazer Navy Blue Black Chintz Textured Cross Buttoned
5
+ 7942 Wineberry With Tangaroa Navy Blue Plaid and Striped Twill Premium Cotton Designer Shirt,Striped Cotton Designer Shirt Wineberry Tangaroa Navy Blue Plaid
6
+ SOC001 Pack of 5: Sunny Yellow with polka dot Deep Teal Blue Striped Navy Blue And Black Premium Combed Cotton Ankle Length Socks,Cotton Ankle Length Socks Polka Dot Striped Yellow Blue Black
7
+ BL2165 Desert Storm Cream with Eminence Blue Leaves Printed Premium Cotton Designer Blazer,Cotton designer blazer blue leaves print premium quality
8
+ 7825 Bright White with Tuna Blue Plaid Premium Cotton Designer Shirt,Cotton designer shirt bright white with tuna blue plaid
9
+ 5834 Bright Turquoise Blue and Dark Cerulean Twill Plaid Premium Cotton Shirt,Cotton plaid shirt in turquoise and cerulean colors
10
+ Jade Black And Bright White Super Soft Premium Cotton Designer Lounge Pant,Cotton lounge pants jade black and white super soft
11
+ 8304 Bright White with potters Clay Brown Plaid Flannel Designer Overshirt,Flannel overshirt bright white and clay brown plaid
12
+ Jade Black with White Striped Super Soft Premium Cotton Designer Shirt,Cotton designer shirt jade black with white stripes
13
+ 6940 Iroko Brown with Spindle Blue Floral Printed Super Soft Premium Cotton Designer Shirt,Cotton designer shirt Iroko brown blue floral print
14
+ 8940 Bright White with Cadet Pink Multicolour Printed Super Soft Premium Cotton Shirt,Giza cotton shirt bright white with jade black patch
15
+ 8940 Bright White with Cadet Pink Multicolour Printed Super Soft Premium Cotton Shirt,Cotton shirt bright white with cadet pink print
16
+ 5622 Pacific Blue Striped Dobby Textured Premium Giza Cotton Shirt,Giza cotton striped shirt premium quality Pacific Blue
17
+ 8940 Bright White with Cadet Pink Multicolour Printed Super Soft Premium Cotton Shirt,Cotton shirt bright white with cadet pink print
18
+ ST460CBG Navy Cross Buttoned Bandhgala/Mandarin Wool-Silk blend Suit,Wool-silk blend Navy Bandhgala suit with cross buttons
19
+ 8166 Half Charcoal Grey and Half Catalina Blue Chambray Textured Premium Cotton Hoodie Designer jacket,Cotton hoodie half charcoal grey half blue chambray
20
+ 6174 Cloud Burst Blue Butterfly Printed Premium Cotton Kurta Shirt,Cotton kurta shirt blue butterfly print premium quality
21
+ 6834 Manatee Grey with Multicolour Floral Print Premium Cotton Shirt,Cotton shirt Manatee Grey multicolour floral print
22
+ TS524 Manatee with Cloud Burst Jacquard Textured Super Soft Pique T-Shirt,Manatee with Cloud Burst Jacquard Textured Super Soft Pique T-Shirt
23
+
24
+