Spaces:

Jan90
/

RemotelyBest_Development_of_AI_Application

Sleeping

App Files Files Community

RemotelyBest_Development_of_AI_Application / final_code.py

Jan90

Upload final_code.py

3fb595f verified about 1 year ago

raw

history blame

3.99 kB

	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Load model directly\n",
	"from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification, TextClassificationPipeline\n",
	"import torch\n",
	"import gradio as gr\n",
	"from openpyxl import load_workbook\n",
	"from numpy import mean\n",
	"\n",
	"# Checkpoint identifiers for the three models used by this app.\n",
	"SUMMARY_CHECKPOINT = \"suriya7/bart-finetuned-text-summarization\"\n",
	"KEYWORD_CHECKPOINT = \"transformer3/H2-keywordextractor\"\n",
	"RATING_CHECKPOINT = 'roberta-rating'\n",
	"\n",
	"# Summarization: tokenizer + seq2seq model.\n",
	"tokenizer = AutoTokenizer.from_pretrained(SUMMARY_CHECKPOINT)\n",
	"model = AutoModelForSeq2SeqLM.from_pretrained(SUMMARY_CHECKPOINT)\n",
	"\n",
	"# Keyword extraction: tokenizer + seq2seq model.\n",
	"tokenizer_keywords = AutoTokenizer.from_pretrained(KEYWORD_CHECKPOINT)\n",
	"model_keywords = AutoModelForSeq2SeqLM.from_pretrained(KEYWORD_CHECKPOINT)\n",
	"\n",
	"# Use the GPU when one is available, otherwise fall back to the CPU.\n",
	"# Only the classification pipeline below is given this device.\n",
	"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
	"\n",
	"# Rating classifier: fine-tuned sequence-classification model + tokenizer.\n",
	"new_model = AutoModelForSequenceClassification.from_pretrained(RATING_CHECKPOINT)\n",
	"new_tokenizer = AutoTokenizer.from_pretrained(RATING_CHECKPOINT)\n",
	"\n",
	"# Wrap the classifier in a pipeline so it can be called on raw text.\n",
	"classifier = TextClassificationPipeline(\n",
	"    model=new_model,\n",
	"    tokenizer=new_tokenizer,\n",
	"    device=device,\n",
	")\n",
	"\n",
	"# Display strings for ratings 1 through 5, e.g. 3 -> '3/5'.\n",
	"label_mapping = {stars: f'{stars}/5' for stars in range(1, 6)}\n",
	"\n",
	"def parse_xl(file_path):\n",
	"    \"\"\"Collect every non-empty cell value from an Excel workbook.\n",
	"\n",
	"    Every worksheet is walked row by row and cell by cell, and values are\n",
	"    appended in the order the loops visit them.\n",
	"\n",
	"    Args:\n",
	"        file_path: Workbook to read; forwarded to ``load_workbook`` as its\n",
	"            ``filename`` argument.\n",
	"\n",
	"    Returns:\n",
	"        list: The raw ``cell.value`` of each cell whose value is not ``None``.\n",
	"    \"\"\"\n",
	"    workbook = load_workbook(filename=file_path)\n",
	"\n",
	"    cells = []\n",
	"    for sheet in workbook.worksheets:\n",
	"        for row in sheet.iter_rows():\n",
	"            # Only cells whose value is None are skipped; falsy values such as 0 are kept.\n",
	"            cells.extend(cell.value for cell in row if cell.value is not None)\n",
	"\n",
	"    return cells\n",
	"\n",
	"def evaluate(file):\n",
	"    \"\"\"Rate, summarize and extract keywords from a workbook of reviews.\n",
	"\n",
	"    Every non-empty cell returned by ``parse_xl`` is treated as one review.\n",
	"    Each review is scored individually by ``classifier``; all reviews are then\n",
	"    concatenated and passed to the summarization and keyword models.\n",
	"\n",
	"    Args:\n",
	"        file: The uploaded workbook, forwarded unchanged to ``parse_xl``.\n",
	"\n",
	"    Returns:\n",
	"        tuple: ``(average_rating, summary, keywords)``; ``average_rating`` is\n",
	"        the mean of the per-review ratings rounded to two decimals.\n",
	"\n",
	"    Raises:\n",
	"        gr.Error: If the workbook contains no non-empty cells.\n",
	"    \"\"\"\n",
	"    # Cells can hold numbers or dates; the models below only accept text.\n",
	"    reviews = [str(value) for value in parse_xl(file)]\n",
	"    if not reviews:\n",
	"        # Without this guard the mean of an empty list is NaN and the\n",
	"        # summarizer would generate text from an empty input.\n",
	"        raise gr.Error(\"No reviews found: the workbook has no non-empty cells.\")\n",
	"\n",
	"    # The rating is the integer after the underscore in the predicted label.\n",
	"    # NOTE(review): confirm the classifier's labels are 1-based ratings;\n",
	"    # default Hugging Face labels start at LABEL_0.\n",
	"    # truncation=True lets the tokenizer cut reviews that are too long for the\n",
	"    # classifier instead of failing on them.\n",
	"    ratings = [\n",
	"        int(classifier(review, truncation=True)[0]['label'].split('_')[1])\n",
	"        for review in reviews\n",
	"    ]\n",
	"\n",
	"    # Concatenate the reviews, each followed by a single space.\n",
	"    text = \"\".join(f\"{review} \" for review in reviews)\n",
	"\n",
	"    # Summary: input is truncated to 1024 tokens before generation.\n",
	"    inputs = tokenizer([text], max_length=1024, truncation=True, return_tensors=\"pt\")\n",
	"    summary_ids = model.generate(inputs[\"input_ids\"], num_beams=2, min_length=50, max_length=1000)\n",
	"    summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]\n",
	"\n",
	"    # Keywords: same text, shorter generation budget.\n",
	"    inputs_keywords = tokenizer_keywords([text], max_length=1024, truncation=True, return_tensors=\"pt\")\n",
	"    summary_ids_keywords = model_keywords.generate(inputs_keywords[\"input_ids\"], num_beams=2, min_length=0, max_length=100)\n",
	"    keywords = tokenizer_keywords.batch_decode(summary_ids_keywords, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]\n",
	"\n",
	"    return round(mean(ratings), 2), summary, keywords\n",
	"\n",
	"# Web UI: one Excel upload in, three text boxes out (rating, summary, keywords).\n",
	"iface = gr.Interface(\n",
	"    fn=evaluate,\n",
	"    inputs=gr.File(label=\"Reviews\", file_types=[\".xlsx\", \".xlsm\", \".xltx\", \".xltm\"]),\n",
	"    outputs=[gr.Textbox(label=\"Rating\"), gr.Textbox(label=\"Summary\"), gr.Textbox(label=\"Keywords\")],\n",
	"    title='Summarize Reviews',\n",
	"    description=\"Evaluate and summarize a collection of reviews. Reviews are submitted as an Excel file, where each review is in its own cell.\"\n",
	")\n",
	"\n",
	"# share=True asks Gradio to also create a public share link.\n",
	"iface.launch(share=True)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "SolutionsInPR",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.12.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}