# SwissAI-Team7 / summarize.py
# Razican's picture
# Upload 3 files (#4)
# 6f81945 verified
import os
from urllib import response
import requests
from tqdm import tqdm
from openai import OpenAI
# Directory containing the cleaned hospital files that will be summarised.
parent = './CleanedHospitalData'

# OpenRouter API key, loaded once when the module is imported; surrounding
# whitespace (e.g. a trailing newline) is stripped.
with open('./openrouteapi.txt', 'r') as key_file:
    openrouterapi = key_file.read().strip()
# Instruction sent as a second user message, after the document text itself.
_SUMMARY_PROMPT = (
    'Summarize the above text with Hospital name as the heading and in key word format. '
    'Make it as precise as possible and do not assume anything apart from the text. '
    'Give me the keywords related to the technologies that are offered by the hospital, '
    'The specialties they have and the services they provide. '
    'Ignore writing everything else. '
    'Do not write any warnings or disclaimers, only useful text.'
)


def get_from_or(file_path):
    """Summarise one hospital file through OpenRouter and save the result.

    ``file_path`` is a file name relative to ``parent``. The summary is written
    to the same relative path with ``CleanedHospitalData`` replaced by
    ``SummarizedCleanedHospitalData``.

    Args:
        file_path: Name of a ``.json`` file inside ``parent``.

    Returns:
        The summary text on success. The string ``'error'`` when the name does
        not end in ``.json``, when the output file already exists, or when
        reading, the API request, or writing fails.
    """
    if not file_path.endswith('.json'):
        return 'error'

    file_path = os.path.join(parent, file_path)
    output_file = file_path.replace('CleanedHospitalData', 'SummarizedCleanedHospitalData')
    # An existing output means this file was already processed; skip it.
    if os.path.exists(output_file):
        return 'error'

    # Everything that can fail (read, network call, write) is inside the try so
    # that a single bad file or API error cannot abort the whole pool.map run.
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()

        client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=openrouterapi,
        )
        completion = client.chat.completions.create(
            # Previously tried: "deepseek/deepseek-chat-v3.1:free".
            model="x-ai/grok-4-fast:free",
            messages=[
                {
                    'role': 'user',
                    'content': [{'type': 'text', 'text': content}],
                },
                {
                    'role': 'user',
                    'content': [{'type': 'text', 'text': _SUMMARY_PROMPT}],
                },
            ],
        )
        output = completion.choices[0].message.content

        # Validate before opening the output file: an empty file left behind
        # would make the exists-check above skip this input on every rerun.
        if not output:
            raise ValueError('model returned an empty summary')

        output_dir = os.path.dirname(output_file)
        if output_dir:
            # exist_ok tolerates several workers creating the directory at once.
            os.makedirs(output_dir, exist_ok=True)
        with open(output_file, 'w', encoding='utf-8') as out_file:
            out_file.write(output)
        return output
    except Exception as e:
        print(f"Error occurred: {e}")
        return 'error'
import multiprocessing as mp

# Every entry of the input directory; names that do not end in ``.json`` are
# rejected inside get_from_or, so no filtering is done here.
files = os.listdir(parent)

if __name__ == '__main__':
    # Never start more workers than there are files (the pool size was
    # previously fixed at 100), and keep at least one so that an empty
    # directory does not make Pool raise ValueError.
    worker_count = max(1, min(100, len(files)))
    with mp.Pool(processes=worker_count) as pool:
        pool.map(get_from_or, files)