"""Summarize cleaned hospital data files via the OpenRouter chat API.

For every ``*.json`` file in ``CleanedHospitalData`` the script asks an LLM
for a keyword-style summary (technologies, specialties, services) and writes
the result to the mirrored path under ``SummarizedCleanedHospitalData``.
Files whose output already exists are skipped, so the batch run is resumable.
"""

import multiprocessing as mp
import os
from urllib import response  # noqa: F401 -- kept from original file

import requests  # noqa: F401 -- kept from original file
from openai import OpenAI
from tqdm import tqdm  # noqa: F401 -- kept from original file

# Input directory; output paths mirror it under SummarizedCleanedHospitalData.
parent = './CleanedHospitalData'

# Read the API key once at import time so forked worker processes inherit it.
with open('./openrouteapi.txt', 'r') as f:
    openrouterapi = f.read().strip()

# Instruction sent after the document content.  The original source had this
# literal broken across a physical line break (a SyntaxError); reconstructed
# here as a valid parenthesized string with the same text.
SUMMARY_PROMPT = (
    'Summarize the above text with Hospital name as the heading and in key '
    'word format. Make it as precise as possible and do not assume anything '
    'apart from the text. Give me the keywords related to the technologies '
    'that are offered by the hospital, The specialties they have and the '
    'services they provide. Ignore writing everything else.\n'
    'Do not write any warnings or disclaimers, only useful text.'
)


def get_from_or(file_path):
    """Summarize one input file and write the result to the mirror tree.

    Parameters
    ----------
    file_path : str
        File name relative to ``parent``; must end with ``.json``.

    Returns
    -------
    str
        The model's summary text on success, or the string ``'error'`` when
        the file is not JSON, has already been summarized, or the API call /
        write fails (return convention kept for backward compatibility).
    """
    if not file_path.endswith('.json'):
        return 'error'
    file_path = os.path.join(parent, file_path)

    output_file = file_path.replace('CleanedHospitalData',
                                    'SummarizedCleanedHospitalData')
    # Skip already-summarized files so an interrupted run can resume.
    if os.path.exists(output_file):
        return 'error'

    with open(file_path, 'r') as file:
        content = file.read()

    # One client per call: each pool worker builds its own connection.
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=openrouterapi,
    )
    completion = client.chat.completions.create(
        model="x-ai/grok-4-fast:free",
        messages=[
            {'role': 'user', 'content': [{'type': 'text', 'text': content}]},
            {'role': 'user',
             'content': [{'type': 'text', 'text': SUMMARY_PROMPT}]},
        ],
    )
    try:
        output = completion.choices[0].message.content
        # Ensure the mirrored output directory exists before writing.
        os.makedirs(os.path.dirname(output_file), exist_ok=True)
        with open(output_file, 'w') as out_file:
            out_file.write(output)
        return output
    except (OSError, AttributeError, IndexError) as e:
        # Narrowed from a broad ``except Exception``: covers file-system
        # failures and an empty/malformed API response.  Still reported as
        # 'error' so callers see the same contract as before.
        print(f"Error occurred: {e}")
        return 'error'


if __name__ == '__main__':
    # Listing moved under the guard so importing this module (or spawn-based
    # workers re-executing it) does not hit the filesystem unnecessarily.
    files = os.listdir(parent)
    # Workload is network/I/O-bound, so the large pool is deliberate
    # (original used 100 processes).
    with mp.Pool(processes=100) as pool:
        pool.map(get_from_or, files)