Spaces:
Sleeping
Sleeping
| import os | |
| import openai | |
| import pandas as pd | |
| from dotenv import load_dotenv | |
# Read a local .env file (python-dotenv) before the environment lookup below.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Directory scanned for .txt inputs, and the CSV file the results are written to.
folder_path, output_csv = "racist_deeds_text", "deed_names_locations.csv"

# One dict per processed file; turned into a DataFrame at the end of the script.
data = list()
def _parse_labelled_reply(output):
    """Return the text that follows the ``Names:`` and ``Locations:`` labels.

    Each line is stripped before matching so indentation or a trailing ``\\r``
    does not hide a label, and only the leading label is removed (the rest of
    the line is kept verbatim). If a label appears more than once, the last
    occurrence wins. A missing label yields ``""`` for that element.
    """
    names, locations = "", ""
    for raw_line in output.split("\n"):
        line = raw_line.strip()
        if line.startswith("Names:"):
            names = line[len("Names:"):].strip()
        elif line.startswith("Locations:"):
            locations = line[len("Locations:"):].strip()
    return names, locations


def extract_names_and_locations(text):
    """
    Extract names and locations from text using OpenAI.

    Sends ``text`` to the ``gpt-4`` chat model with a system prompt that asks
    for two labelled lines (``Names: ...`` and ``Locations: ...``) and parses
    the reply.

    Args:
        text: The text to analyse.

    Returns:
        A ``(names, locations)`` tuple of strings holding whatever followed
        each label in the reply. An element is ``""`` when its label is
        missing. ``("", "")`` is returned for a blank string (no request is
        made) and whenever the request or reply access raises.
    """
    # A blank string has nothing to extract, so skip the API round-trip.
    if isinstance(text, str) and not text.strip():
        return "", ""

    try:
        # NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 SDK interface;
        # confirm the installed ``openai`` version still provides it.
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are an assistant that extracts names and locations from legal text. "
                        "For the given input, identify all names of people (grantors, grantees) and "
                        "locations (addresses, city, county, state). "
                        "Return the names as a comma-separated list and locations as a separate comma-separated list "
                        "strictly in the format:\nNames: [comma-separated names]\nLocations: [comma-separated locations]."
                    ),
                },
                {"role": "user", "content": text},
            ],
        )
        output = response.choices[0].message.content.strip()
    except Exception as e:
        # Deliberately best-effort: one failed request must not abort a batch,
        # so the error is reported and empty results are returned.
        print(f"Error extracting names and locations: {e}")
        return "", ""

    return _parse_labelled_reply(output)
# Process every .txt file in folder_path. The listing is sorted because
# os.listdir returns entries in arbitrary order, which would otherwise make
# the CSV row order differ between runs.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".txt"):
        continue
    file_path = os.path.join(folder_path, filename)
    # Decode explicitly as UTF-8 instead of the platform's locale default;
    # undecodable bytes are replaced so one bad file cannot abort the batch.
    with open(file_path, "r", encoding="utf-8", errors="replace") as file:
        text = file.read()
    names, locations = extract_names_and_locations(text)
    data.append({"Filename": filename, "Names": names, "Locations": locations})
    print(f"Processed {filename}")

# Fixed column list so the CSV still gets its header row when no files matched.
df = pd.DataFrame(data, columns=["Filename", "Names", "Locations"])
df.to_csv(output_csv, index=False)
print(f"Results saved to {output_csv}")