sunny333 committed on
Commit
ec0c7bc
·
1 Parent(s): 0cc4366
Files changed (40) hide show
  1. README.md +10 -2
  2. app.py +10 -0
  3. data/cleaned/__init__.py +0 -0
  4. data/cleaned/candidates.csv +21 -0
  5. data/cleaned/candidates.json +162 -0
  6. data/cleaned/cleanedReq.json +89 -0
  7. data/cleaned/jsonResult.json +0 -0
  8. data/cleaned/reqs.csv +95 -0
  9. data/image/arch.png +0 -0
  10. data/raw/__init__.py +0 -0
  11. data/raw/jobreq.txt +28 -0
  12. params.yaml +9 -0
  13. requirements.txt +16 -0
  14. research/research.ipynb +629 -0
  15. schema.yaml +0 -0
  16. src/AI_AGENT_RESUME_SELECTOR/__init__.py +0 -0
  17. src/AI_AGENT_RESUME_SELECTOR/__pycache__/__init__.cpython-312.pyc +0 -0
  18. src/AI_AGENT_RESUME_SELECTOR/__pycache__/__init__.cpython-313.pyc +0 -0
  19. src/AI_AGENT_RESUME_SELECTOR/components/__init__.py +0 -0
  20. src/AI_AGENT_RESUME_SELECTOR/components/__pycache__/__init__.cpython-312.pyc +0 -0
  21. src/AI_AGENT_RESUME_SELECTOR/components/__pycache__/__init__.cpython-313.pyc +0 -0
  22. src/AI_AGENT_RESUME_SELECTOR/components/__pycache__/canddidatePreprocessingAgent.cpython-312.pyc +0 -0
  23. src/AI_AGENT_RESUME_SELECTOR/components/__pycache__/canddidatePreprocessingAgent.cpython-313.pyc +0 -0
  24. src/AI_AGENT_RESUME_SELECTOR/components/__pycache__/jobpreprocessingAgent.cpython-313.pyc +0 -0
  25. src/AI_AGENT_RESUME_SELECTOR/components/__pycache__/myagent.cpython-313.pyc +0 -0
  26. src/AI_AGENT_RESUME_SELECTOR/components/agentUI.py +0 -0
  27. src/AI_AGENT_RESUME_SELECTOR/components/canddidatePreprocessingAgent.py +76 -0
  28. src/AI_AGENT_RESUME_SELECTOR/components/dataIngestion.py +26 -0
  29. src/AI_AGENT_RESUME_SELECTOR/components/entity/__init__.py +0 -0
  30. src/AI_AGENT_RESUME_SELECTOR/components/entity/__pycache__/__init__.cpython-312.pyc +0 -0
  31. src/AI_AGENT_RESUME_SELECTOR/components/entity/__pycache__/__init__.cpython-313.pyc +0 -0
  32. src/AI_AGENT_RESUME_SELECTOR/components/entity/__pycache__/projectEntity.cpython-312.pyc +0 -0
  33. src/AI_AGENT_RESUME_SELECTOR/components/entity/__pycache__/projectEntity.cpython-313.pyc +0 -0
  34. src/AI_AGENT_RESUME_SELECTOR/components/entity/projectEntity.py +16 -0
  35. src/AI_AGENT_RESUME_SELECTOR/components/jobpreprocessingAgent.py +52 -0
  36. src/AI_AGENT_RESUME_SELECTOR/components/myagent.py +134 -0
  37. src/__init__.py +0 -0
  38. src/__pycache__/__init__.cpython-312.pyc +0 -0
  39. src/__pycache__/__init__.cpython-313.pyc +0 -0
  40. template.py +33 -0
README.md CHANGED
@@ -1,2 +1,10 @@
1
- # multiagent_coordinator_agent
2
- coordination between multiple agents
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ sdk: gradio
4
+ emoji: 🚀
5
+ colorFrom: blue
6
+ colorTo: yellow
7
+ pinned: true
8
+ short_description: AI recruiter
9
+ sdk_version: 5.23.1
10
+ ---
app.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.AI_AGENT_RESUME_SELECTOR.components.entity import projectEntity
2
+ from src.AI_AGENT_RESUME_SELECTOR.components import canddidatePreprocessingAgent
3
+ from src.AI_AGENT_RESUME_SELECTOR.components import jobpreprocessingAgent
4
+
5
+ from src.AI_AGENT_RESUME_SELECTOR.components import myagent
6
+
7
+ myagent.face.launch()
8
+
9
+
10
+
data/cleaned/__init__.py ADDED
File without changes
data/cleaned/candidates.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,skills,degree_names,professional_company_names,related_skils_in_job,start_dates,end_dates
2
+ 361,"['Python', 'LeetCode', 'GDB', 'GitHub', 'HTML', 'MySQL', 'Lucidchart', 'Postman', 'OWASP ZAP', 'Jira']",['Bachelor of Science'],"['Charles Schwab', 'Sysco', 'Accenture']","[['Python', 'GDB', 'GitHub'], ['PowerShell'], ['OWASP ZAP', 'Jira']]","['2020', '2017', '2015']","['current', '2020', '2017']"
3
+ 73,"['assembly', 'ATM', 'Cadence', 'calibration', 'CA', 'hardware', 'DC', 'de-bugging', 'Ethernet', 'Graphic', 'ISO 9000', 'LabView', 'layout', 'Linux OS', 'Logic', 'Mentor', 'Access', 'MS Office', 'Windows', 'multimeters', 'network', 'networking', 'ORCAD', 'oscilloscopes', 'power supplies', 'procurement', 'purchasing', 'Quality Assurance', 'Routers', 'schematics', 'signal generators', 'SONET', 'Switches', 'T1', 'test equipment', 'trouble shooting', 'troubleshooting', 'validation', 'View']",['Associate Degree'],"['Company Name', 'Company Name', 'Company Name']","[['testing', 'troubleshooting', 'ISO 9000', 'schematics', 'PCB rework'], ['testing', 'troubleshooting', 'ISO 9000', 'schematics', 'PCB rework'], ['testing', 'troubleshooting', 'ISO 9000', 'schematics', 'PCB layout']]","['August 2007', 'January 2002', 'April 1995']","['October 2014', 'June 2007', 'November 2001']"
4
+ 374,"['Business Analyst', 'Data Analysis', 'Machine Learning', 'Python', 'C++', 'DotNet', 'Java', 'PyTorch']",['B.Tech (Civil)'],['CITI'],"[['Data Analysis', 'Machine Learning']]",['March 2019'],['June 2020']
5
+ 155,"['Software Engineer', 'Data Analyst', 'Machine Learning', 'Text Analytics', 'Software Development', 'Object Oriented Programming', 'Pandas', 'Numpy', 'Java', 'Python', 'SpringBoot', 'Laravel']",['B.Tech'],['KLP Technology Solutions'],"[['Machine Learning', 'Software Engineering', 'Feature Engineering', 'Evaluation Methods']]",['Jan 2019'],['till date']
6
+ 104,"['Project Management', 'Secret Clearance', 'Risk management', 'Performance tracking and evaluation', 'Team building', 'Project analysis', 'Leadership mentoring', 'Business and requirements analysis', 'Operations management', 'Decisive leader']","['Graduate Certificate', 'MBA', 'Bachelor of Science']","['Company Name', 'Company Name', 'Company Name']","[['Project management', 'Leadership', 'Risk management', 'Business development', 'Process improvement'], ['Technical support', 'Project management', 'Risk management'], ['Leadership', 'Training', 'Logistics']]","['01/2004', '01/2002', '07/1991']","['Current', '01/2004', '01/2002']"
7
+ 394,"['Microsoft Applications', 'Network Security', 'Networking', 'PC hardware and software installation, configuration, and troubleshooting', 'Remote Desktop and Help Desk Management', 'Verbal Communication', 'Technical Support', 'Team Leadership', 'Programming Languages', 'On-call tech support', 'Windows & Mac OS', 'Wiring/Wire Spicing: Cat3, Cat5, Cat5e, Coaxial', 'Management', 'VoIP, TCP/IP, IPSec, ATM, SS7, IPX, DNS, BIND, DHCP, HSRP and LAN/WAN architecture', 'Application Development', 'Voice Over IP Telephone', 'Inventory Management']","['Bachelor Degree', 'Associate Degree']","['N/A', 'Company Name', 'Company Name']","[['Microsoft Applications', 'Windows Applications', 'Mac OS and IOS', 'Network routers', 'Cisco ASA firewall', 'Juniper Net-screen', 'LANs, WANs', 'Cloud Experience'], ['Microsoft applications', 'Windows and Mac OS', 'Linux', 'Web Development'], ['Customer Support', 'Technical Support', 'Network Administration', 'Inventory Management']]","['August 2006', 'August 2013', 'July 2014']","['January 2013', 'September 2014', 'Current']"
8
+ 377,[],"['M.S.', 'B.S.']","['FreshDirect', 'Marketing Science Associates']","[[], []]","['April 2018', 'April 2017']","['September 2018', 'September 2017']"
9
+ 124,"['Project Management', 'Secret Clearance', 'Risk management', 'Performance tracking and evaluation', 'Team building', 'Project analysis', 'Leadership mentoring', 'Business and requirements analysis', 'Operations management', 'Decisive leader']","['Graduate Certificate', 'MBA', 'Bachelor of Science']","['Company Name', 'Company Name', 'Company Name']","[['Project management', 'Leadership', 'Risk management', 'Business development', 'Process improvement'], ['Technical support', 'Project management', 'Risk management'], ['Leadership', 'Training', 'Logistics']]","['01/2004', '01/2002', '07/1991']","['Current', '01/2004', '01/2002']"
10
+ 68,"['C', 'R', 'Catia', 'AutoCAD', 'ANSYS', 'Microsoft office', 'Product Project Management', 'GD&T', 'Minitab', 'Kanban', 'Kaizen', 'FMEA', 'development', 'Toyota production systems', 'APQP', 'PPAP', '8D', '5 why', ""KPI's"", 'Reliability SAE Standards', 'Vehicle ergonomics', 'DVP&R', 'QCC', 'CNC engineering', 'Programming', 'anatomy', 'benchmarking', 'lean manufacturing', 'manufacturing process', 'cost analysis', 'packaging', 'process engineering', 'Product Development', 'surveys', 'validation', 'welding']","['Master of Science', 'Bachelor of Engineering']","['Company Name', 'Company Name']","[[""Delphi's SPQVC"", 'APQP', 'PFMEA', 'lean manufacturing'], ['APQP', 'DVP&R', 'PPAP', 'DFMEA', 'Toyota standards', 'SQD', 'quality tools']]","['07/2017', '06/2013']","['08/2017', '06/2016']"
11
+ 450,"['Data Science', 'Data Analysis', 'Data Analytics', 'Data Mining', 'Deep Learning', 'Machine Learning', 'Artificial Intelligence', 'Linear Regression', 'Python', 'Jupyter', 'OpenCV']",['B.Tech'],['NaxApp'],[None],['Apr 2020'],['May 2020']
12
+ 9,"['PLC', 'IEC 61131 (Ladder Logic, Functional Block Diagram, Structured Text, Instruction List.)', 'Java', 'C', 'Visual Basic', 'VHDL', 'PSpice', 'Assembly (Intel, Motorola, TI)', 'Labview', 'AutoCAD', 'Inventor', 'Matlab', 'Microsoft Office', 'PSIM', 'Easy Power', 'Xilinx ISE', 'Printed Circuit Board CAD (Protel)', 'Siemens Step 7', 'Wago CoDeSys', 'Allen Bradley RSLogix', 'ERP (Alliance, Global Shop, XA, SAP)', 'API', 'automation', 'budgeting', 'cabling', 'CAD', 'Conversion', 'client', 'clients', 'DC', 'Designing', 'flash', 'Functional', 'hardware design', 'HP', 'HVAC', 'instruction', 'Intel', 'microprocessor', 'Modeling', 'Motorola', 'Power distribution', 'power generation', 'processes', 'Programming', 'proposals', 'Renovation', 'renovations', 'safety', 'scheduling', 'schematics', 'Siemens', 'simulation', 'staff management', 'Structured', 'Supervising', 'switchgear', 'tender', 'troubleshoot', 'troubleshooting', 'validation']",['Bachelor of Science'],"['Company Name', 'Company Name', 'Company Name', 'Company Name', 'Company Name']","[['PLC', 'ERP', 'AutoCAD', 'staff management', 'budgeting', 'scheduling'], ['Modeling', 'hardware design', 'simulation', 'verification', 'validation'], ['Designing', 'supervising', 'technical studies', 'trouble shooting'], ['power distribution', 'construction', 'HVAC'], ['design', 'implementation', 'microprocessor']]","['December 2014', 'May 2011', 'October 2008', 'August 2006', 'January 2005']","['Current', 'December 2014', 'May 2011', 'October 2008', 'August 2006']"
13
+ 194,"['Machine Learning', 'Software Development', 'Text Analysis', 'Natural Language Processing', 'Image Processing', 'Python', 'Data Management', 'Scikit Learn', 'C', 'C#', '.NET', 'SQL']",['B.Tech'],['HS Institute'],"[['Data Management', 'Machine Learning']]",['May 2019'],['Oct 2019']
14
+ 406,"['Adobe', 'Customer Relations', 'Document reports', 'Budgeting', 'Cross-functional Team Leadership', 'Business Analysis', 'Access', 'Microsoft Project', 'Organization', 'Advanced Presentation', 'Project Coordinator', 'Python', 'Quality Control', 'Quality Assurance', 'Research', 'Developing other Business Analyst', 'Public Speaking', 'Negotiation', 'SAP', 'SQL', 'SPSS', 'Oracle', 'Fast Learner', 'Strong Requirement tracing', 'Basic financial and operational reporting', 'Superb communication skills', 'Advanced problem solving abilities']",['Bachelor of Arts'],"['Company Name', 'Company Name', 'Company Name']","[['Business process improvement', 'Analytical and Tactical Planning', 'Microsoft Excel', 'Microsoft Word', 'Power Point'], ['Time management', 'Administrative cost reduction', 'Microsoft Excel', 'Microsoft Word'], ['Stakeholder analysis', 'Financial and operation reporting', 'Oracle', 'SQL', 'JIRA', 'SAP']]","['July 2016', 'November 2014', 'May 2014']","['October 2017', 'September 2015', 'November 2014']"
15
+ 84,"['Troubleshooting and problem solving', 'Exceptional telephone etiquette', 'Professional demeanor', 'Self-starter', 'Time management ability', 'Ability to prioritize', 'Deadline-oriented', 'Employee training and development', 'Sales Software: Salesforce.com, Paperless Proposals', 'Learning Management: Saba LMS, Success Factors LMS, Webex, Pedague Testing System', 'Administrative: Microsoft Office Suite, Concur', 'Technical: Vantive, Clarify']",['Bachelor of Science'],"['Company Name', 'Company Name', 'Company Name', 'Company Name', 'Company Name', 'Company Name', 'Company Name', 'Company Name']","[None, None, None, None, None, None, None, None]","['May 2016', 'October 2014', 'February 2014', 'April 2012', 'June 2011', 'January 2011', 'January 2007', 'October 2001']","['Current', 'May 2016', 'October 2014', 'April 2013', 'March 2012', 'April 2011', 'May 2010', 'January 2007']"
16
+ 371,"['Machine Learning', 'Method Development', 'Artificial Intelligence', 'Data Modeling', 'Data Visualization', 'Data Validation', 'Deep Learning', 'MySQL', 'MongoDB', 'Python', 'Plotly', 'Seaborn', 'Matplotlib']",['B.Tech'],['BlueM Solutions'],"[['Machine Learning', 'Python']]",['Jun 2020'],['till date']
17
+ 388,"['Budget forecasting expertise', 'Analytical reasoning', 'Account reconciliation expert', 'Strong organizational skills', 'General ledger accounting', 'Expert in customer relations', 'Flexible team player', 'Advanced computer proficiency (PC and Mac)', 'Performed debit, credit and total accounts on computer spreadsheets/databases, using specialized accounting software.', 'Received, recorded, and banked cash, checks, and vouchers as well as reconciled records of bank transactions.', 'Worked with management to document and offset unusual expense variances in their respective areas.', 'Assisted the CFO with the production of the monthly financials, management reports and board packages.', 'Coded the general ledger and processed vendor invoice payments.', 'Researched and resolved billing and invoice problems.', 'Account reconciliations', 'Accounting', 'Accountant', 'accounting systems', 'accounting system', 'Accounts payable', 'Adobe Acrobat', 'agency', 'Balance', 'banking', 'Budget analysis', 'budget', 'bi', 'cash handling', 'clerical', 'contracts', 'cost accounting', 'Database', 'fax', 'FDS', 'Finance', 'Financial', 'Financial analysis', 'financial management', 'financial operations', 'financial reports', 'financial statements', 'fiscal management', 'forecasting', 'functional', 'fund accounting', 'funds', 'General Ledger', 'Government', 'grants', 'Innovation', 'maintains inventory', 'inventory', 'leadership skills', 'Macintosh', 'Mainframe', 'managerial', 'managerial accounting', 'Access', 'Microsoft Access', 'Microsoft Excel', 'Excel', 'Microsoft Office software', 'Office', 'Outlook', 'PowerPoint', 'Publisher', 'Windows', 'Word', 'Monitors', 'newsletter', 'Payroll', 'IBM-PC', 'copier', 'Policies', 'processes', 'profit', 'Program Development', 'proposals', 'QuickBooks', 'reconciling', 'record keeping', 'repairs', 'reporting', 'scanner', 'technical support', 'phone', 'time management', 'type', 'written', 'annual reports', 'year-end']","[""Master's"", 
'BBA']","['Company Name', 'Company Name', 'Company Name']","[['Advanced computer proficiency (PC and Mac)', 'General ledger accounting', 'Researched and resolved billing and invoice problems'], ['Revised and streamlined inefficient work procedures with automation software', 'Reduced time and costs and increased efficiency by introducing new accounting procedures', 'Executed accounts receivable reporting enhancements and reconciliation procedures'], ['Developed and implemented a Microsoft Excel Financial Database system for streamlining all financial and charitable contributions record system for the Church']]","['04/2016', '04/2010', '04/2006']","['Current', '04/2016', 'Current']"
18
+ 495,"['Project Management', 'Secret Clearance', 'Risk management', 'Performance tracking and evaluation', 'Team building', 'Project analysis', 'Leadership mentoring', 'Business and requirements analysis', 'Operations management', 'Decisive leader']","['Graduate Certificate', 'MBA', 'Bachelor of Science']","['Company Name', 'Company Name', 'Company Name']","[['Project management', 'Leadership', 'Risk management', 'Business development', 'Process improvement'], ['Technical support', 'Project management', 'Risk management'], ['Leadership', 'Training', 'Logistics']]","['01/2004', '01/2002', '07/1991']","['Current', '01/2004', '01/2002']"
19
+ 30,"['Army', 'basic', 'bridges', 'inspection', 'Inspects', 'Marconi', 'Office', 'Radar', 'multi-meters', 'oscilloscopes', 'personnel', 'radio', 'Read', 'Safety', 'signal generators', 'Telephone', 'troubleshooting', 'VHF']","['Diploma', 'N/A', 'N/A', 'N/A', 'N/A']","['Company Name', 'Company Name', 'Company Name']","[['Supervised construction', 'Read and interpret construction drawings', 'Estimate material, time, labor, and equipment', 'Inspect structures'], ['Circuit analysis', 'Complex test instruments', 'Engineering tools'], ['Circuit analysis', 'Complex test instruments', 'Engineering tools']]","['January 2012', 'January 2000', 'January 1992']","['January 2013', 'Current', 'January 2000']"
20
+ 316,"['QuickBooks', 'Sage', 'Lacerte', 'Intuit ProSeries', 'Microsoft Word', 'Microsoft Excel', 'Microsoft Access', 'Microsoft Outlook', 'Microsoft Power Pivot', 'Microsoft PowerPoint', 'CalBench', 'FASB & GAAP standards', 'Sarbanes-Oxley', 'Generally Accepted Auditing Standards']","['Master of Science', 'Bachelor of Science']","['Company Name', 'Company Name', 'Company Name', 'Company Name']","[['cash receipts', 'A/R', 'A/P', 'notes payable', 'interest payable', 'unearned revenues', 'legal services', 'revenues', 'expenses', 'office supplies', 'insurance', 'payroll processing', 'petty cash', 'reconciliation', 'financial statement analysis', 'tax returns'], ['A/R', 'A/P', 'tangible assets', 'revenue', 'expenses', 'COGS', 'depreciation', 'advertising', 'inventory', 'intangible assets', 'line of credit', 'bad debts', 'credit card transactions', 'financial statements'], ['stocking', 'customer relations', 'loading/unloading', 'order picking/packing'], ['business operations', 'workforce management', 'customer satisfaction', 'social networking']]","['January 2016', 'February 2015', 'May 2010', 'September 1996']","['Current', 'January 2016', 'August 2011', 'March 2004']"
21
+ 408,"['Machine Learning', 'Software Developer', 'Software Engineering', 'ML Engineer', 'Model Building', 'Deep Learning', 'Numpy', 'Pandas', 'PySpark', 'Hadoop', 'Matplotlib', 'Keras', 'Tensorflow', 'TensorflowJS', 'AngularJS', 'Computer Vision', 'Natural Language Processing', 'HuggingFace', 'Data Wrangling', 'Scikit Learn', 'Algorithm Optimization']",['B.Tech'],['Larsen & Toubro'],"[['Machine Learning', 'Computer Vision', 'Natural Language Processing']]",['Jul 2019'],['Till Date']
data/cleaned/candidates.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": 0,
4
+ "skills": "['Python', 'LeetCode', 'GDB', 'GitHub', 'HTML', 'MySQL', 'Lucidchart', 'Postman', 'OWASP ZAP', 'Jira']",
5
+ "degree_names": "['Bachelor of Science']",
6
+ "professional_company_names": "['Charles Schwab', 'Sysco', 'Accenture']",
7
+ "start_dates": "['2020', '2017', '2015']",
8
+ "end_dates": "['current', '2020', '2017']"
9
+ },
10
+ {
11
+ "id": 1,
12
+ "skills": "['assembly', 'ATM', 'Cadence', 'calibration', 'CA', 'hardware', 'DC', 'de-bugging', 'Ethernet', 'Graphic', 'ISO 9000', 'LabView', 'layout', 'Linux OS', 'Logic', 'Mentor', 'Access', 'MS Office', 'Windows', 'multimeters', 'network', 'networking', 'ORCAD', 'oscilloscopes', 'power supplies', 'procurement', 'purchasing', 'Quality Assurance', 'Routers', 'schematics', 'signal generators', 'SONET', 'Switches', 'T1', 'test equipment', 'trouble shooting', 'troubleshooting', 'validation', 'View']",
13
+ "degree_names": "['Associate Degree']",
14
+ "professional_company_names": "['Company Name', 'Company Name', 'Company Name']",
15
+ "start_dates": "['August 2007', 'January 2002', 'April 1995']",
16
+ "end_dates": "['October 2014', 'June 2007', 'November 2001']"
17
+ },
18
+ {
19
+ "id": 2,
20
+ "skills": "['Business Analyst', 'Data Analysis', 'Machine Learning', 'Python', 'C++', 'DotNet', 'Java', 'PyTorch']",
21
+ "degree_names": "['B.Tech (Civil)']",
22
+ "professional_company_names": "['CITI']",
23
+ "start_dates": "['March 2019']",
24
+ "end_dates": "['June 2020']"
25
+ },
26
+ {
27
+ "id": 3,
28
+ "skills": "['Software Engineer', 'Data Analyst', 'Machine Learning', 'Text Analytics', 'Software Development', 'Object Oriented Programming', 'Pandas', 'Numpy', 'Java', 'Python', 'SpringBoot', 'Laravel']",
29
+ "degree_names": "['B.Tech']",
30
+ "professional_company_names": "['KLP Technology Solutions']",
31
+ "start_dates": "['Jan 2019']",
32
+ "end_dates": "['till date']"
33
+ },
34
+ {
35
+ "id": 4,
36
+ "skills": "['Project Management', 'Secret Clearance', 'Risk management', 'Performance tracking and evaluation', 'Team building', 'Project analysis', 'Leadership mentoring', 'Business and requirements analysis', 'Operations management', 'Decisive leader']",
37
+ "degree_names": "['Graduate Certificate', 'MBA', 'Bachelor of Science']",
38
+ "professional_company_names": "['Company Name', 'Company Name', 'Company Name']",
39
+ "start_dates": "['01/2004', '01/2002', '07/1991']",
40
+ "end_dates": "['Current', '01/2004', '01/2002']"
41
+ },
42
+ {
43
+ "id": 5,
44
+ "skills": "['Microsoft Applications', 'Network Security', 'Networking', 'PC hardware and software installation, configuration, and troubleshooting', 'Remote Desktop and Help Desk Management', 'Verbal Communication', 'Technical Support', 'Team Leadership', 'Programming Languages', 'On-call tech support', 'Windows & Mac OS', 'Wiring/Wire Spicing: Cat3, Cat5, Cat5e, Coaxial', 'Management', 'VoIP, TCP/IP, IPSec, ATM, SS7, IPX, DNS, BIND, DHCP, HSRP and LAN/WAN architecture', 'Application Development', 'Voice Over IP Telephone', 'Inventory Management']",
45
+ "degree_names": "['Bachelor Degree', 'Associate Degree']",
46
+ "professional_company_names": "['N/A', 'Company Name', 'Company Name']",
47
+ "start_dates": "['August 2006', 'August 2013', 'July 2014']",
48
+ "end_dates": "['January 2013', 'September 2014', 'Current']"
49
+ },
50
+ {
51
+ "id": 6,
52
+ "skills": "[]",
53
+ "degree_names": "['M.S.', 'B.S.']",
54
+ "professional_company_names": "['FreshDirect', 'Marketing Science Associates']",
55
+ "start_dates": "['April 2018', 'April 2017']",
56
+ "end_dates": "['September 2018', 'September 2017']"
57
+ },
58
+ {
59
+ "id": 7,
60
+ "skills": "['Project Management', 'Secret Clearance', 'Risk management', 'Performance tracking and evaluation', 'Team building', 'Project analysis', 'Leadership mentoring', 'Business and requirements analysis', 'Operations management', 'Decisive leader']",
61
+ "degree_names": "['Graduate Certificate', 'MBA', 'Bachelor of Science']",
62
+ "professional_company_names": "['Company Name', 'Company Name', 'Company Name']",
63
+ "start_dates": "['01/2004', '01/2002', '07/1991']",
64
+ "end_dates": "['Current', '01/2004', '01/2002']"
65
+ },
66
+ {
67
+ "id": 8,
68
+ "skills": "['C', 'R', 'Catia', 'AutoCAD', 'ANSYS', 'Microsoft office', 'Product Project Management', 'GD&T', 'Minitab', 'Kanban', 'Kaizen', 'FMEA', 'development', 'Toyota production systems', 'APQP', 'PPAP', '8D', '5 why', \"KPI's\", 'Reliability SAE Standards', 'Vehicle ergonomics', 'DVP&R', 'QCC', 'CNC engineering', 'Programming', 'anatomy', 'benchmarking', 'lean manufacturing', 'manufacturing process', 'cost analysis', 'packaging', 'process engineering', 'Product Development', 'surveys', 'validation', 'welding']",
69
+ "degree_names": "['Master of Science', 'Bachelor of Engineering']",
70
+ "professional_company_names": "['Company Name', 'Company Name']",
71
+ "start_dates": "['07/2017', '06/2013']",
72
+ "end_dates": "['08/2017', '06/2016']"
73
+ },
74
+ {
75
+ "id": 9,
76
+ "skills": "['Data Science', 'Data Analysis', 'Data Analytics', 'Data Mining', 'Deep Learning', 'Machine Learning', 'Artificial Intelligence', 'Linear Regression', 'Python', 'Jupyter', 'OpenCV']",
77
+ "degree_names": "['B.Tech']",
78
+ "professional_company_names": "['NaxApp']",
79
+ "start_dates": "['Apr 2020']",
80
+ "end_dates": "['May 2020']"
81
+ },
82
+ {
83
+ "id": 10,
84
+ "skills": "['PLC', 'IEC 61131 (Ladder Logic, Functional Block Diagram, Structured Text, Instruction List.)', 'Java', 'C', 'Visual Basic', 'VHDL', 'PSpice', 'Assembly (Intel, Motorola, TI)', 'Labview', 'AutoCAD', 'Inventor', 'Matlab', 'Microsoft Office', 'PSIM', 'Easy Power', 'Xilinx ISE', 'Printed Circuit Board CAD (Protel)', 'Siemens Step 7', 'Wago CoDeSys', 'Allen Bradley RSLogix', 'ERP (Alliance, Global Shop, XA, SAP)', 'API', 'automation', 'budgeting', 'cabling', 'CAD', 'Conversion', 'client', 'clients', 'DC', 'Designing', 'flash', 'Functional', 'hardware design', 'HP', 'HVAC', 'instruction', 'Intel', 'microprocessor', 'Modeling', 'Motorola', 'Power distribution', 'power generation', 'processes', 'Programming', 'proposals', 'Renovation', 'renovations', 'safety', 'scheduling', 'schematics', 'Siemens', 'simulation', 'staff management', 'Structured', 'Supervising', 'switchgear', 'tender', 'troubleshoot', 'troubleshooting', 'validation']",
85
+ "degree_names": "['Bachelor of Science']",
86
+ "professional_company_names": "['Company Name', 'Company Name', 'Company Name', 'Company Name', 'Company Name']",
87
+ "start_dates": "['December 2014', 'May 2011', 'October 2008', 'August 2006', 'January 2005']",
88
+ "end_dates": "['Current', 'December 2014', 'May 2011', 'October 2008', 'August 2006']"
89
+ },
90
+ {
91
+ "id": 11,
92
+ "skills": "['Machine Learning', 'Software Development', 'Text Analysis', 'Natural Language Processing', 'Image Processing', 'Python', 'Data Management', 'Scikit Learn', 'C', 'C#', '.NET', 'SQL']",
93
+ "degree_names": "['B.Tech']",
94
+ "professional_company_names": "['HS Institute']",
95
+ "start_dates": "['May 2019']",
96
+ "end_dates": "['Oct 2019']"
97
+ },
98
+ {
99
+ "id": 12,
100
+ "skills": "['Adobe', 'Customer Relations', 'Document reports', 'Budgeting', 'Cross-functional Team Leadership', 'Business Analysis', 'Access', 'Microsoft Project', 'Organization', 'Advanced Presentation', 'Project Coordinator', 'Python', 'Quality Control', 'Quality Assurance', 'Research', 'Developing other Business Analyst', 'Public Speaking', 'Negotiation', 'SAP', 'SQL', 'SPSS', 'Oracle', 'Fast Learner', 'Strong Requirement tracing', 'Basic financial and operational reporting', 'Superb communication skills', 'Advanced problem solving abilities']",
101
+ "degree_names": "['Bachelor of Arts']",
102
+ "professional_company_names": "['Company Name', 'Company Name', 'Company Name']",
103
+ "start_dates": "['July 2016', 'November 2014', 'May 2014']",
104
+ "end_dates": "['October 2017', 'September 2015', 'November 2014']"
105
+ },
106
+ {
107
+ "id": 13,
108
+ "skills": "['Troubleshooting and problem solving', 'Exceptional telephone etiquette', 'Professional demeanor', 'Self-starter', 'Time management ability', 'Ability to prioritize', 'Deadline-oriented', 'Employee training and development', 'Sales Software: Salesforce.com, Paperless Proposals', 'Learning Management: Saba LMS, Success Factors LMS, Webex, Pedague Testing System', 'Administrative: Microsoft Office Suite, Concur', 'Technical: Vantive, Clarify']",
109
+ "degree_names": "['Bachelor of Science']",
110
+ "professional_company_names": "['Company Name', 'Company Name', 'Company Name', 'Company Name', 'Company Name', 'Company Name', 'Company Name', 'Company Name']",
111
+ "start_dates": "['May 2016', 'October 2014', 'February 2014', 'April 2012', 'June 2011', 'January 2011', 'January 2007', 'October 2001']",
112
+ "end_dates": "['Current', 'May 2016', 'October 2014', 'April 2013', 'March 2012', 'April 2011', 'May 2010', 'January 2007']"
113
+ },
114
+ {
115
+ "id": 14,
116
+ "skills": "['Machine Learning', 'Method Development', 'Artificial Intelligence', 'Data Modeling', 'Data Visualization', 'Data Validation', 'Deep Learning', 'MySQL', 'MongoDB', 'Python', 'Plotly', 'Seaborn', 'Matplotlib']",
117
+ "degree_names": "['B.Tech']",
118
+ "professional_company_names": "['BlueM Solutions']",
119
+ "start_dates": "['Jun 2020']",
120
+ "end_dates": "['till date']"
121
+ },
122
+ {
123
+ "id": 15,
124
+ "skills": "['Budget forecasting expertise', 'Analytical reasoning', 'Account reconciliation expert', 'Strong organizational skills', 'General ledger accounting', 'Expert in customer relations', 'Flexible team player', 'Advanced computer proficiency (PC and Mac)', 'Performed debit, credit and total accounts on computer spreadsheets/databases, using specialized accounting software.', 'Received, recorded, and banked cash, checks, and vouchers as well as reconciled records of bank transactions.', 'Worked with management to document and offset unusual expense variances in their respective areas.', 'Assisted the CFO with the production of the monthly financials, management reports and board packages.', 'Coded the general ledger and processed vendor invoice payments.', 'Researched and resolved billing and invoice problems.', 'Account reconciliations', 'Accounting', 'Accountant', 'accounting systems', 'accounting system', 'Accounts payable', 'Adobe Acrobat', 'agency', 'Balance', 'banking', 'Budget analysis', 'budget', 'bi', 'cash handling', 'clerical', 'contracts', 'cost accounting', 'Database', 'fax', 'FDS', 'Finance', 'Financial', 'Financial analysis', 'financial management', 'financial operations', 'financial reports', 'financial statements', 'fiscal management', 'forecasting', 'functional', 'fund accounting', 'funds', 'General Ledger', 'Government', 'grants', 'Innovation', 'maintains inventory', 'inventory', 'leadership skills', 'Macintosh', 'Mainframe', 'managerial', 'managerial accounting', 'Access', 'Microsoft Access', 'Microsoft Excel', 'Excel', 'Microsoft Office software', 'Office', 'Outlook', 'PowerPoint', 'Publisher', 'Windows', 'Word', 'Monitors', 'newsletter', 'Payroll', 'IBM-PC', 'copier', 'Policies', 'processes', 'profit', 'Program Development', 'proposals', 'QuickBooks', 'reconciling', 'record keeping', 'repairs', 'reporting', 'scanner', 'technical support', 'phone', 'time management', 'type', 'written', 'annual reports', 'year-end']",
125
+ "degree_names": "[\"Master's\", 'BBA']",
126
+ "professional_company_names": "['Company Name', 'Company Name', 'Company Name']",
127
+ "start_dates": "['04/2016', '04/2010', '04/2006']",
128
+ "end_dates": "['Current', '04/2016', 'Current']"
129
+ },
130
+ {
131
+ "id": 16,
132
+ "skills": "['Project Management', 'Secret Clearance', 'Risk management', 'Performance tracking and evaluation', 'Team building', 'Project analysis', 'Leadership mentoring', 'Business and requirements analysis', 'Operations management', 'Decisive leader']",
133
+ "degree_names": "['Graduate Certificate', 'MBA', 'Bachelor of Science']",
134
+ "professional_company_names": "['Company Name', 'Company Name', 'Company Name']",
135
+ "start_dates": "['01/2004', '01/2002', '07/1991']",
136
+ "end_dates": "['Current', '01/2004', '01/2002']"
137
+ },
138
+ {
139
+ "id": 17,
140
+ "skills": "['Army', 'basic', 'bridges', 'inspection', 'Inspects', 'Marconi', 'Office', 'Radar', 'multi-meters', 'oscilloscopes', 'personnel', 'radio', 'Read', 'Safety', 'signal generators', 'Telephone', 'troubleshooting', 'VHF']",
141
+ "degree_names": "['Diploma', 'N/A', 'N/A', 'N/A', 'N/A']",
142
+ "professional_company_names": "['Company Name', 'Company Name', 'Company Name']",
143
+ "start_dates": "['January 2012', 'January 2000', 'January 1992']",
144
+ "end_dates": "['January 2013', 'Current', 'January 2000']"
145
+ },
146
+ {
147
+ "id": 18,
148
+ "skills": "['QuickBooks', 'Sage', 'Lacerte', 'Intuit ProSeries', 'Microsoft Word', 'Microsoft Excel', 'Microsoft Access', 'Microsoft Outlook', 'Microsoft Power Pivot', 'Microsoft PowerPoint', 'CalBench', 'FASB & GAAP standards', 'Sarbanes-Oxley', 'Generally Accepted Auditing Standards']",
149
+ "degree_names": "['Master of Science', 'Bachelor of Science']",
150
+ "professional_company_names": "['Company Name', 'Company Name', 'Company Name', 'Company Name']",
151
+ "start_dates": "['January 2016', 'February 2015', 'May 2010', 'September 1996']",
152
+ "end_dates": "['Current', 'January 2016', 'August 2011', 'March 2004']"
153
+ },
154
+ {
155
+ "id": 19,
156
+ "skills": "['Machine Learning', 'Software Developer', 'Software Engineering', 'ML Engineer', 'Model Building', 'Deep Learning', 'Numpy', 'Pandas', 'PySpark', 'Hadoop', 'Matplotlib', 'Keras', 'Tensorflow', 'TensorflowJS', 'AngularJS', 'Computer Vision', 'Natural Language Processing', 'HuggingFace', 'Data Wrangling', 'Scikit Learn', 'Algorithm Optimization']",
157
+ "degree_names": "['B.Tech']",
158
+ "professional_company_names": "['Larsen & Toubro']",
159
+ "start_dates": "['Jul 2019']",
160
+ "end_dates": "['Till Date']"
161
+ }
162
+ ]
data/cleaned/cleanedReq.json ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "skills": {
3
+ "items": {
4
+ "essential": [
5
+ "software development processes",
6
+ "testing and evaluating current networking systems",
7
+ "Java",
8
+ "Python",
9
+ "C++",
10
+ "scripting and test automation",
11
+ "rapid development cycles in a web-based environment"
12
+ ],
13
+ "preferred": [
14
+ "HTML5",
15
+ "CSS3",
16
+ "content management systems",
17
+ "Wicket",
18
+ "GWT",
19
+ "Spring MVC",
20
+ "relational databases",
21
+ "ORM and SQL technologies"
22
+ ]
23
+ },
24
+ "type": "object"
25
+ },
26
+ "experience": {
27
+ "items": [
28
+ {
29
+ "years": {
30
+ "professional": {
31
+ "minimum": 5,
32
+ "description": "as an engineer of software and networking platforms"
33
+ },
34
+ "academic": {
35
+ "minimum": 7,
36
+ "languages": [
37
+ "Java",
38
+ "Python",
39
+ "C++"
40
+ ]
41
+ }
42
+ },
43
+ "description": "with experience in software and networking platforms",
44
+ "type": "object"
45
+ }
46
+ ],
47
+ "type": "array"
48
+ },
49
+ "education": {
50
+ "items": [
51
+ {
52
+ "minimum": {
53
+ "years": 4,
54
+ "description": "as a software engineer or information technology student"
55
+ },
56
+ "type": "object"
57
+ }
58
+ ],
59
+ "type": "array"
60
+ },
61
+ "keywords": [
62
+ "innovative software solutions",
63
+ "software development",
64
+ "networking systems",
65
+ "growth",
66
+ "data",
67
+ "self-serve computing",
68
+ "reporting solutions",
69
+ "interactive querying",
70
+ "scalable software platforms",
71
+ "efficiency",
72
+ "networking solutions",
73
+ "continuous integration",
74
+ "software development lifecycle",
75
+ "distributed software",
76
+ "applications",
77
+ "synchronous and asynchronous design patterns",
78
+ "urgency",
79
+ "quality",
80
+ "global team",
81
+ "project operations",
82
+ "global technology stack",
83
+ "localized improvements",
84
+ "software and network system functionality",
85
+ "site reliability",
86
+ "next-wave product features",
87
+ "system capabilities"
88
+ ]
89
+ }
data/cleaned/jsonResult.json ADDED
File without changes
data/cleaned/reqs.csv ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,skills_required,educationaL_requirements,experiencere_requirement
2
+ 361,"Azure
3
+ Big Data
4
+ Data Analytics
5
+ ETL Tools
6
+ Power BI
7
+ SQL",Bachelor of Science (BSc),5 to 8 years
8
+ 73,"Brand Promotion
9
+ Campaign Management
10
+ Field Supervision
11
+ Merchandising
12
+ promotional activities
13
+ Trade Marketing",Master of Business Administration (MBA),At least 3 years
14
+ 374,"AutoCAD
15
+ Solidworks",Bachelor’s degree in Mechanical Engineering from a reputed institute.,At least 5 years
16
+ 155,"AutoCAD
17
+ ETABS
18
+ Microsoft Office Suite
19
+ MS Project",B.Sc in Civil Engineering from a reputed university.,At least 5 years
20
+ 104,"Corporate Marketing
21
+ Facebook Ads Manager
22
+ Facebook campaign
23
+ Facebook marketing
24
+ Google Ads
25
+ Google analytics
26
+ Market Researc
27
+ Marketing
28
+ Search Engine Optimization (SEO)",Bachelor/Honors,At least 1 year
29
+ 394,"Python
30
+ R or Java
31
+ TensorFlow
32
+ PyTorch
33
+ Scikit-learn.","Bachelors or Masters degree in Computer Science, Engineering, or a related field.",
34
+ 377,"Python
35
+ R or Java
36
+ TensorFlow
37
+ PyTorch
38
+ Scikit-learn.","Bachelors or Masters degree in Computer Science, Engineering, or a related field.",
39
+ 124,"Business Analysis
40
+ Effective communication skills
41
+ Java
42
+ REST API Design
43
+ Soft Skills
44
+ Software Development",Bachelor of Science (BSc) in Computer Science & Engineering,At least 2 years
45
+ 68,"Ansible
46
+ AWS Cloud
47
+ Cloud Platform
48
+ DevOps
49
+ Dockers and Kubernetes
50
+ Java
51
+ ReactJS
52
+ Terraform",Bachelor/Honors,At least 3 years
53
+ 450,"Python
54
+ R or Java
55
+ TensorFlow
56
+ PyTorch
57
+ Scikit-learn.","Bachelors or Masters degree in Computer Science, Engineering, or a related field.",
58
+ 9,"Having CACC from reputed CA Firm
59
+ Internal Audit and Compliance","Master of Commerce (MCom) in Accounting, Must have CACC certification from any CA firm",2 to 4 years
60
+ 194,"Maintenance and Troubleshooting
61
+ Mechanical","Bachelor of Science (BSc) in Mechanical Engineering, Diploma in Mechanical",2 to 5 years
62
+ 406,"Business Analysis
63
+ Effective communication skills
64
+ Java
65
+ REST API Design
66
+ Soft Skills
67
+ Software Development",Bachelor of Science (BSc) in Computer Science & Engineering,At least 2 years
68
+ 84,"HRM Report
69
+ Human Resource Management
70
+ NGO","Masters degree in any discipline, Bachelor of Business Administration (BBA)",5 to 6 years
71
+ 371,"iOS
72
+ iOS App Developer
73
+ iOS Application Development
74
+ iOS Development
75
+ Mobile apps Developer (iOS)
76
+ Native IOS
77
+ Swift (iOS)
78
+ Swift UI",Bachelor of Science (BSc) in Computer Science,At least 4 years
79
+ 388,"ASP.NET MVC Strong understanding of database design
80
+ Database Administrator (DBA)
81
+ Database management
82
+ Elasticsearch
83
+ MongoDB
84
+ MySQL database
85
+ NoSQL database
86
+ REDIS",Bachelor of Science (BSc) in Computer Science,At least 1 year
87
+ 495,,Bachelor of Science (BSc) in Computer Science & Engineering,3 to 5 years
88
+ 30,"Cisco
89
+ Linux
90
+ Operation & Maintenance of Server",Bachelor/Honors,4 to 5 years
91
+ 316,"AUDIT AND INSPECTION
92
+ Banking
93
+ Internal Audit","Masters, Master of Business Administration (MBA), Master of Business Management (MBM)",At least 15 years
94
+ 408,"AutoCAD
95
+ Solidworks",Bachelor’s degree in Mechanical Engineering from a reputed institute.,At least 5 years
data/image/arch.png ADDED
data/raw/__init__.py ADDED
File without changes
data/raw/jobreq.txt ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Sample software engineer job description
2
+ At company x, our technology solves problems. We’ve established the company as a leading developer of innovative software solutions, and we’re looking for a highly skilled software engineer to join our program and network design team. The ideal candidate will have expert knowledge of software development processes, along with solid experience in testing and evaluating current networking systems. This person should be highly motivated in finding technical issues and fixing them with meticulous code.
3
+ test
4
+ Objectives of this role
5
+ Enhance existing platform and network capabilities to handle massive growth, enabling new insights and products based on data via self-serve computing, reporting solutions, and interactive querying
6
+ Visualize, design, and develop innovative software platforms as we continue to experience growth in the usage and visibility of our products
7
+ Create scalable software platforms and applications, as well as efficient networking solutions, that are unit tested, code reviewed, and checked regularly for continuous integration
8
+ Examine existing systems for flaws and create solutions that improve service uptime and time-to-resolve through monitoring and automated remediation
9
+ Plan and execute full software development lifecycle for each assigned project, adhering to company standards and expectations
10
+ Responsibilities
11
+ Design and build tools and frameworks to automate the development, testing, deployment, and management of services and products
12
+ Plan and scale distributed software and applications, using synchronous and asynchronous design patterns, writing code, and delivering with urgency and quality
13
+ Collaborate with global team to produce project plans and analyze the efficiency and feasibility of project operations, leveraging global technology stack and making localized improvements
14
+ Track, document, and maintain software and network system functionality, and leverage any opportunity to improve engineering
15
+ Focus on creating software and networking platforms that are free of faulty programming, and continuously keep developers in step without compromising site reliability
16
+ Work with product managers and user-experience designers to influence the strategy and delivery of next-wave product features and system capabilities
17
+ Required skills and qualifications
18
+ Five or more years of experience as an engineer of software and networking platforms
19
+ Seven or more years of experience (professional and academic) with Java, Python, and C++
20
+ Proven ability to document design processes, including development, testing, analytics, and troubleshooting
21
+ Experience with rapid development cycles in a web-based environment
22
+ Strong ability in scripting and test automation
23
+ Desire to continue professional growth through training and education
24
+ Preferred skills and qualifications
25
+ Bachelor’s degree (or equivalent) in software engineering or information technology
26
+ Working knowledge of relational databases as well as ORM and SQL technologies
27
+ Proficiency with HTML5, CSS3, and content management systems
28
+ Web application development experience with multiple frameworks, including Wicket, GWT, and Spring MVC
params.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ preprocess:
2
+ input: data/raw/resume_data.csv
3
+ outputcandidate: data/cleaned/candidates.csv
4
+ outputreq: data/cleaned/reqs.csv
5
+ inputJobreq: data/raw/jobreq.txt
6
+ jsonJobReq: data/cleaned/cleanedReq.json
7
+ jsoncandidate: data/cleaned/candidates.json
8
+ jsonResult: data/cleaned/result.json
9
+
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pandas
2
+ mlflow
3
+ numpy
4
+ notebook
5
+ scikit-learn
6
+ pyYAML
7
+ python-Box
8
+ joblib
9
+ ensure
10
+ dvc
11
+ dagshub
12
+ dvc_s3
13
+ agno
14
+ pydantic
15
+ groq
16
+ gradio
research/research.ipynb ADDED
@@ -0,0 +1,629 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 7,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "import os\n",
11
+ "import yaml"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 6,
17
+ "metadata": {},
18
+ "outputs": [
19
+ {
20
+ "data": {
21
+ "text/plain": [
22
+ "'/Volumes/sunny333/code/genAI/ResumeAiAgent'"
23
+ ]
24
+ },
25
+ "execution_count": 6,
26
+ "metadata": {},
27
+ "output_type": "execute_result"
28
+ }
29
+ ],
30
+ "source": [
31
+ "os.getcwd()"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": 5,
37
+ "metadata": {},
38
+ "outputs": [
39
+ {
40
+ "data": {
41
+ "text/plain": [
42
+ "'/Volumes/sunny333/code/genAI/ResumeAiAgent'"
43
+ ]
44
+ },
45
+ "execution_count": 5,
46
+ "metadata": {},
47
+ "output_type": "execute_result"
48
+ }
49
+ ],
50
+ "source": [
51
+ "new_path = \"/Volumes/sunny333/code/genAI/ResumeAiAgent/\"\n",
52
+ "os.chdir(new_path)\n",
53
+ "os.getcwd()"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": null,
59
+ "metadata": {},
60
+ "outputs": [],
61
+ "source": [
62
+ "\n"
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": 8,
68
+ "metadata": {},
69
+ "outputs": [
70
+ {
71
+ "data": {
72
+ "text/plain": [
73
+ "{'input': 'data/raw/resume_data.csv', 'output': 'data/cleaned/data.csv'}"
74
+ ]
75
+ },
76
+ "execution_count": 8,
77
+ "metadata": {},
78
+ "output_type": "execute_result"
79
+ }
80
+ ],
81
+ "source": [
82
+ "path = \"params.yaml\"\n",
83
+ "params=yaml.safe_load(open(path))['preprocess']\n",
84
+ "params"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "code",
89
+ "execution_count": 9,
90
+ "metadata": {},
91
+ "outputs": [
92
+ {
93
+ "data": {
94
+ "text/html": [
95
+ "<div>\n",
96
+ "<style scoped>\n",
97
+ " .dataframe tbody tr th:only-of-type {\n",
98
+ " vertical-align: middle;\n",
99
+ " }\n",
100
+ "\n",
101
+ " .dataframe tbody tr th {\n",
102
+ " vertical-align: top;\n",
103
+ " }\n",
104
+ "\n",
105
+ " .dataframe thead th {\n",
106
+ " text-align: right;\n",
107
+ " }\n",
108
+ "</style>\n",
109
+ "<table border=\"1\" class=\"dataframe\">\n",
110
+ " <thead>\n",
111
+ " <tr style=\"text-align: right;\">\n",
112
+ " <th></th>\n",
113
+ " <th>address</th>\n",
114
+ " <th>career_objective</th>\n",
115
+ " <th>skills</th>\n",
116
+ " <th>educational_institution_name</th>\n",
117
+ " <th>degree_names</th>\n",
118
+ " <th>passing_years</th>\n",
119
+ " <th>educational_results</th>\n",
120
+ " <th>result_types</th>\n",
121
+ " <th>major_field_of_studies</th>\n",
122
+ " <th>professional_company_names</th>\n",
123
+ " <th>...</th>\n",
124
+ " <th>online_links</th>\n",
125
+ " <th>issue_dates</th>\n",
126
+ " <th>expiry_dates</th>\n",
127
+ " <th>job_position_name</th>\n",
128
+ " <th>educationaL_requirements</th>\n",
129
+ " <th>experiencere_requirement</th>\n",
130
+ " <th>age_requirement</th>\n",
131
+ " <th>responsibilities.1</th>\n",
132
+ " <th>skills_required</th>\n",
133
+ " <th>matched_score</th>\n",
134
+ " </tr>\n",
135
+ " </thead>\n",
136
+ " <tbody>\n",
137
+ " <tr>\n",
138
+ " <th>0</th>\n",
139
+ " <td>NaN</td>\n",
140
+ " <td>Big data analytics working and database wareho...</td>\n",
141
+ " <td>['Big Data', 'Hadoop', 'Hive', 'Python', 'Mapr...</td>\n",
142
+ " <td>['The Amity School of Engineering &amp; Technology...</td>\n",
143
+ " <td>['B.Tech']</td>\n",
144
+ " <td>['2019']</td>\n",
145
+ " <td>['N/A']</td>\n",
146
+ " <td>[None]</td>\n",
147
+ " <td>['Electronics']</td>\n",
148
+ " <td>['Coca-COla']</td>\n",
149
+ " <td>...</td>\n",
150
+ " <td>NaN</td>\n",
151
+ " <td>NaN</td>\n",
152
+ " <td>NaN</td>\n",
153
+ " <td>Senior Software Engineer</td>\n",
154
+ " <td>B.Sc in Computer Science &amp; Engineering from a ...</td>\n",
155
+ " <td>At least 1 year</td>\n",
156
+ " <td>NaN</td>\n",
157
+ " <td>Technical Support\\nTroubleshooting\\nCollaborat...</td>\n",
158
+ " <td>NaN</td>\n",
159
+ " <td>0.850000</td>\n",
160
+ " </tr>\n",
161
+ " <tr>\n",
162
+ " <th>1</th>\n",
163
+ " <td>NaN</td>\n",
164
+ " <td>Fresher looking to join as a data analyst and ...</td>\n",
165
+ " <td>['Data Analysis', 'Data Analytics', 'Business ...</td>\n",
166
+ " <td>['Delhi University - Hansraj College', 'Delhi ...</td>\n",
167
+ " <td>['B.Sc (Maths)', 'M.Sc (Science) (Statistics)']</td>\n",
168
+ " <td>['2015', '2018']</td>\n",
169
+ " <td>['N/A', 'N/A']</td>\n",
170
+ " <td>['N/A', 'N/A']</td>\n",
171
+ " <td>['Mathematics', 'Statistics']</td>\n",
172
+ " <td>['BIB Consultancy']</td>\n",
173
+ " <td>...</td>\n",
174
+ " <td>NaN</td>\n",
175
+ " <td>NaN</td>\n",
176
+ " <td>NaN</td>\n",
177
+ " <td>Machine Learning (ML) Engineer</td>\n",
178
+ " <td>M.Sc in Computer Science &amp; Engineering or in a...</td>\n",
179
+ " <td>At least 5 year(s)</td>\n",
180
+ " <td>NaN</td>\n",
181
+ " <td>Machine Learning Leadership\\nCross-Functional ...</td>\n",
182
+ " <td>NaN</td>\n",
183
+ " <td>0.750000</td>\n",
184
+ " </tr>\n",
185
+ " <tr>\n",
186
+ " <th>2</th>\n",
187
+ " <td>NaN</td>\n",
188
+ " <td>NaN</td>\n",
189
+ " <td>['Software Development', 'Machine Learning', '...</td>\n",
190
+ " <td>['Birla Institute of Technology (BIT), Ranchi']</td>\n",
191
+ " <td>['B.Tech']</td>\n",
192
+ " <td>['2018']</td>\n",
193
+ " <td>['N/A']</td>\n",
194
+ " <td>['N/A']</td>\n",
195
+ " <td>['Electronics/Telecommunication']</td>\n",
196
+ " <td>['Axis Bank Limited']</td>\n",
197
+ " <td>...</td>\n",
198
+ " <td>NaN</td>\n",
199
+ " <td>NaN</td>\n",
200
+ " <td>NaN</td>\n",
201
+ " <td>Executive/ Senior Executive- Trade Marketing, ...</td>\n",
202
+ " <td>Master of Business Administration (MBA)</td>\n",
203
+ " <td>At least 3 years</td>\n",
204
+ " <td>NaN</td>\n",
205
+ " <td>Trade Marketing Executive\\nBrand Visibility, S...</td>\n",
206
+ " <td>Brand Promotion\\nCampaign Management\\nField Su...</td>\n",
207
+ " <td>0.416667</td>\n",
208
+ " </tr>\n",
209
+ " <tr>\n",
210
+ " <th>3</th>\n",
211
+ " <td>NaN</td>\n",
212
+ " <td>To obtain a position in a fast-paced business ...</td>\n",
213
+ " <td>['accounts payables', 'accounts receivables', ...</td>\n",
214
+ " <td>['Martinez Adult Education, Business Training ...</td>\n",
215
+ " <td>['Computer Applications Specialist Certificate...</td>\n",
216
+ " <td>['2008']</td>\n",
217
+ " <td>[None]</td>\n",
218
+ " <td>[None]</td>\n",
219
+ " <td>['Computer Applications']</td>\n",
220
+ " <td>['Company Name ï¼ City , State', 'Company Name...</td>\n",
221
+ " <td>...</td>\n",
222
+ " <td>NaN</td>\n",
223
+ " <td>NaN</td>\n",
224
+ " <td>NaN</td>\n",
225
+ " <td>Business Development Executive</td>\n",
226
+ " <td>Bachelor/Honors</td>\n",
227
+ " <td>1 to 3 years</td>\n",
228
+ " <td>Age 22 to 30 years</td>\n",
229
+ " <td>Apparel Sourcing\\nQuality Garment Sourcing\\nRe...</td>\n",
230
+ " <td>Fast typing skill\\nIELTSInternet browsing &amp; on...</td>\n",
231
+ " <td>0.760000</td>\n",
232
+ " </tr>\n",
233
+ " <tr>\n",
234
+ " <th>4</th>\n",
235
+ " <td>NaN</td>\n",
236
+ " <td>Professional accountant with an outstanding wo...</td>\n",
237
+ " <td>['Analytical reasoning', 'Compliance testing k...</td>\n",
238
+ " <td>['Kent State University']</td>\n",
239
+ " <td>['Bachelor of Business Administration']</td>\n",
240
+ " <td>[None]</td>\n",
241
+ " <td>['3.84']</td>\n",
242
+ " <td>[None]</td>\n",
243
+ " <td>['Accounting']</td>\n",
244
+ " <td>['Company Name', 'Company Name', 'Company Name...</td>\n",
245
+ " <td>...</td>\n",
246
+ " <td>[None]</td>\n",
247
+ " <td>[None]</td>\n",
248
+ " <td>['February 15, 2021']</td>\n",
249
+ " <td>Senior iOS Engineer</td>\n",
250
+ " <td>Bachelor of Science (BSc) in Computer Science</td>\n",
251
+ " <td>At least 4 years</td>\n",
252
+ " <td>NaN</td>\n",
253
+ " <td>iOS Lifecycle\\nRequirement Analysis\\nNative Fr...</td>\n",
254
+ " <td>iOS\\niOS App Developer\\niOS Application Develo...</td>\n",
255
+ " <td>0.650000</td>\n",
256
+ " </tr>\n",
257
+ " </tbody>\n",
258
+ "</table>\n",
259
+ "<p>5 rows × 35 columns</p>\n",
260
+ "</div>"
261
+ ],
262
+ "text/plain": [
263
+ " address career_objective \\\n",
264
+ "0 NaN Big data analytics working and database wareho... \n",
265
+ "1 NaN Fresher looking to join as a data analyst and ... \n",
266
+ "2 NaN NaN \n",
267
+ "3 NaN To obtain a position in a fast-paced business ... \n",
268
+ "4 NaN Professional accountant with an outstanding wo... \n",
269
+ "\n",
270
+ " skills \\\n",
271
+ "0 ['Big Data', 'Hadoop', 'Hive', 'Python', 'Mapr... \n",
272
+ "1 ['Data Analysis', 'Data Analytics', 'Business ... \n",
273
+ "2 ['Software Development', 'Machine Learning', '... \n",
274
+ "3 ['accounts payables', 'accounts receivables', ... \n",
275
+ "4 ['Analytical reasoning', 'Compliance testing k... \n",
276
+ "\n",
277
+ " educational_institution_name \\\n",
278
+ "0 ['The Amity School of Engineering & Technology... \n",
279
+ "1 ['Delhi University - Hansraj College', 'Delhi ... \n",
280
+ "2 ['Birla Institute of Technology (BIT), Ranchi'] \n",
281
+ "3 ['Martinez Adult Education, Business Training ... \n",
282
+ "4 ['Kent State University'] \n",
283
+ "\n",
284
+ " degree_names passing_years \\\n",
285
+ "0 ['B.Tech'] ['2019'] \n",
286
+ "1 ['B.Sc (Maths)', 'M.Sc (Science) (Statistics)'] ['2015', '2018'] \n",
287
+ "2 ['B.Tech'] ['2018'] \n",
288
+ "3 ['Computer Applications Specialist Certificate... ['2008'] \n",
289
+ "4 ['Bachelor of Business Administration'] [None] \n",
290
+ "\n",
291
+ " educational_results result_types major_field_of_studies \\\n",
292
+ "0 ['N/A'] [None] ['Electronics'] \n",
293
+ "1 ['N/A', 'N/A'] ['N/A', 'N/A'] ['Mathematics', 'Statistics'] \n",
294
+ "2 ['N/A'] ['N/A'] ['Electronics/Telecommunication'] \n",
295
+ "3 [None] [None] ['Computer Applications'] \n",
296
+ "4 ['3.84'] [None] ['Accounting'] \n",
297
+ "\n",
298
+ " professional_company_names ... online_links \\\n",
299
+ "0 ['Coca-COla'] ... NaN \n",
300
+ "1 ['BIB Consultancy'] ... NaN \n",
301
+ "2 ['Axis Bank Limited'] ... NaN \n",
302
+ "3 ['Company Name ï¼ City , State', 'Company Name... ... NaN \n",
303
+ "4 ['Company Name', 'Company Name', 'Company Name... ... [None] \n",
304
+ "\n",
305
+ " issue_dates expiry_dates \\\n",
306
+ "0 NaN NaN \n",
307
+ "1 NaN NaN \n",
308
+ "2 NaN NaN \n",
309
+ "3 NaN NaN \n",
310
+ "4 [None] ['February 15, 2021'] \n",
311
+ "\n",
312
+ " job_position_name \\\n",
313
+ "0 Senior Software Engineer \n",
314
+ "1 Machine Learning (ML) Engineer \n",
315
+ "2 Executive/ Senior Executive- Trade Marketing, ... \n",
316
+ "3 Business Development Executive \n",
317
+ "4 Senior iOS Engineer \n",
318
+ "\n",
319
+ " educationaL_requirements experiencere_requirement \\\n",
320
+ "0 B.Sc in Computer Science & Engineering from a ... At least 1 year \n",
321
+ "1 M.Sc in Computer Science & Engineering or in a... At least 5 year(s) \n",
322
+ "2 Master of Business Administration (MBA) At least 3 years \n",
323
+ "3 Bachelor/Honors 1 to 3 years \n",
324
+ "4 Bachelor of Science (BSc) in Computer Science At least 4 years \n",
325
+ "\n",
326
+ " age_requirement responsibilities.1 \\\n",
327
+ "0 NaN Technical Support\\nTroubleshooting\\nCollaborat... \n",
328
+ "1 NaN Machine Learning Leadership\\nCross-Functional ... \n",
329
+ "2 NaN Trade Marketing Executive\\nBrand Visibility, S... \n",
330
+ "3 Age 22 to 30 years Apparel Sourcing\\nQuality Garment Sourcing\\nRe... \n",
331
+ "4 NaN iOS Lifecycle\\nRequirement Analysis\\nNative Fr... \n",
332
+ "\n",
333
+ " skills_required matched_score \n",
334
+ "0 NaN 0.850000 \n",
335
+ "1 NaN 0.750000 \n",
336
+ "2 Brand Promotion\\nCampaign Management\\nField Su... 0.416667 \n",
337
+ "3 Fast typing skill\\nIELTSInternet browsing & on... 0.760000 \n",
338
+ "4 iOS\\niOS App Developer\\niOS Application Develo... 0.650000 \n",
339
+ "\n",
340
+ "[5 rows x 35 columns]"
341
+ ]
342
+ },
343
+ "execution_count": 9,
344
+ "metadata": {},
345
+ "output_type": "execute_result"
346
+ }
347
+ ],
348
+ "source": [
349
+ "data = pd.read_csv(params['input'])\n",
350
+ "data.head()"
351
+ ]
352
+ },
353
+ {
354
+ "cell_type": "code",
355
+ "execution_count": 11,
356
+ "metadata": {},
357
+ "outputs": [],
358
+ "source": [
359
+ "from pydantic import BaseModel,ValidationError"
360
+ ]
361
+ },
362
+ {
363
+ "cell_type": "code",
364
+ "execution_count": 12,
365
+ "metadata": {},
366
+ "outputs": [],
367
+ "source": [
368
+ "given_requirments = {\n",
369
+ " \"skills\": [\"Python\", \"TensorFlow\", \"ML\"],\n",
370
+ " \"experience\": \"3+ years\",\n",
371
+ " \"education\": \"Bachelor's in CS or equivalent\",\n",
372
+ " \"keywords\": [\"data pipeline\", \"model training\", \"deployment\"]\n",
373
+ "}"
374
+ ]
375
+ },
376
+ {
377
+ "cell_type": "code",
378
+ "execution_count": 17,
379
+ "metadata": {},
380
+ "outputs": [],
381
+ "source": [
382
+ "class MyRequirments(BaseModel):\n",
383
+ " skills: list\n",
384
+ " experience:str\n",
385
+ " education:str\n",
386
+ " keywords:list\n"
387
+ ]
388
+ },
389
+ {
390
+ "cell_type": "code",
391
+ "execution_count": 19,
392
+ "metadata": {},
393
+ "outputs": [
394
+ {
395
+ "name": "stdout",
396
+ "output_type": "stream",
397
+ "text": [
398
+ "valid\n"
399
+ ]
400
+ }
401
+ ],
402
+ "source": [
403
+ "try:\n",
404
+ " MyRequirments(**given_requirments) \n",
405
+ " print(\"valid\")\n",
406
+ "except ValidationError as e:\n",
407
+ " print(\"-----\",e.errors())"
408
+ ]
409
+ },
410
+ {
411
+ "cell_type": "code",
412
+ "execution_count": null,
413
+ "metadata": {},
414
+ "outputs": [],
415
+ "source": [
416
+ "class User(BaseModel):\n",
417
+ " skills: list\n",
418
+ " experience:str\n",
419
+ " education:str\n",
420
+ " \n",
421
+ " \n",
422
+ " "
423
+ ]
424
+ },
425
+ {
426
+ "cell_type": "code",
427
+ "execution_count": 21,
428
+ "metadata": {},
429
+ "outputs": [
430
+ {
431
+ "data": {
432
+ "text/plain": [
433
+ "Index(['address', 'career_objective', 'skills', 'educational_institution_name',\n",
434
+ " 'degree_names', 'passing_years', 'educational_results', 'result_types',\n",
435
+ " 'major_field_of_studies', 'professional_company_names', 'company_urls',\n",
436
+ " 'start_dates', 'end_dates', 'related_skils_in_job', 'positions',\n",
437
+ " 'locations', 'responsibilities', 'extra_curricular_activity_types',\n",
438
+ " 'extra_curricular_organization_names',\n",
439
+ " 'extra_curricular_organization_links', 'role_positions', 'languages',\n",
440
+ " 'proficiency_levels', 'certification_providers', 'certification_skills',\n",
441
+ " 'online_links', 'issue_dates', 'expiry_dates', 'job_position_name',\n",
442
+ " 'educationaL_requirements', 'experiencere_requirement',\n",
443
+ " 'age_requirement', 'responsibilities.1', 'skills_required',\n",
444
+ " 'matched_score'],\n",
445
+ " dtype='object')"
446
+ ]
447
+ },
448
+ "execution_count": 21,
449
+ "metadata": {},
450
+ "output_type": "execute_result"
451
+ }
452
+ ],
453
+ "source": [
454
+ "data.columns"
455
+ ]
456
+ },
457
+ {
458
+ "cell_type": "code",
459
+ "execution_count": null,
460
+ "metadata": {},
461
+ "outputs": [],
462
+ "source": [
463
+ "user_colums = [\"skills\",\"degree_names\",\"professional_company_names\",\n",
464
+ " \"related_skils_in_job\",'start_dates', 'end_dates'\n",
465
+ " ]\n",
466
+ "req_colums=[\"skills_required\",\"educationaL_requirements\",\"experiencere_requirement\"]"
467
+ ]
468
+ },
469
+ {
470
+ "cell_type": "code",
471
+ "execution_count": 23,
472
+ "metadata": {},
473
+ "outputs": [
474
+ {
475
+ "data": {
476
+ "text/html": [
477
+ "<div>\n",
478
+ "<style scoped>\n",
479
+ " .dataframe tbody tr th:only-of-type {\n",
480
+ " vertical-align: middle;\n",
481
+ " }\n",
482
+ "\n",
483
+ " .dataframe tbody tr th {\n",
484
+ " vertical-align: top;\n",
485
+ " }\n",
486
+ "\n",
487
+ " .dataframe thead th {\n",
488
+ " text-align: right;\n",
489
+ " }\n",
490
+ "</style>\n",
491
+ "<table border=\"1\" class=\"dataframe\">\n",
492
+ " <thead>\n",
493
+ " <tr style=\"text-align: right;\">\n",
494
+ " <th></th>\n",
495
+ " <th>skills</th>\n",
496
+ " <th>degree_names</th>\n",
497
+ " <th>professional_company_names</th>\n",
498
+ " <th>related_skils_in_job</th>\n",
499
+ " <th>start_dates</th>\n",
500
+ " <th>end_dates</th>\n",
501
+ " </tr>\n",
502
+ " </thead>\n",
503
+ " <tbody>\n",
504
+ " <tr>\n",
505
+ " <th>0</th>\n",
506
+ " <td>['Big Data', 'Hadoop', 'Hive', 'Python', 'Mapr...</td>\n",
507
+ " <td>['B.Tech']</td>\n",
508
+ " <td>['Coca-COla']</td>\n",
509
+ " <td>[['Big Data']]</td>\n",
510
+ " <td>['Nov 2019']</td>\n",
511
+ " <td>['Till Date']</td>\n",
512
+ " </tr>\n",
513
+ " <tr>\n",
514
+ " <th>1</th>\n",
515
+ " <td>['Data Analysis', 'Data Analytics', 'Business ...</td>\n",
516
+ " <td>['B.Sc (Maths)', 'M.Sc (Science) (Statistics)']</td>\n",
517
+ " <td>['BIB Consultancy']</td>\n",
518
+ " <td>[['Data Analysis', 'Business Analysis', 'Machi...</td>\n",
519
+ " <td>['Sep 2019']</td>\n",
520
+ " <td>['Till Date']</td>\n",
521
+ " </tr>\n",
522
+ " <tr>\n",
523
+ " <th>2</th>\n",
524
+ " <td>['Software Development', 'Machine Learning', '...</td>\n",
525
+ " <td>['B.Tech']</td>\n",
526
+ " <td>['Axis Bank Limited']</td>\n",
527
+ " <td>[['Unified Payment Interface', 'Risk Predictio...</td>\n",
528
+ " <td>['June 2018']</td>\n",
529
+ " <td>['Till Date']</td>\n",
530
+ " </tr>\n",
531
+ " <tr>\n",
532
+ " <th>3</th>\n",
533
+ " <td>['accounts payables', 'accounts receivables', ...</td>\n",
534
+ " <td>['Computer Applications Specialist Certificate...</td>\n",
535
+ " <td>['Company Name ï¼ City , State', 'Company Name...</td>\n",
536
+ " <td>[['accounts receivables', 'banking', 'G/L Acco...</td>\n",
537
+ " <td>['January 2011', 'January 2008', 'January 2006...</td>\n",
538
+ " <td>['November 2015', 'January 2010', 'January 200...</td>\n",
539
+ " </tr>\n",
540
+ " <tr>\n",
541
+ " <th>4</th>\n",
542
+ " <td>['Analytical reasoning', 'Compliance testing k...</td>\n",
543
+ " <td>['Bachelor of Business Administration']</td>\n",
544
+ " <td>['Company Name', 'Company Name', 'Company Name...</td>\n",
545
+ " <td>[['collections', 'accounts receivable', 'finan...</td>\n",
546
+ " <td>['January 2016', 'January 2016', 'January 2012...</td>\n",
547
+ " <td>['Current', 'January 2016', 'January 2015', 'J...</td>\n",
548
+ " </tr>\n",
549
+ " </tbody>\n",
550
+ "</table>\n",
551
+ "</div>"
552
+ ],
553
+ "text/plain": [
554
+ " skills \\\n",
555
+ "0 ['Big Data', 'Hadoop', 'Hive', 'Python', 'Mapr... \n",
556
+ "1 ['Data Analysis', 'Data Analytics', 'Business ... \n",
557
+ "2 ['Software Development', 'Machine Learning', '... \n",
558
+ "3 ['accounts payables', 'accounts receivables', ... \n",
559
+ "4 ['Analytical reasoning', 'Compliance testing k... \n",
560
+ "\n",
561
+ " degree_names \\\n",
562
+ "0 ['B.Tech'] \n",
563
+ "1 ['B.Sc (Maths)', 'M.Sc (Science) (Statistics)'] \n",
564
+ "2 ['B.Tech'] \n",
565
+ "3 ['Computer Applications Specialist Certificate... \n",
566
+ "4 ['Bachelor of Business Administration'] \n",
567
+ "\n",
568
+ " professional_company_names \\\n",
569
+ "0 ['Coca-COla'] \n",
570
+ "1 ['BIB Consultancy'] \n",
571
+ "2 ['Axis Bank Limited'] \n",
572
+ "3 ['Company Name ï¼ City , State', 'Company Name... \n",
573
+ "4 ['Company Name', 'Company Name', 'Company Name... \n",
574
+ "\n",
575
+ " related_skils_in_job \\\n",
576
+ "0 [['Big Data']] \n",
577
+ "1 [['Data Analysis', 'Business Analysis', 'Machi... \n",
578
+ "2 [['Unified Payment Interface', 'Risk Predictio... \n",
579
+ "3 [['accounts receivables', 'banking', 'G/L Acco... \n",
580
+ "4 [['collections', 'accounts receivable', 'finan... \n",
581
+ "\n",
582
+ " start_dates \\\n",
583
+ "0 ['Nov 2019'] \n",
584
+ "1 ['Sep 2019'] \n",
585
+ "2 ['June 2018'] \n",
586
+ "3 ['January 2011', 'January 2008', 'January 2006... \n",
587
+ "4 ['January 2016', 'January 2016', 'January 2012... \n",
588
+ "\n",
589
+ " end_dates \n",
590
+ "0 ['Till Date'] \n",
591
+ "1 ['Till Date'] \n",
592
+ "2 ['Till Date'] \n",
593
+ "3 ['November 2015', 'January 2010', 'January 200... \n",
594
+ "4 ['Current', 'January 2016', 'January 2015', 'J... "
595
+ ]
596
+ },
597
+ "execution_count": 23,
598
+ "metadata": {},
599
+ "output_type": "execute_result"
600
+ }
601
+ ],
602
+ "source": [
603
+ "data_user = data[user_colums]\n",
604
+ "data_user.head()"
605
+ ]
606
+ }
607
+ ],
608
+ "metadata": {
609
+ "kernelspec": {
610
+ "display_name": ".venv",
611
+ "language": "python",
612
+ "name": "python3"
613
+ },
614
+ "language_info": {
615
+ "codemirror_mode": {
616
+ "name": "ipython",
617
+ "version": 3
618
+ },
619
+ "file_extension": ".py",
620
+ "mimetype": "text/x-python",
621
+ "name": "python",
622
+ "nbconvert_exporter": "python",
623
+ "pygments_lexer": "ipython3",
624
+ "version": "3.13.1"
625
+ }
626
+ },
627
+ "nbformat": 4,
628
+ "nbformat_minor": 2
629
+ }
schema.yaml ADDED
File without changes
src/AI_AGENT_RESUME_SELECTOR/__init__.py ADDED
File without changes
src/AI_AGENT_RESUME_SELECTOR/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (224 Bytes). View file
 
src/AI_AGENT_RESUME_SELECTOR/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (180 Bytes). View file
 
src/AI_AGENT_RESUME_SELECTOR/components/__init__.py ADDED
File without changes
src/AI_AGENT_RESUME_SELECTOR/components/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (235 Bytes). View file
 
src/AI_AGENT_RESUME_SELECTOR/components/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (191 Bytes). View file
 
src/AI_AGENT_RESUME_SELECTOR/components/__pycache__/canddidatePreprocessingAgent.cpython-312.pyc ADDED
Binary file (3.97 kB). View file
 
src/AI_AGENT_RESUME_SELECTOR/components/__pycache__/canddidatePreprocessingAgent.cpython-313.pyc ADDED
Binary file (3.94 kB). View file
 
src/AI_AGENT_RESUME_SELECTOR/components/__pycache__/jobpreprocessingAgent.cpython-313.pyc ADDED
Binary file (2.26 kB). View file
 
src/AI_AGENT_RESUME_SELECTOR/components/__pycache__/myagent.cpython-313.pyc ADDED
Binary file (7.02 kB). View file
 
src/AI_AGENT_RESUME_SELECTOR/components/agentUI.py ADDED
File without changes
src/AI_AGENT_RESUME_SELECTOR/components/canddidatePreprocessingAgent.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.AI_AGENT_RESUME_SELECTOR.components.entity import projectEntity
2
+ import os
3
+ import yaml
4
+ from agno.agent import Agent, RunResponse
5
+ from agno.models.groq import Groq
6
+ import json
7
+ import re
8
+ import csv
9
+
10
# Groq credential, read once at import time; None when the variable is unset.
api_key = os.environ.get("GROQ_API_KEY")
# Report only whether a key is present -- never echo the secret to stdout.
print("-------key", "set" if api_key else "missing")
# Relative path, so it is resolved against the current working directory.
path = "params.yaml"
# Use a context manager so the file handle is closed instead of leaked.
with open(path, encoding="utf-8") as params_file:
    params = yaml.safe_load(params_file)['preprocess']
print("loading YAML",path)
16
+
17
def convert_file_json():
    """Convert the candidate CSV into a JSON list of candidate records.

    Reads the CSV at ``params['outputcandidate']`` and writes the result to
    ``params['jsoncandidate']``. Each record holds the zero-based row
    position as ``id`` plus five text columns with surrounding whitespace
    removed. Columns absent from the CSV become empty strings.
    """
    columns = (
        "skills",
        "degree_names",
        "professional_company_names",
        "start_dates",
        "end_dates",
    )
    results = []
    with open(params['outputcandidate'], newline='', encoding='utf-8') as csvfile:
        for index, row in enumerate(csv.DictReader(csvfile)):
            record = {"id": index}
            for column in columns:
                # DictReader fills cells missing from a short row with None,
                # so fall back to "" before stripping.
                record[column] = (row.get(column) or "").strip()
            results.append(record)
    # Write with the same encoding the JSON readers in this module use.
    with open(params['jsoncandidate'], 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=4)
34
+
35
+ convert_file_json()
36
+
37
def read_job_req() -> str:
    """Return the stored job requirements as a JSON-formatted string.

    The data is loaded from the file at ``params['jsonJobReq']`` and echoed
    to stdout before being returned.
    """
    with open(params['jsonJobReq'], "r", encoding="utf-8") as f:
        data = json.load(f)
    print("job data---",data)
    # json.dumps yields real JSON (double quotes, true/false/null); str()
    # would return the Python repr of the object instead.
    return json.dumps(data, ensure_ascii=False)
43
+
44
def read_candidates() -> str:
    """Return the list of candidates as a JSON-formatted string."""
    with open(params['jsoncandidate'], "r", encoding="utf-8") as f:
        data = json.load(f)
    print("candidate data---",data)
    # json.dumps yields real JSON (double quotes, true/false/null); str()
    # would return the Python repr of the list instead.
    return json.dumps(data, ensure_ascii=False)
50
+
51
# Candidate-side agent. It uses model id "llama-3.1-8b-instant" through the
# Groq model class and is given read_candidates as a tool so it can fetch the
# candidate records itself.
agent = Agent(
    name="candidate agent",
    model= Groq(
        id="llama-3.1-8b-instant",
        api_key = api_key
    ),
    role="loads the candidates list",
    description= "you are an recruiter which recruits candidates based on given job requirments",
    # Describes to the model the record layout that read_candidates returns.
    instructions=["""
read_candidates will give you list of candidates in this format.
record = {
"id":index,
"skills": ,
"degree_names": ,
"professional_company_names":
"start_dates":
"end_dates":
}

"""],
    tools=[read_candidates],
    show_tool_calls=True,
    markdown=True
)
75
+
76
+
src/AI_AGENT_RESUME_SELECTOR/components/dataIngestion.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import os
3
+ import yaml
4
+
5
# Location of the pipeline settings; resolved against the working directory.
path = "params.yaml"
# Use a context manager so the file handle is closed instead of leaked.
with open(path, encoding="utf-8") as params_file:
    params = yaml.safe_load(params_file)['preprocess']
print("loading YAML",path)

# Columns copied into the candidate extract.
candidate_colums = ["skills","degree_names","professional_company_names",
                    "related_skils_in_job",'start_dates', 'end_dates'
                    ]

# Columns copied into the job-requirement extract.
req_colums=["skills_required","educationaL_requirements",
            "experiencere_requirement"]

data = pd.read_csv(params["input"])
# Cap the sample at the rows available: DataFrame.sample raises ValueError
# when n exceeds the population and replace is False.
data = data.sample(n=min(500, len(data)), random_state=42)
# The input file is overwritten with the sample. index=False keeps the row
# index out of it; otherwise every rerun would read the old index back as an
# extra "Unnamed: 0" column and write yet another one.
data.to_csv(params['input'], index=False)
print("preprocessing data")
# Only the first 20 sampled rows go into each extract.
candidates = data[candidate_colums]
candidates= candidates[0:20]
reqs = data[req_colums]
reqs = reqs[0:20]
candidates.to_csv(params['outputcandidate'])
reqs.to_csv(params['outputreq'])
print("----data preprocesse sucessfully----")
src/AI_AGENT_RESUME_SELECTOR/components/entity/__init__.py ADDED
File without changes
src/AI_AGENT_RESUME_SELECTOR/components/entity/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (242 Bytes). View file
 
src/AI_AGENT_RESUME_SELECTOR/components/entity/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (198 Bytes). View file
 
src/AI_AGENT_RESUME_SELECTOR/components/entity/__pycache__/projectEntity.cpython-312.pyc ADDED
Binary file (1.04 kB). View file
 
src/AI_AGENT_RESUME_SELECTOR/components/entity/__pycache__/projectEntity.cpython-313.pyc ADDED
Binary file (1.1 kB). View file
 
src/AI_AGENT_RESUME_SELECTOR/components/entity/projectEntity.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel,ValidationError
2
+
3
# Structured job requirements. Documented with comments rather than a class
# docstring so the pydantic-generated schema is left untouched.
# Every field is a bare ``list``: element types are not constrained.
class MyRequirments(BaseModel):
    skills: list
    experience:list
    education:list
    keywords:list
8
+
9
# One candidate profile. Documented with comments rather than a class
# docstring so the pydantic-generated schema is left untouched.
# Every field is a bare ``list``: element types are not constrained.
class Candidate(BaseModel):
    skills: list
    experience:list
    education:list
    company:list
14
+
15
# Wrapper model holding a single Candidate.
class Selection(BaseModel):
    candidate:Candidate
src/AI_AGENT_RESUME_SELECTOR/components/jobpreprocessingAgent.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.AI_AGENT_RESUME_SELECTOR.components.entity import projectEntity
2
+
3
+ import os
4
+ import yaml
5
+ from agno.agent import Agent, RunResponse
6
+ from agno.models.groq import Groq
7
+ import json
8
+ import re
9
# Groq credential, read once at import time; None when the variable is unset.
api_key = os.environ.get("GROQ_API_KEY")
# Report only whether a key is present -- never echo the secret to stdout.
print("-------key", "set" if api_key else "missing")
# Relative path, so it is resolved against the current working directory.
path = "params.yaml"
# Use a context manager so the file handle is closed instead of leaked.
with open(path, encoding="utf-8") as params_file:
    params = yaml.safe_load(params_file)['preprocess']
print("loading YAML",path)
15
+
16
def load_jd(file_path):
    """Read the job description stored at *file_path* and return its text.

    The file is decoded as UTF-8.
    """
    with open(file_path, mode='r', encoding='utf-8') as jd_file:
        jd_text = jd_file.read()
    return jd_text
19
+
20
# Job-description agent. It uses model id "llama-3.1-8b-instant" through the
# Groq model class; response_model asks for output shaped like
# projectEntity.MyRequirments, the same four keys the prompt spells out.
agent = Agent(
    name="jobprocessing agent",
    model= Groq(
        id="llama-3.1-8b-instant",
        api_key = api_key
    ),
    role="loads job requirment",
    description='''You are a recruitment assistant. From a given job description, extract and return a structured JSON like this:

{
"skills": [...],
"experience": [...],
"education": [...],
"keywords": [...]
}

Do not wrap the response in code blocks or Python variables.
Only return valid JSON.
''',
    instructions=["return in json form"
    ],
    response_model=projectEntity.MyRequirments,
)
43
+
44
# Runs at import time: extract the requirements from the configured job
# description and store them as JSON at params['jsonJobReq'].
txt = load_jd(params['inputJobreq'])
response = agent.run(txt)
print(">>>>>> here is respone >>>",f"data{response.content}")

_content = response.content
if hasattr(_content, "model_dump"):
    # The agent is built with response_model, so the content can arrive as an
    # already-parsed pydantic model; json.loads would raise TypeError on it.
    op = _content.model_dump()
elif isinstance(_content, (str, bytes, bytearray)):
    # Raw text reply: parse it as JSON, as before.
    op = json.loads(_content)
else:
    # Already a plain JSON-compatible object (e.g. dict); write it as is.
    op = _content

with open(params['jsonJobReq'], "w", encoding="utf-8") as f:
    print("------",params['jsonJobReq'])
    json.dump(op, f, indent=4)
52
+
src/AI_AGENT_RESUME_SELECTOR/components/myagent.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.AI_AGENT_RESUME_SELECTOR.components import canddidatePreprocessingAgent as ca
2
+ import gradio as gr
3
+ from src.AI_AGENT_RESUME_SELECTOR.components import jobpreprocessingAgent as ja
4
+ import os
5
+ from agno.team.team import Team
6
+ from agno.models.groq import Groq
7
+
8
+ #--- setting api key----
9
+ api_key = ""
10
def set_api_key(api_key):
    """Store the Groq API key in this process's environment.

    Args:
        api_key: Key text entered by the user. Surrounding whitespace
            (e.g. a newline picked up while pasting) is removed.

    Returns:
        A status message for the UI: the success text when a key was
        stored, otherwise a notice that nothing was saved.
    """
    cleaned = (api_key or "").strip()
    if not cleaned:
        # Leave any previously configured key in place rather than
        # replacing it with an empty value.
        return "API Key is empty; nothing was saved."
    os.environ["GROQ_API_KEY"] = cleaned
    return "API Key saved successfully!"
13
+
14
+ # Function to process the uploaded text file
15
def read_txt_file(file):
    """Return the text content of an uploaded file.

    Args:
        file: The upload, given either as a path string or as an object
            exposing the path through a ``name`` attribute; ``None`` when
            nothing was uploaded.

    Returns:
        The file's text decoded as UTF-8, or the string
        "No file uploaded." when *file* is ``None``.
    """
    if file is None:
        return "No file uploaded."
    # gr.File can hand the callback a plain path string instead of a wrapper
    # object with .name, so accept both forms.
    file_path = getattr(file, "name", file)
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.read()
21
+
22
def agent_response(text,selection,file):
    """Answer a query with the candidate agent and the chosen job requirements.

    Args:
        text: The user's query.
        selection: Radio choice; either "use default job requirments" or
            "upload my custom job requirments".
        file: Uploaded job-description file, used only for the custom choice.

    Returns:
        A 2-tuple of strings: the agent's answer and the selection followed
        by the requirements used. Both are "error occured" when anything
        fails, and a "no file" message pair when no requirements are
        available.
    """
    # Report only whether a key is configured; the key itself must not be
    # written to the logs.
    print(">>>>>>>>>updated key:--", "set" if os.environ.get("GROQ_API_KEY") else "missing")
    try:
        print(">>>>>>>selected option:",selection)
        if selection == "use default job requirments":
            req = str(ca.read_job_req())
        elif selection == "upload my custom job requirments":
            if file is None:
                # Without this guard the placeholder text from read_txt_file
                # would be sent to the model as if it were a job description.
                return "no file uploaed","no file uploaded"
            # The extraction result may not be a str; normalise it so the
            # concatenation in the return value cannot raise TypeError.
            req = str(extract_requirment(file))
        else:
            return "no file uploaed","no file uploaded"

        ca.convert_file_json()
        response = ca.agent.run(f"here is job requirments{req}.Here is what i want : {text}")
        resp = f"here is reponse:-------- {str(response.content)}"
        return resp,selection + req
    except Exception as exc:
        # UI boundary: keep the user-facing message but leave a trace of the
        # cause instead of discarding it.
        print(">>>>>>>agent_response failed:", repr(exc))
        return "error occured","error occured"
41
+
42
def agent_response2(text,selection,file):
    """Answer a query by running the multi-agent ``manager`` team.

    Args:
        text: The user's query, passed to ``manager.run``.
        selection: Accepted to match the click handler's inputs; not used.
        file: Accepted to match the click handler's inputs; not used.

    Returns:
        A 2-tuple: the manager's response content and the literal "req",
        or ("error occured", "error occured") when anything fails.
    """
    # Report only whether a key is configured; the key itself must not be
    # written to the logs.
    print(">>>>>>>>>updated key:--", "set" if os.environ.get("GROQ_API_KEY") else "missing")
    try:
        ca.convert_file_json()
        response = manager.run(text)
        print(response)
        return response.content,"req"
    except Exception as exc:
        # UI boundary: keep the user-facing message but leave a trace of the
        # cause instead of discarding it.
        print(">>>>>>>agent_response2 failed:", repr(exc))
        return "error occured","error occured"
52
+
53
def extract_requirment(file):
    """Run the job-processing agent over an uploaded job description.

    The upload is read with read_txt_file, and the content of the agent's
    response is returned.
    """
    job_text = str(read_txt_file(file))
    return ja.agent.run(job_text).content
58
+
59
+
60
# Gradio interface with three tabs: the main query form, API-key entry, and
# the architecture image.
with gr.Blocks(title="AI Recruiter Agent") as face:

    gr.Markdown("## AI Recruiter Agent")
    gr.Markdown("### Created by [Sunny Kumar](https://www.linkedin.com/in/sunny-kumar-b232417a/)")
    with gr.Tabs():
        with gr.Tab("Main App"):
            # Inputs: free-text query plus an optional .txt job description.
            with gr.Row():
                textbox_input = gr.Textbox(lines=2, placeholder="enter text here", label="Your Query")
                file_input = gr.File(file_types=[".txt"], label="Upload Job Requirements.")

            # The choice strings are the ones agent_response compares against.
            with gr.Row():
                radio_input = gr.Radio(
                    choices=["use default job requirments", "upload my custom job requirments"],
                    label="Pick one",
                    value="use default job requirments"
                )

            with gr.Row():
                output_candidate = gr.Textbox(label="Selected Candidate")
                output_skills = gr.Textbox(label="Skills Extracted")

            submit_button = gr.Button("Submit")

            # Sample queries that fill the query textbox.
            examples = gr.Examples(
                examples=[
                    ["select the best candidate for my job requirment based on skills"],
                    ["list only the id of candidates who are suitable for job"]
                ],
                inputs=[textbox_input]
            )

            # Submit is wired to agent_response2 (the team-based handler),
            # not to agent_response.
            submit_button.click(
                fn=agent_response2,
                inputs=[textbox_input, radio_input, file_input],
                outputs=[output_candidate, output_skills]
            )
        with gr.Tab("API"):
            gr.Markdown("## Manage groq API Keys")
            api_key_input = gr.Textbox(label="Enter API Key", type="password")
            save_btn = gr.Button("Save API Key")
            api_status = gr.Textbox(label="Status", interactive=False)
            # set_api_key's return message is shown in the status box.
            save_btn.click(set_api_key, inputs=api_key_input, outputs=api_status)

        with gr.Tab("Image"):
            gr.Markdown("## Architecture")
            image = gr.Image(value="data/image/arch.png", label="© sunny kumar", type="filepath")
106
+
107
+
108
#--------------multi agent handler--------
# Team combining the job-processing agent and the candidate agent;
# agent_response2 calls manager.run() with the user's query.
manager = Team(
    name="manager",
    mode="coordinate",
    model= Groq(
        id="llama-3.1-8b-instant",
        # NOTE(review): api_key here is the module-level "" assigned near the
        # top of this file, not a key saved later through set_api_key (which
        # only writes the environment variable) -- confirm the Groq model
        # falls back to GROQ_API_KEY when given an empty key.
        api_key = api_key
    ),
    members=[ja.agent ,ca.agent],
    description="""you are manager with selects candidates based on job
requirments and candidates list .
first ask "jobprocessing agent" to load the job requirment.
then ask "candidate agent" to load the candidate list.
then select the best candate based on supplied criteria.
1. load the job requirment using ja.agent
2.load the candidate list using ca.agent
3. select the best candidate
4. return the list of selected candidate
important:dont add any fake data
""",
    enable_agentic_context=True, # Allow the agent to maintain a shared context and send that to members.
    share_member_interactions=True, # Share all member responses with subsequent member requests.
    show_members_responses=True,
    markdown=True,
    success_criteria= "selected list of candidate"

)
src/__init__.py ADDED
File without changes
src/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (199 Bytes). View file
 
src/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (155 Bytes). View file
 
template.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ import logging
4
+
5
# Without explicit configuration the root logger stays at WARNING, so the
# logging.info progress messages below would never be shown.
logging.basicConfig(level=logging.INFO)

PROJECTNAME = "AI_AGENT_RESUME_SELECTOR"
# Files (and, implicitly, their parent directories) that make up the
# project skeleton.
list_files =[
    ".github/workflows/.gitkeep",
    f"src/{PROJECTNAME}/__init__.py",
    f"src/{PROJECTNAME}/components/__init__.py",
    f"src/{PROJECTNAME}/utilities/__init__.py",
    f"src/{PROJECTNAME}/utilities/common.py",
    f"src/{PROJECTNAME}/configurations/__init__.py",
    f"src/{PROJECTNAME}/configurations/config.py",
    f"src/{PROJECTNAME}/entity/__init__.py",
    f"src/{PROJECTNAME}/entity/projectEntity.py",
    "params.yaml",
    "schema.yaml",
    "main.py",
    "research/research.ipynb",
    "data/raw/__init__.py",
    "data/cleaned/__init__.py"
]

for filepath in list_files:
    filepath = Path(filepath)
    # Named parent_dir rather than dir so the builtin is not shadowed.
    parent_dir, filename = os.path.split(filepath)
    if parent_dir != "":
        os.makedirs(parent_dir, exist_ok=True)
        logging.info(f">>>>>> creating directory {parent_dir}")
    # Only create files that are missing or empty, so existing content is
    # never truncated.
    if (not os.path.exists(filepath)) or (os.path.getsize(filepath) == 0):
        with open(filepath, "w") as f:
            pass
        logging.info(f">>>>>> creating file {filepath}")