shekkari21 committed on
Commit 9607899
0 Parent(s):

Update .gitignore to exclude resume_data directory

Files changed (41)
  1. .gitignore +10 -0
  2. Code+Folder/.streamlit/config.toml +9 -0
  3. Code+Folder/jd_data/bianalyst1_google.txt +18 -0
  4. Code+Folder/jd_data/bianalyst2_amazon.txt +24 -0
  5. Code+Folder/jd_data/bianalyst3_nicbl.txt +30 -0
  6. Code+Folder/jd_data/bianalyst4_rocketlane.txt +34 -0
  7. Code+Folder/jd_data/bigdataanalyst1_accenture.txt +18 -0
  8. Code+Folder/jd_data/bigdataanalyst2_maveric.txt +12 -0
  9. Code+Folder/jd_data/bigdataanalyst3_inpost.txt +22 -0
  10. Code+Folder/jd_data/bigdataanalyst4_quess.txt +9 -0
  11. Code+Folder/jd_data/da1_rakuten.txt +58 -0
  12. Code+Folder/jd_data/da2_schneider.txt +30 -0
  13. Code+Folder/jd_data/da3_infosys.txt +22 -0
  14. Code+Folder/jd_data/da4_tcs.txt +22 -0
  15. Code+Folder/jd_data/dataanalyst1_tcs.txt +7 -0
  16. Code+Folder/jd_data/dataanalyst2_kaplan.txt +19 -0
  17. Code+Folder/jd_data/dataanalyst3_amex.txt +23 -0
  18. Code+Folder/jd_data/dataanalyst4_amazon.txt +30 -0
  19. Code+Folder/jd_data/de1_ola.txt +33 -0
  20. Code+Folder/jd_data/de2_genpact.txt +30 -0
  21. Code+Folder/jd_data/de3_amazon.txt +20 -0
  22. Code+Folder/jd_data/de4_idexcel.txt +23 -0
  23. Code+Folder/jd_data/ds1_volvo.txt +30 -0
  24. Code+Folder/jd_data/ds2_wipro.txt +30 -0
  25. Code+Folder/jd_data/ds3_lenovo.txt +27 -0
  26. Code+Folder/jd_data/ds4_amazon.txt +30 -0
  27. Code+Folder/jd_data/mle1_allianz.txt +25 -0
  28. Code+Folder/jd_data/mle2_exxonmobil.txt +24 -0
  29. Code+Folder/jd_data/mle3_ubs.txt +21 -0
  30. Code+Folder/jd_data/mle4_oneorigin.txt +24 -0
  31. Code+Folder/jd_data/mlops1_mindtree.txt +22 -0
  32. Code+Folder/jd_data/mlops2_convin.txt +26 -0
  33. Code+Folder/jd_data/mlops3_exela.txt +17 -0
  34. Code+Folder/jd_data/mlops4_exl.txt +23 -0
  35. Code+Folder/readme.md +254 -0
  36. Code+Folder/requirements.txt +12 -0
  37. Code+Folder/src/constants.py +41 -0
  38. Code+Folder/src/directory_reader.py +143 -0
  39. Code+Folder/src/embedding_model.py +59 -0
  40. Code+Folder/src/resume_scorer.py +117 -0
  41. Code+Folder/src/resume_suggestions.py +229 -0
.gitignore ADDED
@@ -0,0 +1,10 @@
+ __pycache__/
+ *.pyc
+ .env
+ *.pem
+ .DS_Store
+ output/
+ venv/
+ .idea/
+ Data/
+ Code+Folder/resume_data/
Code+Folder/.streamlit/config.toml ADDED
@@ -0,0 +1,9 @@
+ [theme]
+ primaryColor = "#F63366"
+ backgroundColor = "#FFFFFF"
+ secondaryBackgroundColor = "#F0F2F6"
+ textColor = "#262730"
+ font = "sans serif"
+
+ [server]
+ maxUploadSize = 10
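For illustration only: the [server] block above caps Streamlit uploads at 10 MB. The sketch below is not part of this commit and assumes the app exposes a Streamlit file uploader; the widget label and variable names are hypothetical.

# Hypothetical sketch (not from this repo): a Streamlit uploader running under
# the config.toml above. With maxUploadSize = 10, Streamlit rejects files over
# 10 MB before they reach the app; the explicit check below only mirrors that
# limit so the cap is visible in code.
import streamlit as st

MAX_UPLOAD_MB = 10  # mirrors [server] maxUploadSize

uploaded = st.file_uploader("Upload a resume (PDF)", type=["pdf"])
if uploaded is not None:
    size_mb = uploaded.size / (1024 * 1024)
    if size_mb > MAX_UPLOAD_MB:
        st.error(f"File is {size_mb:.1f} MB; the configured limit is {MAX_UPLOAD_MB} MB.")
    else:
        st.success(f"Received {uploaded.name} ({size_mb:.1f} MB).")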
Code+Folder/jd_data/bianalyst1_google.txt ADDED
@@ -0,0 +1,18 @@
+ Minimum qualifications:
+ Bachelor's degree in Engineering, Analytics, or equivalent practical experience.
+ 8 years of experience in business intelligence or data analytics.
+ Experience in SQL and data visualization.
+ Experience collaborating across multiple teams.
+ Preferred qualifications:
+ Experience with data warehousing and data modeling concepts.
+ Experience with big data technologies (e.g., Hadoop, Spark).
+ Familiarity with Google Cloud Platform (GCP) and related tools.
+ About The Job
+ In this role, you will transform raw data into actionable insights that drive business decisions across Google. You will collaborate with various teams to identify key metrics, analyze data, build robust data models, and develop data-driven solutions that contribute to the success of our projects.
+
+ Responsibilities
+ Design and implement automated and standardized analysis tools, empowering users to uncover business opportunities and make informed decisions.
+ Examine and validate data integrity throughout the collection process, performing data profiling to identify and comprehend data anomalies.
+ Strategize and execute prioritized project tasks, selecting the most effective approaches to achieve project goals.
+ Identify opportunities to enhance established solutions by recommending creative methods and tools.
+ Translate complex data and analyses into readily digestible formats, enabling supported teams to accurately assess the performance of key business areas, products, and processes.
Code+Folder/jd_data/bianalyst2_amazon.txt ADDED
@@ -0,0 +1,24 @@
+ Description
+
+ We are looking for an eager learner, and team player as a Business Analyst in the Social commerce team. The Business Intelligence team in GlowRoad(a unit of Amazon) works closely with all departments for fulfilling data analysis and business operation’s related data. This is a full stack team that works at all stages in data pipelines, warehousing, reporting, business analysis, machine learning, and business optimization. The team members have immense scope of learning in terms of technology as well as business due to its visibility to cross functional data, full stack BI technologies, and rapidly growing services.
+
+ GlowRoad is a social commerce company that sells products to customers at wholesale prices and helps them resell on Facebook and WhatsApp. It also provides them with all reselling services like logistics network, ecommerce website, and the ability to collect cash.
+
+ Key job responsibilities
+
+ Projects
+ Develop analytical models to assess the problems, solutions and impact on business
+ Understand a business problem, the available data and identify what statistical techniques can be applied for the solution
+ Responsible for giving insights to management for strategic planning
+ Reporting
+ Own the design, development, automation, and maintenance of ongoing metrics, reports, analyses, dashboards on the key drivers of our business
+ Partner with operations/business teams to consult, develop and implement KPIs, automated reporting/process solutions and data infrastructure improvements to meet business
+ Enable effective decision making by retrieving and aggregating data from multiple sources and compiling it into a digestible and actionable format
+ Prepare and deliver business reviews to the senior management team regarding progress and roadblocks
+ Analyze historical and customer segmentation data to identify trends and insights
+ Data Management
+ Contribute to the architecture, design and implementation of next-generation BI solutions
+ Manage AWS resources including EC2, RDS, Redshift, Glue, Lambda etc.
+ Managing Data pipelines and warehouses.
+ Interface with other technology teams to extract, transform, and load data from a wide variety of data sources
+ Continually improve ongoing reporting and analysis processes, automating or simplifying self-service support for customers
Code+Folder/jd_data/bianalyst3_nicbl.txt ADDED
@@ -0,0 +1,30 @@
+ About the job
+ A Business Intelligence Analyst might gather, clean, and analyze data like revenue, sales, market information, or customer engagement metrics of a business. BI analysts can also be asked to program tools and data models to help visualize or monitor data.
+ Experience: 3+ Years
+ Technical:
+ Development of high-quality database solutions
+ Strong in SQL and Good knowledge in PL/SQL.
+ Should be knowledgeable in SQL Performance Tuning, Joins, UNION, etc., and writing optimized SQL queries.
+ Develop queries & procedures, functions, and triggers, create custom reports
+ Provide scheduled management reporting
+ Must have used the OBIEE platform at least for a year in the last two years
+ Teamwork:
+ Must promote a collaborative environment within and across IT teams.
+ Project Management:
+ Must be organized, self-sufficient and have the ability to manage multiple initiatives simultaneously. Must have the ability to coordinate with other teams and vendors to ensure that project workflow is effectively communicated.
+ Responsibilities:
+ Maintaining data analytics platforms
+ Evaluating and improving existing BI systems.
+ Conducting troubleshooting on BI models.
+ Generating and delivering quality reports to all departments and performing quality assurance checks on reports.
+ Creating and managing tables, views, indexes, DB-link, and other database objects in the Oracle database.
+ Identifying installation solutions for new databases.
+ Publishing and/or presenting design reports
+ Should be able to research required data.
+ Provides mentoring and training to other team members to aid in their individual development to help improve overall team performance.
+ Analyzing business requirements to design, develop and update dashboards, reports, and data extracts.
+ Review and interpret ongoing business report requirements and coordinate with business teams.
+ Job Requirements:
+ Minimum 3 years of experience needed in core SQL development
+ PL-SQL programming & Microsoft SQL Server concepts
+ HTML and JavaScript
Code+Folder/jd_data/bianalyst4_rocketlane.txt ADDED
@@ -0,0 +1,34 @@
+ About the Role
+ As the first BI hire, your role will be pivotal in consolidating disparate data sources from various tools utilized across sales, marketing, customer success, and product teams. Using connectors tailored to each tool, you'll establish a centralized repository within Snowflake, ensuring seamless data integration. From this unified database, you'll craft insightful dashboards catered to specific team needs and stakeholder interests. Your ownership of this end-to-end process will drive actionable insights and empower informed decision-making across the organization.This is a lean team and a single person role, which would need you to hit the ground running.
+ Responsibilities
+ (Phase - 1) Data Engineering & Dashboarding
+ Own and maintain DBM & connectors set-up on snowflake platform (Data Warehouse)
+ Normalize the tables into readable format for wider teams
+ Write manipulation layer with calculated fields and create presentation layer with access control
+ Create, design and publish different dashboards for different teams, stakeholders and executive
+ Maintain and update the dashboard on weekly, and a monthly basis
+ (Phase - 2) Business Analysis and Insights Generation:
+ Perform Adhoc analysis on data to derive actionable insights
+ Utilize past trends to forecast sales and other metrics as required.
+ Craft compelling narratives that tie insights to business objectives.
+ (Phase - 3) Data Modelling
+ Utilize past trends to forecast sales and other metrics as required.
+ Craft compelling narratives that tie insights to business objectives.
+ Qualifications
+ Must haves/Non Negotiables
+ Advanced SQL (not limited to Subqueries, table normalization, etc.)
+ Intermediate working knowledge and experience in building dashboards on Tableau or Power BI
+ Intermediate working knowledge in excel (not limited to aggregate, lookup functions, pivot table, power
+ Strong analytical and quantitative skills
+ Excellent communication skills for effective collaboration with cross-functional teams
+ Good to have/what will put you ahead of the curve
+ Experience in the SaaS or related fields is a plus
+ Working knowledge of python is a plus
+ Experience in performance monitoring and analysis
+ Ability to synthesize data and extract key insights
+ Working experience on Salesforce. Having a Salesforce Admin certification is an advantage.
+ Personal Characteristics
+ Detail-oriented and organized, with exceptional prioritization skills
+ Strong work ethic, passion, and creativity
+ The ability to work and thrive in a fast-paced, rapidly changing work environment
+ A “roll up your sleeves and get things done” attitude, coupled with the ability to consistently give/receive feedback
Code+Folder/jd_data/bigdataanalyst1_accenture.txt ADDED
@@ -0,0 +1,18 @@
+ Accenture is a global professional services company with leading capabilities in digital, cloud and security. Combining unmatched experience and specialized skills across more than 40 industries, we offer Strategy and Consulting, Interactive, Technology and Operations services — all powered by the world’s largest network of Advanced Technology and Intelligent Operations centers. Our 674,000 people deliver on the promise of technology and human ingenuity every day, serving clients in more than 120 countries. We embrace the power of change to create value and shared success for our clients, people, shareholders, partners and communities.
+
+ The work:
+ You’ll work with key stakeholders of some of the most innovative and valued companies on the planet to design unique and exciting solutions.
+ You’ll design, test, implement and evolve data handling solutions on premise and in the cloud, leveraging famous tech or niche tech.
+ You’ll implement data pipelines that integrate data from various sources, ranging from data manually provided via excel to integration with a complex system or an open-source system.
+ You’ll manage governance and scheduling, monitoring data quality and data security throughout project lifecycles.
+
+ Here’s what you need:
+
+ Experience in programming languages including Spark, Python and Scala.
+ Hands-on experience in at least one data platform, such as AWS, Google Cloud, Azure, Cloudera.
+ Knowledge on the different ways data can be processed and the principles of data processing (streaming and batch data processing, workflow orchestration).
+
+ Bonus points if:
+
+ You have solid knowledge of different Data Modelling techniques.
+ You have solid knowledge of niche technologies (NoSQL).
Code+Folder/jd_data/bigdataanalyst2_maveric.txt ADDED
@@ -0,0 +1,12 @@
+ Job Description:
+ Experience performing requirements gathering for Data interfaces such as Kafka, Hadoop/Hive, etc.
+ Experience in gathering Interface specifications for messages and files and drafting user stories.
+ Experience interfacing with project stakeholders in Big Data applications, real-time applications and gathering interface requirements.
+ Clarifying requirements for developers and testers.
+ 5+ years of experience with hands on experience of tracking tools like JIRA, Confluence etc.
+ Excellent communication & presentation skills and experienced in working with senior and technical stakeholders.
+ Strong understanding of Agile Methodology.
+
+ Good to have:
+ Experience in AI / ML / NLP
+ Exposure to Retail Banking Operations domain.
Code+Folder/jd_data/bigdataanalyst3_inpost.txt ADDED
@@ -0,0 +1,22 @@
+ Role Overview:
+ As a Big Data Analyst, you will be at the forefront of analyzing vast volumes of real-time data from diverse systems, including parcel lockers, package lifecycle, transportation, user communication and similar topics relative to InPost Group products. This role spans across Poland and extends to 7 international markets, offering a unique opportunity to impact our global operations significantly.
+ Key Responsibilities:
+ Analytical Product Development: Lead the creation of analytical products from inception through delivery, utilizing a deep understanding of business needs across multiple markets. Your work will drive strategic decisions and optimize operations globally.
+ Real-Time Data Processing: Harness large-scale, real-time data from various sources to generate actionable insights, recommendations, and reports. Your analysis will support continuous improvements in parcel delivery and customer interaction.
+ Cross-Functional Collaboration: Work closely with business units to understand their needs, define key performance indicators (KPIs), and establish the necessary data sources and methodologies. Partner with data providers and developers to ensure seamless integration and processing of critical data.
+ Data Governance and Documentation: Take charge of data governance, ensuring data integrity and consistency across the board. Your documentation will guide best practices and support data-driven decision-making processes.
+
+ Analytical Process:
+ Engage with business units to identify KPIs and initiate projects.
+ Determine the availability of data in our data lakehouse and identify or define necessary data structures.
+ Perform iterative data analysis, including understanding, transforming, and aggregating data to establish KPIs.
+ Develop recommendations for data models and collaborate on report building and data visualization.
+ Lead the operationalization of data calculations and ensure comprehensive documentation and data governance.
+ Collaboration and Technology:
+ Translate business requirements into data needs, defining data models and setting standards for data providers.
+ Work within Data&AI squads, collaborating with data engineers, data analysts, and data scientists to leverage a wide range of competencies for project needs.
+ Utilize the latest technologies, including Azure, Google Cloud, Databricks, pySpark, and Power BI, to analyze data and generate insights.
+ Experience: At least 3 years of experience in data analysis, business intelligence, or related areas, preferably with exposure to global or multi-market operations.
+ Technical Skills: Proficiency in PySpark, SQL and Python. Familiarity with cloud platforms (Azure, AWS, Google Cloud) and big data technologies. Experience with data visualization tools (e.g., Power BI, Tableau).
+ Analytical and Strategic Thinking: Strong ability to analyze large datasets and generate actionable insights. Good business acumen to understand company goals and KPIs.
+ Data Governance: Knowledge of data governance principles and practices.
Code+Folder/jd_data/bigdataanalyst4_quess.txt ADDED
@@ -0,0 +1,9 @@
+ Must have skills:
+ Knowledge of Spark, Scala and conversant with SQL
+ Reverse engineer the SQL queries, Scala code to understand functionality
+ Capable of identifying, analysing and interpret patterns and trends in complex data sets
+ Has worked on creating database design, data models & techniques for data mining.
+ Good To Have Skills
+ Adept at using data processing platforms like Hadoop and CouchDB
+ Performing analysis to assess quality and meaning of data
+ Preparing reports for the management stating trends, patterns, and predictions using relevant data
Code+Folder/jd_data/da1_rakuten.txt ADDED
@@ -0,0 +1,58 @@
+ Key Responsibilities:
+ Architect will define and document the data model, data policies, rules, and standards to govern which data is collected, how it is stored, accessed, enriched, integrated, and consumed within the solution.
+ and implement a scalable, high performance data models and data schema to support data integration, storage and retrieval. Output and maintain ERD covering all BSS components.
+ existing data documentation for gaps. Define Data Security, Data Retention, Back-up & restoration policies aligning to client guidelines
+ build and maintain the database platform to provide stable and secure services.
+ and designing plans for database systems based on various factors such as capacity planning/lifecycle management.
+ with other team members and stakeholders.
+ be able to install, deploy, parameters configuration of Databases on any of the platforms Linux/windows/k8s
+ performance turning, capacity planning and optimisation of database systems to ensure optimal data processing and retrieval
+ Technical skill you should have:
+ Hands on experience is designing customer data model with account hierarchy for E-Commerce, Care, CRM or Billing solution
+ Very good knowledge in database technology for bothNoSQL DB and SQL DB. Working experience with in-memory DB such as Couchbase would be an advantage.
+ Good knowledge in designing, building, and operating in-production Big Data, stream processing, and/or enterprise data integration solutions using Apache Kafka
+ Establish scalable implementation of data management framework, platform and tools stack
+ Good knowledge to develop, implement and managing BAU operational process for data quality functionality and capability such as Data Quality Statistics, Data Quality Scorecards, Data Quality Analysis and Workflows Design
+ Experience utilising Visualisation tools such as Tableau, Power BI or any similar tool
+ Experience in replication tools like Couchbase XDCR, Mirroring, Log Shipping, MSSQL
+ Experience in Clustering, always on, PG Physical and Logical replication
+ Scripting: PowerShell, Python
+ Proficient in Document-oriented DB ex. Couchbase DB/MongoDB is a must.
+ Proficient in any of the following database systems would be an advantage:
+ RDBMS ex. MYSQL, PostgreSQL, Oracle
+ NOSQL DB ex. Elasticsearch
+ Desirable Skills
+ of DevOps practices – Kubernetes, Docker, etc.
+ measurement and monitoring tools like – JMeter, Grafana
+ experienced in traditional waterfall methodology and or Agile/
+ Delivery
+ shell scripting
+ of deployment on Linux, Mac OS operating systems
+ version source control systems such as Git or SVN
+ of presenting solutions to clients
+ & External Stakeholder management
+ Data Engineer
+ 1. Idexcel Data Engineer
+ Experience: 3 to 10 years
+ Job Description
+ Candidate will join our Advance Analytics project.
+ Will be working on Multiple Data Integration projects.
+ Will be working on creating Data Lake in open table format.
+ Will involve in creating Realtime advance reporting platform and its Datawarehouse.
+ Will be involved in building Data pipeline, Lambdas, Data Solution Architecting,
+ Deployment of code, Automation, Code review.
+ Should be self-motivated person to led technical upgradation, schema redesigning.
+ Keep himself updated on Data engineering developments both opensource and AWS.
+ Required Skills
+ BE or BTech with good Communication and presentation skill.
+ AWS full DE stack
+ Experience in building Realtime Data analytic solution on AWS is must.
+ Experience in core Banking Industry is a Plus,
+ Experience in developing Analytical solutions for Finance function.
+ Knowledge of Big Data Solutions such as Hadoop, NoSQL, MapReduce etc
+ Good Knowledge of agile Development like Scrum and Sprint planning. CI/CD
+ Working Knowledge of API consumption.
+ Data Integration with external tools like Salesforce, SAP, Codata is a plus.
+ Candidates with AWS Certification will be preferred.
+ Tools
+ Aurora Postgres DB, SQL, Spark, Kafka, Python, Redshift, Snowflake, Airflow, Glue (ETL, Catalog, Crawler), Lambda, SQS, SNS, Data Lake, Cloud watch, Cloud Trail, DBT, AWS SDK, Boto3, Kinesis, kinesis Firehose, Everbridge, Code pipeline. MongoDB, Lake formation, Cloud Formation, Git Branching, code reviews
Code+Folder/jd_data/da2_schneider.txt ADDED
@@ -0,0 +1,30 @@
+ Job Description:
+
+ Data Architect will be part of NAM HUB Global Services within Tools & Data team that process the offer data flow from stage of creation till WEB publication. The data architect is responsible for the creation of publishable data in PIM according to the Schneider Electric offer data governance rules with strong consideration of special requirements of Services offer that could easily localized by country.
+
+ Key Responsibilities
+ Work with Offer Design to translate business, marketing needs and technical system requirements into data model and develop existing data models for proposed Services Offers using defined Schneider quality standards for WEB publication and data strategy in Product Informational Management (PIM) system to produce clear Services technical datasheet with proposed customer values.
+ Perform tasks as required in Product Informational Management (PIM) system to have the Services offer data accurate and complete, corresponding to the latest Guidelines and ensure the proper web publication of Services on related WEB, always ensuring connection between Products and suggested service offers.
+ Optimize new and current database systems, collaborating with Global Services Data Office, being an owner and main executor of high-level standards of offer data publication.
+ Perform as technical consultant and advisor for the HUB squads, Country PIM champions, troubleshooting when needed PIM related issues.
+ Digital Service Experience
+ Define the right offer data digital customer journey.
+ Top cooperation with DCX, Product LOB and PO
+ Web publication
+ Web publication / selector
+ Execute EDMS
+ Dedicated support for seamless offer data digital journey
+ Taxonomy
+ Ensuring the alignment of PIM dictionary implementation with standards
+ Develop product classification and web-based product data sheet.
+ Qualifications
+ Education: Bachelor’s degree in Engineering (Electrical or Industrial preferred) or equivalent field
+ Experience in Marketing Offer Development and Customer Journey
+ Electrical products familiarity is preferred but is not mandatory.
+ MS Office skills and strong knowledge of MS Excel are required.
+ Knowledge of PIM (Product Informational management), EDMS (electronic documents management system), CaP (Check a Product), Symphony is preferred, but is not mandatory.
+ Fluent in English
+ Good communication skills both verbal and written, customer-oriented way of work.
+ Ability to learn fast and adapt to changing environment.
+ Self-Driven to work independently while keeping collaborative mindset to work with HUB squads, teams, and global chapters.
+ Remote position but must work United States Time Zones.
Code+Folder/jd_data/da3_infosys.txt ADDED
@@ -0,0 +1,22 @@
+ Job Description
+ Proven experience in designing and implementing data solutions on AWS.
+ In-depth knowledge of AWS data services such as S3, Glue, Redshift, Athena, Lambda and Cloud Formation.
+ Proficiency in programming languages such as Python for data processing and automation.
+ Proficiency in SQL and experience in working on different databases.
+ Experience with data warehousing, data modeling, and ETL development.
+ Strong understanding of data security, encryption, and compliance on AWS.
+ Excellent problem-solving and communication skills.
+ AWS certification(s) such as AWS Data Engineer - Associate is a plus.
+ Design, develop, and maintain data pipelines and ETL processes on AWS using services such as S3, Glue, Lambda and Cloud Formation.
+ Implement scalable and efficient data storage solutions on AWS, ensuring data security and compliance.
+ Collaborate with data scientists and analysts to understand data requirements and implement solutions to support analytical processes.
+ Optimize data workflows and processes for performance and cost-efficiency.
+ Troubleshoot and resolve data-related issues, ensuring minimal downtime and high availability.
+ Monitoring existing data pipelines and be part of other support activities.
+ Stay updated with AWS services and best practices and provide recommendations for continuous improvement.
+ Ability to develop value-creating strategies and models that enable clients to innovate, drive growth and increase their business profitability
+ Logical thinking and problem-solving skills along with an ability to collaborate
+ Understanding of the financial processes for various types of projects and the various pricing models available
+ Ability to assess the current processes, identify improvement areas and suggest the technology solutions
+ One or two industry domain knowledge
+ Client Interfacing skills
Code+Folder/jd_data/da4_tcs.txt ADDED
@@ -0,0 +1,22 @@
+ Experience Range: 10+ Years
+ Must-Have:
+ Experience in Data intensive activities (Data Modeling, ER Diagram, ETL, Data mapping, Data Governance and security)
+ Experience in End-to-End implementation of large Data Lake or Data Warehouse solution on Azure
+ Experience in Azure data lake and Azure Synapse
+ Concepts on MS Fabrics
+ Strong knowledge on Data Governance using industry leading tools.
+ Knowledge on MS Purview
+ Hands on experience with ETL tools (ADF), Databricks and Spark Pools
+ Strong knowledge and experience in data modeling
+ Strong knowledge on Data Security / Encryption/Monitoring
+ Knowledge on Azure DevOps in Data Lake Solution deployments
+ Analytical and problem-solving skills with a high degree of initiative and flexibility to be available over extended hours.
+ Ability to communicate with Business SME.
+ Ability to create Solution Architecture documents using Design Patterns –Data Models.
+ Experienced with Onsite – Offshore Delivery Model.
+ Technical background ideally within Managed Services and IT outsourcing industry.
+ Certification on Data Platform Solutions, Azure
+ Good to Have:
+ Experience on Cloud Tool & Technology viz. ADO, ServiceNow, Azure Monitoring, Performance Management, Analytics etc.
+ Understanding of Agile Process.
+ Maintains a broad and current knowledge of the industry on Cloud Platforms
Code+Folder/jd_data/dataanalyst1_tcs.txt ADDED
@@ -0,0 +1,7 @@
+ Technical/Functional Skills
+ Key POC for managing demand forecast of an entire commodity, handling escalations and providing solutions
+ Using Python, SQL and Excel for data analysis
+ Deploying Tableau dashboards to visualize complex data insights and identify trends and patterns
+ Coordinating with multiple stakeholders including global supply managers, material program managers, vendors, module forecast DRIs, application developers
+ Managing a suite of tools to generate forecasts and perform supply demand analysis to facilitate supply planning
+ Managing User Acceptance Testing and ensuring smooth transition and successful rollout of product releases.
Code+Folder/jd_data/dataanalyst2_kaplan.txt ADDED
@@ -0,0 +1,19 @@
+ Primary/Key Responsibilities
+ Problem Solving with Data-Driven Solutions: Utilize data analysis techniques to identify and address business challenges effectively, leveraging insights derived from data to propose and implement strategic solutions.
+ Data Management and Quality Assurance: Conduct data querying, cleansing, manipulation, and other tasks to ensure timely and high-quality delivery of data, maintaining data integrity and accuracy throughout the analytical process.
+ Presentation and Visualization Development: Develop presentations, visualizations, and metrics to address stakeholders' inquiries, providing clear and insightful representations of data-driven findings to support informed decision-making.
+ Continuous Learning and Technological Advancement: Stay abreast of the latest tools and technologies in data analysis, continuously learning and improving skills to remain up-to-date with industry advancements and enhance analytical capabilities.
+ Collaborative Cross-Functional Teamwork: Engage in cross-functional collaboration within multidisciplinary teams, working closely with technical and business functional units to ensure alignment of objectives, foster effective communication, and drive collective success.
+ Business Acumen and Strategic Contribution: Proactively engage in comprehending business operations, identifying pertinent data sources, and leveraging insights to propose innovative, data-driven solutions that address key business challenges and drive strategic initiatives.
+ Hybrid Schedule: 3 days remote / 2 days in office
+ 30-day notification period preferred
+
+ Minimum Qualifications
+ Bachelors or Above in Statistics/Mathematics, Engineering, Or Business Or any other equivalent degree.
+ Minimum 2 years of hands-on experience with SQL for data querying and management..
+ Minimum 2 years of experience with R or Python for data analysis and automation.
+ Minimum 2 years of experience with Tableau or similar tools for creating impactful visualizations.
+ Minimum 4 years of experience in data analytics within commercial settings, spanning domains like Marketing, Operations, or Product Management.
+ Strong knowledge of basic and fundamental statistics.
+ Strong problem solving skills.
+ Strong communication skills.
Code+Folder/jd_data/dataanalyst3_amex.txt ADDED
@@ -0,0 +1,23 @@
+ Position Overview:
+
+ Seeking a passionate, data-savvy Analyst to join the Technology Strategy Optimization team to support us drive forward our data driven transformation agenda. This dynamic role uses a consultative approach with the business segments to dive into our customer, product, channel, and digital data to uncover opportunities for consumer experience & servicing optimization. The ideal candidate will have a passion for working with data, extracting insights, and driving data-informed decision-making processes within the organization.
+
+ What You’ll do on a Typical Day :-
+ Data Collection and Analysis: Collect, clean, and analyze large datasets to identify trends, patterns, and correlations. Apply statistical methods and data visualization techniques to present findings effectively.
+ Report Generation: Generate regular and ad-hoc reports for various stakeholders to provide insights into key performance indicators, business metrics, and operational efficiency.
+ Data Quality Assurance: Ensure the accuracy, completeness, and reliability of data by implementing data validation processes and conducting quality assurance checks.
+ Business/Process Intelligence: Applies query, data exploration and transformation, basic statistical techniques, and visualization techniques to create business insights. Synthesize data into actionable insights about customer behaviour, and servicing performance. Clearly communicate these recommendations to internal team members and senior leadership.
+ Data-driven Decision Making: Collaborate with multi-functional teams to provide data-driven recommendations and insights to support critical initiatives, product development, and process improvements.
+ Data Visualization: Build interactive dashboards and data visualizations using tools such as Tableau, Power BI, or Excel to communicate complex data insights in a clear and actionable manner. Create compelling, intuitive, self-service dashboards to internal business partners to understand, have access to, and derive insights from data, metrics, and KPIs (key performance indicators)
+ Continuous Learning and Improvement: Stay up to date with industry trends, new technologies, and standard processes in data analysis and data science. Share knowledge and insights with team members to foster a culture of continuous learning and improvement.
+ Actively participate in multi-functional initiatives, collaborating with team members to measure project success. Provide opportunity analysis, measurement plans, dashboards, and post-release analyses.
+ What We’re looking for :-
+ Bachelor's degree in engineering/ IT/ Data Sciences or related fields
+ 8-10 years of data analyst experience, with a 2+ years of data visualization experience
+ Good background in data visualization including creation of dashboards and data models
+ An analytical mind and inclination for problem-solving with shown attention to detail combined with big-picture thinking
+ Ability to present and tailor details and information to various levels of an organization
+ Technical Skills: Must have at least one data visualization skill Tableau/Power BI. Proficiency with MS PowerPoint and Excel. Writing simple/complex SQL queries for data analysis
+ Completes work independently with general mentorship on new projects/requirements
+ Work well with peers, and superiors in all departments across the organization.
+ Need to have a robust mix of technical and communication skills, with a passion for optimization, data storytelling, and data visualization.
Code+Folder/jd_data/dataanalyst4_amazon.txt ADDED
@@ -0,0 +1,30 @@
+ Description
+
+ Amazon is looking for a talented, driven Data Analyst It is a pivotal role that will contribute to the evolution and success of one of the fastest growing businesses in the company.
+
+ Amazon Global Trade is an important initiative to grow the Amazon Marketplace Seller Business around the world, with the India team focused on cross-border selling from sellers in India catering to customers abroad and sellers across the globe catering to customers in India. The Global Trade team is looking for a professional who relishes diving deep into data. You will have the exciting opportunity to deliver on a strategy to enable broad use of Amazon Seller Services by small/medium sellers and large enterprises in India.
+
+ Working in a dynamic environment, you will be responsible for monitoring key success metrics for sellers, identifying problem areas and business challenges and collaboratively shaping solutions with category and business teams to help sellers grow and optimize on the Amazon platform. The successful candidate has a passion for extracting actionable insights from data. He/she rolls up his/her sleeves, innovates, and quickly becomes a subject matter expert to assess business performance across sellers and market segments. He/she has significant experience working with customers, analyzing data, identifying trends, extracting conclusions, and presenting findings in a simple and clear manner. He/she enjoys problem solving and is proficient using Excel and other tools to analyze large data sets.
+
+ Key Responsibilities
+ ● Understand Amazon seller Services products and services and track/report business performance and problem areas using appropriate metrics.
+ ● Work cross functionally with the account management team to fix problems with sellers
+ ● Use Amazon’s tools to problem solve and validate solutions
+ ● Partner to define goals around key operational metrics
+ ● Recommend business actions based on analytical findings. Includes defining new metrics, techniques, and strategies to improve seller performance
+ Key job responsibilities
+ ● Understand Amazon seller Services products and services and track/report business performance and problem areas using appropriate metrics.
+ ● Work cross functionally with the program management team to fix problems with sellers
+ ● Use Amazon’s tools to problem solve and validate solutions
+ ● Partner to define goals around key operational metrics
+ ● Recommend business actions based on analytical findings. Includes defining new metrics, techniques, and strategies to improve seller performance
+
+ Basic Qualifications
+ ● 1+ years of data analytics or automation experience
+ ● Bachelor's degree
+ ● Knowledge of data pipelining and extraction using SQL
+ ● Knowledge of SQL and Excel at a moderate or advanced level
+ ● Experience with data mining tools like SQL, SAS, SPSS, or similar
+
+ Preferred Qualifications
+ ● Knowledge of SQL/Python/R, scripting, MS Excel, table joins, and aggregate analytical functions
Code+Folder/jd_data/de1_ola.txt ADDED
@@ -0,0 +1,33 @@
+ Data at Ola:
+ Quality data is fundamental to Ola’s success. As a rapidly growing company, we are preparing for a future of tremendous growth and transformation. We are rebuilding our Data Engineering practice to enable the ola group companies success by building a solid data foundation. We are seeking stunning Principal Data Engineers to help us define and realize our vision for reliable and quality data across the company. This is a unique opportunity to shape Data Engineering for a strong, but high potential, company at high scale in its lifecycle.
+ Data Engineering at Ola:
+ We need to ensure every area of the business has trustworthy data to fuel insight and innovation. Understanding the business need, securing the right data sources, designing usable data models, and building robust & dependable data pipelines are essential skills to meet this goal.
+ At the same time, the technology used to create great data is continually evolving. We are moving to a reality where both batch & stream processing are leveraged to meet the latency requirements for the business. The Data Engineering paved path is still taking shape, and we want to collaboratively develop this to support the entire company. We need senior engineers who are passionate not only about the data, but also about improving the technology we leverage for Data Engineering.
+ We are looking for Data Pipeline Engineers to help us build and enhance big data platforms to achieve availability, scalability and operational effectiveness. The right individual will embrace the opportunity to tackle challenging problems and use their influence to drive continual improvement. You will also work on the cutting edge of technology, leveraging Hadoop, Hbase, Hive, Kafka, Spark, Flink, Mesos/Kubernetes, Hudi/Deltalake , Prometheus, Grafana etc.
+ Roles and Responsibilities:
+ Develop and automate large scale, high-performance data processing systems (batch and/or streaming) to drive Ola group business growth and improve the product experience.
+ Evangelize high quality software engineering practices towards building data infrastructure and pipelines at scale.
+ Lead data engineering projects to ensure pipelines are reliable, efficient, testable, & maintainable
+ Design our data models for optimal storage and retrieval and to meet critical product and business requirements.
+ Understand and influence logging to support our data flow, architecting logging best practices where needed
+ Contribute to shared Data Engineering tooling & standards to improve the productivity and quality of output for Data Engineers across the company
+ Partner with leadership, engineers, program managers and data scientists to understand data needs.
+ Educate your partners: Use your data and analytics experience to ‘see what’s missing’, identifying and addressing gaps in their existing logging and processes.
+ Work with stakeholders to build data lineage, data governance and data cataloging.
+ Leading projects using agile methodologies.
+ Communicate effectively with people of all levels in the organization.
+ Recruit, retain and develop people skills to take bigger responsibilities and challenges.
+ Experience & Skills:
+ Experience in custom ETL design, implementation and maintenance.
+ Experience with workflow management engines like Airflow, Dagster etc.
+ Working knowledge of relational databases and query authoring (SQL).
+ Experience with Java / Scala / Spark is preferred
+ Working experience with data at the petabyte scale.
+ Experience designing, building and operating robust distributed systems.
+ Experience designing and deploying high performance systems with reliable monitoring and logging practices.
+ Effectively work across team boundaries to establish overarching data architecture, and provide guidance to individual teams.
+ Expertise of Amazon Web Services (AWS) and/or other relevant Cloud Infrastructure solutions like Microsoft Azure or Google Cloud.
+ Experience in managing and deploying containerized environments using Docker, Mesos/Kubernetes is a plus.
+ Experience in managing projects using scrum methodology.
+ TechStack : CloudEra Stack, Oozie, Hive, spark, Flink, Spark, K8s, EMR, Presto, Pinot, Trino, IceBerg, FileFormat: parquet, Avro ORC format, DeltaLake, Parquet, Airflow, Druid, Nifi, Hive/HiveQL, Clickhouse, Debezium (CDC) / DBT, Lakehouse, Spark Structured/Streaming, Flink, Apache Beam, Kafka, Hadoop/HbAse/HDFS, Hudi, File Formats (ORC, Parquet, Iceberg), Query engines (Presto, Hue, Trino), Trino or Presto, Stream Processing: Hudi vs iceberg vs delta-lake, Druid vs apache pinot vs trino, Apache Nifi, Apache Flume,Fabric and Secor, Maxwell, K8S, LakeHouse,
+ Educational Qualifications: Bachelor’s or Master’s degree in Engineering or related technical discipline (from premier institutes preferred)
Code+Folder/jd_data/de2_genpact.txt ADDED
@@ -0,0 +1,30 @@
+ Inviting applications for the role of Principal Consultant-Senior Data Engineer
+ Include optimizing data pipelines, ensuring data integrity and consistency, enhancing system resiliency where applicable, maintaining and improving data security, proactive alerting, and monitoring for data pipelines, and automating repetitive data-oriented tasks.
+ Responsibilities
+ Automate data tasks on GCP.
+ Work with data domain owners, data scientists and other stakeholders to that data is consumed effectively on GCP.
+ Design, build, secure and maintain data infrastructure, including data pipelines, databases, data warehouses, and data processing platforms on GCP.
+ Measure and monitor the quality of data on GCP data platforms.
+ Implement robust monitoring and alerting systems to proactively identify and resolve issues in data systems. Respond to incidents promptly to minimize downtime and data loss.
+ Develop automation scripts and tools to streamline data operations and make them scalable to ensure accommodate growing data volumes and user traffic.
+ Optimize data systems to ensure efficient data processing, reduce latency, and improve overall system performance.
+ Collaborate with data and infrastructure teams to forecast data growth and plan for future capacity requirements.
+ Ensure data security and compliance with data protection regulations. Implement best practices for data access controls and encryption.
+ Collaborate with data engineers, data scientists, and software engineers to understand data requirements, troubleshoot issues, and support data-driven initiatives.
+ Continuously assess and improve data infrastructure and data processes to enhance reliability, efficiency, and performance.
+ Maintain clear and up-to-date documentation related to data systems, configurations, and standard operating procedures.
+ Minimum Qualifications / Skills
+ Bachelor’s or master’s degree in computer science, Software Engineering, Data Science or related field, or equivalent practical experience
+ Preferred Qualifications/ Skills
+ Proficiency in data technologies, such as relational databases, data warehousing, big data platforms (e.g., Hadoop, Spark), data streaming (e.g., Kafka), and cloud services (e.g., AWS, GCP, Azure).
+ Strong programming skills in languages like Python (numpy, pandas, pyspark), Java (Core Java, Spark with Java, functional interface, lambda, java collections), or Scala, with experience in automation and scripting.
+ Experience with containerization and orchestration tools like Docker and Kubernetes is a plus.
+ Experience with data governance (data plex), data security, and compliance best practices on GCP.
+ Solid understanding of software development methodologies and best practices, including version control (e.g., Git) and CI/CD pipelines.
+ Strong background in cloud computing and data-Intensive applications and services, with a focus on Google Cloud Platform.
+ Experience with data quality assurance and testing on GCP.
+ Proficiency with GCP data services (BigQuery; Dataflow; Data Fusion; Dataproc; Cloud Composer; Pub/Sub; Google Cloud Storage).
+ Strong understanding of logging and monitoring using tools such as Cloud Logging, ELK Stack, AppDynamics, New Relic, Splunk, etc.
+ Knowledge of AI and ML tools is a plus.
+ Google Associate Cloud Engineer or Data Engineer certification is a plus.
+ Experience in data engineering or data science on GCP.
Code+Folder/jd_data/de3_amazon.txt ADDED
@@ -0,0 +1,20 @@
+ Description
+
+ Amazon Prime team is building an evergreen platform that will provide real time insights of traffic, sales, deals, prime engagement and is looking for rock start data engineer to build this. At Amazon Prime, understanding customer data is paramount to our success in providing customers with relevant and enticing benefits such as fast free shipping, instant videos, streaming music and free Kindle books in the US and international markets. At Amazon you will be working in one of the world's largest and most complex data environments.
+
+ You will be part of team that will work with the marketing, retail, finance, analytics, machine learning and technology teams to provide real time data processing solution that give Amazon leadership, marketers, PMs timely, flexible and structured access to customer insights. The team will be responsible for building this platform end to end using latest AWS technologies and software development principles.
+
+ As a Data Engineer, you will be responsible for leading the architecture, design and development of the data, metrics and reporting platform for Prime. You will architect and implement new and automated Business Intelligence solutions, including big data and new analytical capabilities that support our Development Engineers, Analysts and Retail business stakeholders with timely, actionable data, metrics and reports while satisfying scalability, reliability, accuracy, performance and budget goals and driving automation and operational efficiencies. You will partner with business leaders to drive strategy and prioritize projects and feature sets. You will also write and review business cases and drive the development process from design to release. In addition, you will provide technical leadership and mentoring for a team of highly capable Data Engineers.
+
+ Responsibilities
+ Own design and execution of end to end projects
+ Own managing WW Prime core services data infrastructure
+ Establish key relationships which span Amazon business units and Business Intelligence teams
+ Implement standardized, automated operational and quality control processes to deliver accurate and timely data and reporting to meet or exceed SLAs
+ Basic Qualifications
+ 3+ years of data engineering experience
+ Experience with data modeling, warehousing and building ETL pipelines
+ Experience with SQL
+ Preferred Qualifications
+ Experience with AWS technologies like Redshift, S3, AWS Glue, EMR, Kinesis, FireHose, Lambda, and IAM roles and permissions
+ Experience with non-relational databases / data stores (object storage, document or key-value stores, graph databases, column-family databases)
Code+Folder/jd_data/de4_idexcel.txt ADDED
@@ -0,0 +1,23 @@
+ Experience: 3 to 10 years
+ Job Description
+ Candidate will join our Advance Analytics project.
+ Will be working on Multiple Data Integration projects.
+ Will be working on creating Data Lake in open table format.
+ Will involve in creating Realtime advance reporting platform and its Datawarehouse.
+ Will be involved in building Data pipeline, Lambdas, Data Solution Architecting,
+ Deployment of code, Automation, Code review.
+ Should be self-motivated person to led technical upgradation, schema redesigning.
+ Keep himself updated on Data engineering developments both opensource and AWS.
+ Required Skills
+ BE or BTech with good Communication and presentation skill.
+ AWS full DE stack
+ Experience in building Realtime Data analytic solution on AWS is must.
+ Experience in core Banking Industry is a Plus,
+ Experience in developing Analytical solutions for Finance function.
+ Knowledge of Big Data Solutions such as Hadoop, NoSQL, MapReduce etc
+ Good Knowledge of agile Development like Scrum and Sprint planning. CI/CD
+ Working Knowledge of API consumption.
+ Data Integration with external tools like Salesforce, SAP, Codata is a plus.
+ Candidates with AWS Certification will be preferred.
+ Tools
+ Aurora Postgres DB, SQL, Spark, Kafka, Python, Redshift, Snowflake, Airflow, Glue (ETL, Catalog, Crawler), Lambda, SQS, SNS, Data Lake, Cloud watch, Cloud Trail, DBT, AWS SDK, Boto3, Kinesis, kinesis Firehose, Everbridge, Code pipeline. MongoDB, Lake formation, Cloud Formation, Git Branching, code reviews
Code+Folder/jd_data/ds1_volvo.txt ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ What do we do?
2
+
3
+ We are the Simulation and Analytics Team within Volvo Group Trucks Technology, a dynamic team working towards enabling the Technology organization in evaluating futuristic products and services for Volvo business units such as trucks, buses, construction equipment and Marine
4
+
5
+ The main tasks in our team are to meet our customer’s expectation on quality, fuel consumption and to comply with emission legislation. This is done by selecting Engine HW-components and calibrate engine related functions. Upcoming emission legislation requires development of advanced control strategies together with an optimized hardware and software. You will be an important contributor in this development. Powertrain is the heart of the Truck and is one of the focus areas within Volvo Group, which is at the forefront of sustainable transport solutions for tomorrow.
6
+
7
+ What will you do?
8
+ You will work in agile teams through good collaboration with our colleagues in software development and design teams all around the world.
9
+
10
+ You along with the team will be responsible for understanding customer usage and performance of our products and for providing our solutions throughout entire product life cycle, from idea investigation and concept evaluation to industrialization and to aftermarket and maintenance. You will make meaningful interpretations, recommendations and eventually predictions from the Data available from various sources to support our endeavor in moving towards Data Driven Powertrain Development
11
+
12
+ You get the opportunity to follow your Data Driven Models from script to test cell to verification in a truck and eventually to being used by our end customers.
13
+
14
+ We have an agile way of working, where each team plan their activities in sprints and deliver solutions together as a team. We strive to have an open and honest environment within the teams, where it is easy to ask each other for support when needed. The tasks can be either part of a larger project or short tasks to improve products currently in production.
15
+
16
+ You will get the opportunity to interact with highly committed colleagues from different cultures. We hope you will learn as much from us as we will from you.
17
+
18
+ Who are you?
19
+
20
+ We believe that to be successful in this position, you are a team player, have strong experience in data engineering and analysis area, and a will to deliver. You have a knowledge of control systems and feedback systems (closed loop) in general with an exposure to numerical and data driven simulation of a system. You must have a proven experience in Data modelling – Regression, Clustering, Neural Networks, Time series etc. and should have used them in solving real-life challenges (prediction, automation, real time optimization etc).
21
+
22
+ You have a willingness to learn and take more responsibility with can-do attitude.
23
+
24
+ You will be greatly appreciated in this role if you have demonstrated Predictive analysis and decision-making using Data.
25
+
26
+ If you are a Master’s Degree holder in Mechanical/Automobile/Electronics/Mechatronics Engineering with fantastic analytical skills, have gained a strong domain understanding in Powertrain Engineering with proven skills in handling and analysing large set of data to make meaningful interpretations and if you believe that you can work smoothly with Python ( including libraries like Numpy, SciPy, Pandas, Tensorflow) , R, SQL, Git, Azure, Hadoop and Matlab/Simulink, pySpark, C/C++/ Scala then you can be a good fit into this role.
27
+
28
+ Experience of working with relational databases, data privacy and understanding of IOT based instrumentation design with additional data logging to build or validate models is a big plus.
29
+
30
+ A passion for turning data into knowledge with great visualizations using Power BI, QlikView or Tableau, and experience working with plant/component models and integrating these models into SIL/MIL/HIL evaluations, would be icing on the cake.
Code+Folder/jd_data/ds2_wipro.txt ADDED
@@ -0,0 +1,30 @@
1
+ Job Description
2
+ Contribute to the design and implementation of state-of-the-art AI solutions.
3
+
4
+ Assist in the development and implementation of AI models and systems, leveraging techniques such as Language Models (LLMs) and generative AI.
5
+
6
+ Collaborate with stakeholders to identify business opportunities and define AI project goals.
7
+
8
+ Stay updated with the latest advancements in generative AI techniques, such as LLMs, and evaluate their potential applications in solving enterprise challenges.
9
+
10
+ Utilize generative AI techniques, such as LLMs, to develop innovative solutions for enterprise industry use cases.
11
+
12
+ Integrate with relevant APIs and libraries, such as Azure Open AI GPT models and Hugging Face Transformers, to leverage pre-trained models and enhance generative AI capabilities.
13
+
14
+ Implement and optimize end-to-end pipelines for generative AI projects, ensuring seamless data processing and model deployment.
15
+
16
+ Utilize vector databases, such as Redis, and NoSQL databases to efficiently handle large-scale generative AI datasets and outputs.
17
+
18
+ Implement similarity search algorithms and techniques to enable efficient and accurate retrieval of relevant information from generative AI outputs.
19
+
20
+ Collaborate with domain experts, stakeholders, and clients to understand specific business requirements and tailor generative AI solutions accordingly.
21
+
22
+ Conduct research and evaluation of advanced AI techniques, including transfer learning, domain adaptation, and model compression, to enhance performance and efficiency.
23
+
24
+ Establish evaluation metrics and methodologies to assess the quality, coherence, and relevance of generative AI outputs for enterprise industry use cases.
25
+
26
+ Ensure compliance with data privacy, security, and ethical considerations in AI applications.
27
+
28
+ Leverage data engineering skills to curate, clean, and preprocess large-scale datasets for generative AI applications.
29
+
30
+ Generative AI
Code+Folder/jd_data/ds3_lenovo.txt ADDED
@@ -0,0 +1,27 @@
1
+ Job Description: Data Scientist (User Experience Enhancement)
2
+
3
+ Are you passionate about leveraging data to create seamless and intuitive user experiences on smartphones? We're seeking a Senior Data Scientist to lead the charge in harnessing data-driven insights to enhance user interactions and provide personalized assistance. In this role, you'll be at the forefront of utilizing contextual data and advanced machine learning techniques to deliver tailored responses and actions, making smartphone interactions more intuitive and efficient. If you're excited about the intersection of data science and user experience optimization, we want you on our team!
4
+
5
+ Responsibilities:
6
+
7
+ Data-Driven Insights: Utilize data analytics and machine learning techniques to derive actionable insights from user interactions, contextual data, and feedback, driving continuous improvements in user experience.
8
+
9
+ Contextual Understanding: Develop algorithms and models to interpret contextual data, such as location, time, and user activity, enabling smartphones to provide relevant and timely assistance based on the user's current situation.
10
+
11
+ Prompting Enhancement: Enhance prompting mechanisms and intelligent notifications using data-driven approaches, ensuring that smartphones proactively assist users with relevant suggestions, reminders, and actions.
12
+
13
+ Personalized Reasoning: Build and refine large-scale datasets for training and improving language understanding models, enabling smartphones to deliver personalized responses and actions tailored to individual user preferences and behavior.
14
+
15
+ LLM Model Training: Collaborate with machine learning engineers to train and optimize language understanding models (LLMs) using curated datasets, fine-tuning model parameters to enhance accuracy and relevance of responses.
16
+
17
+ Continuous Improvement: Establish feedback loops and mechanisms for monitoring user interactions and collecting feedback, iteratively refining data models and algorithms to adapt to evolving user needs and preferences.
18
+
19
+ Data Governance: Ensure data quality, privacy, and compliance with relevant regulations and standards, implementing robust data governance practices to safeguard user information and maintain trust.
20
+
21
+ Qualifications:
22
+ B.E/M. Tech degree in Data Science, Computer Science, or a related field.
23
+ Minimum 5 years of experience in data analysis, machine learning, and user experience optimization, preferably in the mobile technology industry.
24
+ Proficiency in data mining, statistical analysis, and machine learning techniques, with hands-on experience in Python, R, or similar programming languages.
25
+ Strong understanding of natural language processing (NLP) and language understanding models, with experience in training and deploying machine learning models for text analysis and interpretation.
26
+ Excellent problem-solving and analytical skills, with the ability to derive actionable insights from complex datasets and translate them into practical solutions for improving user experiences.
27
+ Effective communication and collaboration skills, with the ability to work closely with cross-functional teams to implement data-driven solutions and drive continuous improvement in user interactions.
Code+Folder/jd_data/ds4_amazon.txt ADDED
@@ -0,0 +1,30 @@
1
+ Description
2
+
3
+ Amazon Web Services (AWS) provides a highly reliable, scalable, and low-cost cloud platform that powers thousands of businesses in over 190 countries. AWS’ Infrastructure Services Supply Chain (AIS-SC) organization works to deliver cutting-edge solutions to source, build and maintain our socially responsible data center supply chains. We are a team of highly-motivated, engaged, and responsive professionals who enable the core sustainable infrastructure of AWS. Come join our team and be a part of history as we deliver results for the largest cloud services company on Earth!
4
+
5
+ Do you love problem solving? Are you looking for real world Supply Chain challenges? Do you have a desire to make a major contribution to the future, in the rapid growth environment of Cloud Computing?
6
+
7
+ Amazon Web Services is looking for a highly motivated, Data Scientist to help build scalable, predictive and prescriptive business analytics solutions that supports AWS Supply Chain organization. You will be part of the Supply Chain Analytics team working with Global Stakeholders, Data Engineers, Business Intelligence Engineers and Business Analysts to achieve our goals.
8
+
9
+ We are seeking an innovative and technically strong data scientist with a background in optimization, machine learning, and statistical modeling/analysis. This role requires a team member to have strong quantitative modeling skills and the ability to apply optimization/statistical/machine learning methods to complex decision-making problems, with data coming from various data sources. The candidate should have strong communication skills, be able to work closely with stakeholders and translate data-driven findings into actionable insights. The successful candidate will be a self-starter, comfortable with ambiguity, with strong attention to detail and ability to work in a fast-paced and ever-changing environment.
10
+
11
+ Responsibilities
12
+ Demonstrate thorough technical knowledge of feature engineering on massive datasets, effective exploratory data analysis, and model building using industry-standard time series forecasting techniques such as ARIMA, ARIMAX and Holt-Winters, and formulate ensemble models.
13
+ Proficiency in both supervised (linear/logistic regression) and unsupervised algorithms (k-means clustering, Principal Component Analysis, market basket analysis).
14
+ Experience in solving optimization problems like inventory and network optimization. Should have hands on experience in Linear Programming.
15
+ Understand the business reality behind large sets of data and develop meaningful solutions comprising of analytics as well as marketing management.
16
+ Work closely with internal stakeholders like the business teams, engineering teams and partner teams and align them with respect to your focus area
17
+ Innovate by adapting new modeling techniques and procedures
18
+ Passionate about working with huge data sets and be someone who loves to bring datasets together to answer business questions. You should have deep expertise in creation and management of datasets
19
+ Exposure at implementing and operating stable, scalable data flow solutions from production systems into end-user facing applications/reports. These solutions will be fault tolerant, self-healing and adaptive.
20
+ Detail-oriented and must have an aptitude for solving unstructured problems. You should work in a self-directed environment, own tasks and drive them to completion.
21
+ Excellent business and communication skills to be able to work with business owners to develop and define key business questions and to build data sets that answer those questions
22
+ Work with distributed machine learning and statistical algorithms to harness enormous volumes of data at scale to serve our customers
23
+ Basic Qualifications
24
+ 5+ years of data scientist experience
25
+ 4+ years of data querying languages (e.g. SQL), scripting languages (e.g. Python) or statistical/mathematical software (e.g. R, SAS, Matlab, etc.) experience
26
+ 3+ years of machine learning/statistical modeling data analysis tools and techniques, and parameters that affect their performance experience
27
+ Experience applying theoretical models in an applied environment
28
+ Preferred Qualifications
29
+ Experience in Python, Perl, or another scripting language
30
+ Experience in a ML or data scientist role with a large technology company
Code+Folder/jd_data/mle1_allianz.txt ADDED
@@ -0,0 +1,25 @@
1
+ Key Responsibilities:
2
+ Lead end-to-end development and delivery of Machine-Learning applications, emphasizing operations and monitoring.
3
+ Collaborate with multidisciplinary teams to understand business requirements, identify ML workflow requirements, improve processes, and implement new features.
4
+ Perform software engineering tasks: requirements analysis, design, implementation, testing, deployment, code maintenance, etc.
5
+ Develop pipelines for deployment and operation of ML applications, following CI/CD best practices.
6
+ Implement optimization techniques to ensure efficient model inference, reducing computational costs and improving response times.
7
+ Collaborate with Back-End Engineers to ensure the infrastructure supports the demands of the model, especially concerning GPU resources, memory, and storage.
8
+ Contribute to architectural and technical documentation
9
+ Requirements:
10
+ Degree in a quantitative or engineering field like Computer Science, Physics, Mathematics, Statistics, etc.
11
+ 3-5 years of relevant professional experience in end-to-end data projects, from data cleaning to productionizing ML models.
12
+ Several years of Python programming experience and overall strong software development skills
13
+ Demonstrated knowledge of CI/CD pipelines, Jenkins & GitHub actions.
14
+ Solid understanding of machine learning product lifecycle and the commonly associated components (MLOps)
15
+ Understanding of multiple machine learning algorithms and techniques.
16
+ Expertise in ML model deployment (familiar with REST APIs) and monitoring.
17
+ Experience with containerization technologies such as Docker and Kubernetes.
18
+ Solid knowledge of at least one major cloud platform, preferably Azure Cloud.
19
+ Experience coordinating with various technical stakeholders (Engineers, Architects, Data Scientists) to achieve common goals.
20
+ Strong ability to self-organize, take ownership of topics, and drive them to delivery together with team members.
21
+ Fluency in English is required; additional languages are a plus.
22
+
23
+ Good to Have:
24
+ Insurance knowledge and proficiency in additional languages.
25
+ Experience in newer ML approaches (LLM, computer vision, etc.).
Code+Folder/jd_data/mle2_exxonmobil.txt ADDED
@@ -0,0 +1,24 @@
1
+ What Role You Will Play In Our Team
2
+ We are looking to hire candidates to work on challenging technology and engineering problems that span oil and gas exploration & production, chemicals/fuels/lubricants products and low carbon solutions. A successful candidate would understand a business problem (both commercially and technically), translate it into a computational, data science or machine learning problem and apply engineering, numerical, data science and programming skills to tackle it. The Machine Learning Engineer will work as part of a team to design, develop, deploy and sustain data science solutions that are scalable, reproducible and with commercial-grade quality.
3
+
4
+ What You Will Do
5
+ Applies software development practices, DevOps skills and Machine Learning (ML) techniques to orchestrate an end-to-end machine learning workflow that effectively brings ML models to production.
6
+ Participates in scoping of deployment of new data science solutions and implements the appropriate solution design.
7
+ Sustain data science solutions by enabling continuous ML model and/or service performance monitoring, training, and re-training of models, including the implementation of proactive alerting methods.
8
+ Works effectively with computational scientists, data scientists, engineers, software developers, and domain experts across the globe to develop and apply computational and data science solutions in support of our business.
9
+ About You
10
+ Skills and Qualifications
11
+ Bachelor’s degree from a recognized university in Computer Science, IT, Applied Mathematics, Engineering or related disciplines with minimum 7.0 CGPA or equivalent.
12
+ Minimum 3 years of experience in Data Science and Machine Learning or related computational domain.
13
+ Competent to expert level programming experience in C/C++/Python.
14
+ Strong foundation in application design.
15
+ Experience with refactoring legacy code and leveraging third-party libraries/APIs during software development.
16
+ Experience with Source code version control (Git), Azure Cloud platform and containers, Databricks and MLflow.
17
+ Continuous Integration and Continuous Deployment.
18
+ Familiarity with statistical analysis, regression and classification.
19
+ Preferred Qualifications / Experience
20
+ Experience with time series analysis, computer vision, natural language processing.
21
+ Knowledge or hands on experience on Matlab & SQL.
22
+ Strong written and verbal communication skills.
23
+ Prior knowledge of commercial software development and/or experience in commercial software teams.
24
+ Familiarity with Oil and Gas Industry.
Code+Folder/jd_data/mle3_ubs.txt ADDED
@@ -0,0 +1,21 @@
1
+ Your role
2
+ Are you interested in pursuing your career in Asset Management? Does working in a data driven business excite you? Do you want to be part of the data revolution by performing hands on data analysis and developing ML/DL/Gen AI models to produce working and scalable concepts, along with managing multiple initiatives for cross-functional data analytics needs?
3
+
4
+ We’re looking for someone to work with a cross-functional team to:
5
+ Work with teams of data scientists and quant developers to create, deploy, and monitor models in production
6
+ Work with a team of data engineers to source and transform data, and to create robust, scalable data and model development infrastructure
7
+ Build components to manage the model lifecycle from feature engineering through deployment, validation and monitoring
8
+ Work with researchers and product/model owners to help drive product development and innovation
9
+ Design and solution production quality ML/DL/Gen AI models
10
+ Your team
11
+ You'll be working in the Chief Data Office (CDO) within UBS Global Asset Management. We're responsible for defining data strategy and supporting the management of core data sets used across Asset Management. Our team is structured to encompass several functions, including Data Strategy and Delivery, Data Mastering and Governance, Data Architecture, and Data Analytics. We're based in New York City, Chicago, Zurich, and Pune.
12
+
13
+ Your expertise
14
+ Minimum of 5+ years of hands-on experience with data science, ML/DL engineering
15
+ Experience with machine learning APIs and computational packages (i.e. NumPy, scikit-learn, Pandas)
16
+ Experience in developing ML, DL and Gen AI models
17
+ Experience with SQL and NoSQL databases
18
+ Experience with cloud big data technology (Azure preferred)
19
+ Understanding of Agile software methodology and modern CI/CD practices
20
+ Data domain knowledge of market data, security reference data, macroeconomics indicators, equity fundamental data, trades and positions data, risk data is a plus
21
+ BS in Computer Science, Software Engineering, Statistics, or equivalent
Code+Folder/jd_data/mle4_oneorigin.txt ADDED
@@ -0,0 +1,24 @@
1
+ About the Role:
2
+ We are looking for a highly skilled Senior Machine Learning Engineer with expertise in Large Language Models (LLMs) and Generative AI, with a preferred focus on intelligent document processing. The ideal candidate will have 6-8 years of experience in machine learning research and development, with a strong background in LLMs, generative modeling, and intelligent document processing techniques.
3
+ Key Responsibilities:
4
+ Research and Development:
5
+ Lead research efforts in the development and optimization of Large Language Models (LLMs) and Generative AI algorithms for intelligent document processing applications.
6
+ Stay updated with the latest advancements in LLMs, generative modeling techniques, and intelligent document processing technologies. Algorithm Development:
7
+ Design and implement advanced algorithms for training and fine-tuning LLMs and generative AI models tailored for intelligent document processing tasks.
8
+ Develop innovative solutions for document understanding, information extraction, and content generation using state-of-the-art techniques. Model Training and Optimization:
9
+ Develop scalable and efficient training pipelines for LLMs and generative AI models optimized for document processing tasks.
10
+ Optimize model performance in terms of accuracy, efficiency, and scalability, leveraging techniques such as hyperparameter tuning and model compression. Intelligent Document Processing:
11
+ Apply machine learning techniques to automate document processing tasks such as text extraction, entity recognition, document classification, and semantic analysis.
12
+ Develop and deploy end-to-end document processing solutions integrating LLMs and generative AI models with intelligent document processing workflows. Collaboration and Leadership:
13
+ Collaborate closely with cross-functional teams including software engineers and product managers to integrate machine learning models into intelligent document processing solutions.
14
+ Provide technical leadership and mentorship to junior members of the machine learning team, fostering a culture of innovation and excellence.
15
+ Required Skills and Qualifications:
16
+ Bachelor's, Master's, or Ph.D. degree in Computer Science, Electrical Engineering, Mathematics, or related field.
17
+ 8-12 years of experience in machine learning research and development, with a focus on LLMs, generative AI, and intelligent document processing.
18
+ Expertise in deep learning frameworks
19
+ Proficiency in programming languages such as Python, with experience in software development best practices and version control systems.
20
+ Strong understanding of machine learning fundamentals, including optimization algorithms, and model evaluation techniques
21
+ Experience with intelligent document processing techniques such as OCR (Optical Character Recognition), NLP (Natural Language Processing), and document understanding. Preferred Skills
22
+ Familiarity with document processing frameworks and libraries such as Tesseract,
23
+ Experience with cloud computing platforms (e.g., AWS, Azure, GCP) and distributed computing frameworks (e.g., Apache Spark, Hadoop)
24
+ Strong analytical and problem-solving skills, with a passion for pushing the boundaries of machine learning and artificial intelligence in document processing applications.
Code+Folder/jd_data/mlops1_mindtree.txt ADDED
@@ -0,0 +1,22 @@
1
+ Design the data pipelines and engineering infrastructure to support our clients’ enterprise machine learning systems at scale
2
+ Take offline models data scientists build and turn them into a real machine learning production system
3
+ Develop and deploy scalable tools and services for our clients to handle machine learning training and inference
4
+ Identify and evaluate new technologies to improve performance, maintainability, and reliability of our clients’ machine learning systems
5
+ Apply software engineering rigor and best practices to machine learning, including CI/CD, automation, etc.
6
+ Support model development, with an emphasis on auditability, versioning, and data security
7
+ Facilitate the development and deployment of proof-of-concept machine learning systems
8
+ Communicate with clients to build requirements and track progress
9
+ Qualifications
10
+ Experience building end-to-end systems as a Platform Engineer, ML DevOps Engineer, or Data Engineer (or equivalent)
11
+ Strong software engineering skills in complex, multi-language systems
12
+ Fluency in Python
13
+ Comfort with Linux administration
14
+ Experience working with cloud computing and database systems
15
+ Experience building custom integrations between cloud-based systems using APIs
16
+ Experience developing and maintaining ML systems built with open source tools
17
+ Experience developing with containers and Kubernetes in cloud computing environments
18
+ Familiarity with one or more data-oriented workflow orchestration frameworks (KubeFlow, Airflow, Argo, etc.)
19
+ Ability to translate business needs to technical requirements
20
+ Strong understanding of software testing, benchmarking, and continuous integration
21
+ Exposure to machine learning methodology and best practices
22
+ Total Experience- 3 to 5 Years.
Code+Folder/jd_data/mlops2_convin.txt ADDED
@@ -0,0 +1,26 @@
1
+ Founded by IIT Delhi Alumni, Convin is a conversation intelligence platform that helps organisations improve sales/collections and elevate customer experience while automating the quality & coaching for reps, and backing it up with super deep business insights for leaders.
2
+ At Convin, we are leveraging AI/ML to achieve these larger business goals while focusing on bringing efficiency and reducing cost.
3
+
4
+ Who are our clients:
5
+ We are already helping the leaders across Health-tech, Ed-tech, Fintech, E-commerce, and consumer services like Treebo, SOTC, Thomas Cook, Aakash, MediBuddy, PlanetSpark. But as the list is growing at tremendous pace, we are looking for folks who are excited about taking this to the market with us.
6
+ Here’s why you should apply:
7
+ If you love AI, understand SaaS, love selling and looking to join a ship bound to fly- then Convin is the place for you.
8
+ We are a young (pre series A) company, and are hungry to go big. If you are looking to build a team, product with energy, and not too many rules- we are the exact place you are looking for.
9
+ Key Responsibilities:
10
+ Collaborate with cross-functional teams to design, develop, and maintain robust backend systems that power our applications.
11
+ Utilize your proficiency in Python to implement efficient and scalable backend solutions.
12
+ Employ your experience with ChatGPT or similar language models to integrate natural language understanding and generation capabilities into our applications.
13
+ Contribute to the architecture, design, and development of APIs to facilitate smooth communication between frontend, ML, and backend components.
14
+ Work closely with the DevOps team to assist in the deployment, monitoring, and scaling of applications in various cloud environments.
15
+ Collaborate on continuous integration and continuous deployment (CI/CD) pipelines to streamline the software development lifecycle.
16
+ Stay up-to-date with industry trends, emerging technologies, and best practices to drive innovation within the team.
17
+ Requirements:
18
+ Approximately 2 years of professional experience in backend development.
19
+ Strong proficiency in Python, with the ability to write clean, maintainable, and efficient code.
20
+ Familiarity with ChatGPT or similar natural language processing models is highly desirable.
21
+ Basic understanding of DevOps concepts and practices, including deployment, monitoring, and scaling.
22
+ Exposure to various cloud services (e.g., AWS, Azure, Google Cloud) and their offerings.
23
+ A solid foundation in Computer Science, with a degree in Computer Science or related field preferred.
24
+ Excellent problem-solving skills and a proactive attitude towards tackling challenges.
25
+ Strong communication skills and the ability to collaborate effectively within cross-functional teams.
26
+ A keen interest in staying updated with technological advancements and a passion for continuous learning.
Code+Folder/jd_data/mlops3_exela.txt ADDED
@@ -0,0 +1,17 @@
1
+ Requirements:
2
+ Bachelor's or Master's degree in a quantitative field (CS, machine learning, mathematics, statistics) or equivalent experience.
3
+ Academic excellence with a minimum of 80% in 10th and 12th grades and a First Class at the graduate/post-graduate degree.
4
+ 4 to 5 years of extensive experience as a Data Scientist with expertise in building, deploying and operationalizing ML models.
5
+ Excellent programming skills in Python. Strong working knowledge of Python's numerical, data analysis and popular ML packages such as NumPy, Pandas, scikit-learn, Jupyter, TensorFlow, etc.
6
+ Key Data Science Skills:
7
+ Expertise in Natural Language Processing (NLP) and BERT/BERT-like transformer models.
8
+ Understanding of foundational concepts behind LLM and Generative AI.
9
+ Expertise in ML/LLM models development, deployment and operationalization.
10
+ Proficiency in MLOps processes and tools for deployment, scaling and monitoring of ML/LLM models.
11
+ Knowledge of ML lifecycle management tools such as MLflow, Seldon, or Kubeflow.
12
+ Strong understanding of containerization and orchestration technologies including Docker and Kubernetes.
13
+ Knowledge of GPU computing, GPU acceleration and frameworks for parallel executions is a big plus.
14
+ Knowledge of best practices for logging, monitoring, alerting and testing of production ML workloads.
15
+ Experience with ML model governance, ethics, privacy and security considerations.
16
+ Excellent communication and presentation skills.
17
+ Experience with cloud-based AI platforms and services such as Azure, GCP is preferred
Code+Folder/jd_data/mlops4_exl.txt ADDED
@@ -0,0 +1,23 @@
1
+ Skills/ Qualifications Required:
2
+ Relevant experience in ML Engineering/ ML Ops role with an end-to-end understanding of ML based project’s solution design & architecture, development, implementation & deployment
3
+ Should fulfill all the standard MLOps level 2 requirements for CI/CD + CT pipeline automation
4
+ Strong grasp & hands on experience with production ready scalable code using SQL (advance) and Python
5
+ Hands-on experience in working on any of the cloud stacks: AWS/Azure/GCP
6
+ Good communication skills
7
+ Can work hands-on independently.
8
+ Bachelor’s degree from Tier I/II colleges preferred
9
+ 3+ years of work experience with MLOps
10
+ 2+ years of work experience with Continuous Integration and Continuous Delivery (CI/CD)
11
+ Job Responsibilities
12
+ Actively own & manage client deliverables.
13
+ Design solution architectures and pipelines for ML applications.
14
+ Create ML prototypes, design ML systems, develop automated ML application pipelines (across data collection, processing, cleaning, transformation etc. aspects) under the constraints of scalability, correctness, and maintainability.
15
+ Implement model evaluation and model + data validation tools/ techniques such as schema validation, evaluation metrics etc.
16
+ Develop and deploy CI/CD-based automated ML application pipelines (collection, processing, cleaning, transformation etc.) along with the CT component for a continuous feedback loop for re-training.
17
+ Strong skills in Feature store setup, Pipeline Integration, Automated triggering, Model Continuous Delivery, Model Serving (via APIs) & Model Monitoring
18
+ Responsible for productionizing and making the models available as APIs / micro services.
19
+ Promote a practice of unifying system development (Dev) and system operations (Ops)
20
+ Ensure output’s thorough quality check & provide analytics driven insights and next steps.
21
+ To perform statistical analysis and fine-tune models using test results.
22
+ Understand data and different platforms used by the client.
23
+ Actively contribute towards problem solving & mentor juniors in the team
Code+Folder/readme.md ADDED
@@ -0,0 +1,254 @@
1
+ # Resume Analyzer
2
+
3
+ A Streamlit application that analyzes resumes against job descriptions using OpenAI's GPT models.
4
+
5
+ ## Features
6
+ - Resume PDF upload and analysis
7
+ - Job Description comparison
8
+ - Detailed report generation
9
+ - Interactive chat interface
10
+ - Real-time feedback system
11
+
12
+ ## Setup
13
+ 1. Clone the repository
14
+ 2. Install requirements: `pip install -r requirements.txt`
15
+ 3. Run the app: `streamlit run src/resume_suggestions.py`
16
+
17
+ ## Live Demo
18
+ [Link to your Streamlit Cloud deployment]
19
+
20
+ # Resume Suggestion Application
21
+
22
+ This repository contains the code to run a Resume Suggestion Application. The application leverages machine learning models to analyze job descriptions and resumes, calculate similarity scores, and provide suggestions. This README provides step-by-step instructions to set up the environment, install required dependencies, and run the application on both Windows and Mac systems.
23
+
24
+ ## Table of Contents
25
+ - [Prerequisites](#prerequisites)
26
+ - [Installation](#installation)
27
+ - [Setting Up a Virtual Environment](#setting-up-a-virtual-environment)
28
+ - [Using `virtualenv`](#using-virtualenv)
29
+ - [Using `venv`](#using-venv)
30
+ - [Installing Dependencies](#installing-dependencies)
31
+ - [Running the Application](#running-the-application)
32
+ - [Adding Paths for Poppler and Tesseract in Python](#adding-paths-for-poppler-and-tesseract-in-python)
33
+ - [Troubleshooting](#troubleshooting)
34
+
35
+ ## Prerequisites
36
+ Before running the application, you need to install the following dependencies on your system:
37
+
38
+ 1. **Python 3.10**: Ensure that Python is installed and accessible from the command line.
39
+ 2. **pip**: Ensure that pip is installed for managing Python packages.
40
+
41
+ ## Installation
42
+
43
+ ### Installing Poppler
44
+
45
+ #### **Windows**
46
+ 1. Download the latest version of Poppler for Windows from [this link](https://github.com/oschwartz10612/poppler-windows/releases/).
47
+ 2. Extract the downloaded zip file to a directory, e.g., `C:\poppler`.
48
+ 3. Add the `bin` directory inside the extracted folder to your system's PATH:
49
+ - Open the Start Menu and search for "Environment Variables."
50
+ - Click on "Edit the system environment variables."
51
+ - In the "System Properties" window, click the "Environment Variables" button.
52
+ - Under "System variables," find and select the `Path` variable, then click "Edit."
53
+ - Click "New" and add the path to the `bin` directory, e.g., `C:\poppler\bin`.
54
+ - Click "OK" to close all windows.
55
+
56
+ #### **Mac**
57
+ 1. Install Poppler via Homebrew:
58
+ ```bash
59
+ brew install poppler
60
+ ```
61
+
62
+ ### Installing Tesseract
63
+
64
+ #### **Windows**
65
+ 1. Download the Tesseract installer from [this link](https://github.com/UB-Mannheim/tesseract/wiki).
66
+ 2. Run the installer and follow the instructions to install Tesseract.
67
+ 3. Ensure that the installer adds Tesseract to your system's PATH during installation.
68
+
69
+ #### **Mac**
70
+ 1. Install Tesseract via Homebrew:
71
+ ```bash
72
+ brew install tesseract
73
+ ```
74
+
75
+ ## Setting Up a Virtual Environment
76
+
77
+ It is recommended to use a virtual environment to manage dependencies for the project.
78
+
79
+ ### Using `virtualenv`
80
+
81
+ #### **Windows**
82
+
83
+ `pip install virtualenv`
84
+
85
+ `virtualenv venv`
86
+
87
+ `.\venv\Scripts\activate`
88
+
89
+ #### **Mac/Linux**
90
+
91
+ `pip install virtualenv`
92
+
93
+ `virtualenv venv`
94
+
95
+ `source venv/bin/activate`
96
+
97
+
98
+ ### Python version 3.10.4
99
+
100
+ To create a virtual environment and install requirements in Python 3.10.4 on different operating systems, follow the instructions below:
101
+
102
+ ### For Windows:
103
+
104
+ Open the Command Prompt by pressing Win + R, typing "cmd", and pressing Enter.
105
+
106
+ Change the directory to the desired location for your project:
107
+
108
+
109
+ `cd C:\path\to\project`
110
+
111
+ Create a new virtual environment using the venv module:
112
+
113
+
114
+ `python -m venv myenv`
115
+
116
+ Activate the virtual environment:
117
+ `myenv\Scripts\activate`
118
+
119
+
120
+ Install the project requirements using pip:
121
+ `pip install -r requirements.txt`
122
+
123
+ ### For Linux/Mac:
124
+ Open a terminal.
125
+
126
+ Change the directory to the desired location for your project:
127
+
128
+ `cd /path/to/project`
129
+
130
+ Create a new virtual environment using the venv module:
131
+
132
+ `python3.10 -m venv myenv`
133
+
134
+
135
+ Activate the virtual environment:
136
+ `source myenv/bin/activate`
137
+
138
+ Install the project requirements using pip:
139
+ `pip install -r requirements.txt`
140
+
141
+ These instructions assume you have Python 3.10.4 installed and added to your system's PATH variable.
142
+
143
+ ### Execution Instructions if Multiple Python Versions Installed
144
+
145
+ If you have multiple Python versions installed on your system, you can still create the virtual environment with Python 3.10.4 specifically: use the Python Launcher (`py -3.10`) on Windows, or call `python3.10` directly on Linux/Mac. Follow the instructions below:
146
+
147
+ ### For Windows:
148
+ Open the Command Prompt by pressing Win + R, typing "cmd", and pressing Enter.
149
+
150
+ Change the directory to the desired location for your project:
151
+
152
+ `cd C:\path\to\project`
153
+
154
+ Create a new virtual environment using the Python Launcher:
155
+
156
+ `py -3.10 -m venv myenv`
157
+
158
+ Note: Replace myenv with your desired virtual environment name.
159
+
160
+ Activate the virtual environment:
161
+
162
+
163
+ `myenv\Scripts\activate`
164
+
165
+
166
+ Install the project requirements using pip:
167
+
168
+ `pip install -r requirements.txt`
169
+
170
+
171
+ ### For Linux/Mac:
172
+ Open a terminal.
173
+
174
+ Change the directory to the desired location for your project:
175
+
176
+ `cd /path/to/project`
177
+
178
+ Create a new virtual environment using the Python Launcher:
179
+
180
+
181
+ `python3.10 -m venv myenv`
182
+
183
+
184
+ Note: Replace myenv with your desired virtual environment name.
185
+
186
+ Activate the virtual environment:
187
+
188
+ `source myenv/bin/activate`
189
+
190
+
191
+ Install the project requirements using pip:
192
+
193
+ `pip install -r requirements.txt`
194
+
195
+
196
+ By specifying the version using py -3.10 or python3.10, you can ensure that the virtual environment is created using Python 3.10.4 specifically, even if you have other Python versions installed.
197
+
198
+
199
+ ## Running the Application
200
+
201
+ Set your OpenAI API key in `src/constants.py` (or export it as the `OPENAI_API_KEY` environment variable), then run:
202
+ ```bash
203
+ streamlit run src/resume_suggestions.py
204
+ ```
205
+
206
+ This will start a local web server and open the application in your default web browser.
207
+
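If you keep the key in a `.env` file rather than pasting it into the sidebar, here is a quick sanity check that it will be visible to the app. This is a minimal sketch: the file name `.env` and its location in the working directory are assumptions, and python-dotenv is already listed in `requirements.txt`.

```python
# Minimal check, assuming a .env file containing OPENAI_API_KEY=... in the working directory
import os
from dotenv import load_dotenv

load_dotenv()  # makes the variables from .env available via os.getenv
print("OPENAI_API_KEY found:", bool(os.getenv("OPENAI_API_KEY")))
```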
208
+ ## Adding Paths for Poppler and Tesseract in Python
209
+ If you encounter issues with Tesseract or Poppler not being detected, you can manually specify the paths in your Python code:
210
+
211
+ **For Tesseract:**
212
+
213
+ `import pytesseract`
214
+
215
+ `pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'` # Windows
216
+
217
+
218
+
219
+ On Mac, this step is usually unnecessary if installed via Homebrew.
220
+ **For Poppler:**
221
+
222
+ `from pdf2image import convert_from_path`
223
+
224
+ `images = convert_from_path('your_pdf_file.pdf', poppler_path=r'C:\path\to\poppler\bin')` # Windows
225
+
226
+
227
+ On Mac, this step is usually unnecessary if installed via Homebrew.
228
+ ## Troubleshooting
229
+ ### Common Issues and Solutions
230
+ **Poppler/Tesseract Not Found:**
231
+
232
+ Ensure that the paths are correctly set in your system's environment variables.
233
+ You can specify the path directly in your Python code as shown in the section above.
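For reference, a minimal end-to-end sketch combining both settings (the Windows paths are examples, adjust them to your install locations; `your_pdf_file.pdf` is a placeholder):

```python
# Sketch: point pytesseract and pdf2image at local installs, then OCR the first page of a PDF
import pytesseract
from pdf2image import convert_from_path

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"  # Windows only
pages = convert_from_path("your_pdf_file.pdf", poppler_path=r"C:\path\to\poppler\bin")   # Windows only
print(pytesseract.image_to_string(pages[0]))  # extracted text from page 1
```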
234
+
235
+
236
+
237
+ ```
238
+ ├─ jd_data/
239
+ ├─ output
240
+ │ ├─ jd_embeddings_large.pkl
241
+ │ └─ resume_embeddings_large.pkl
242
+ ├─ readme.md
243
+ ├─ requirements.txt
244
+ ├─ resume_data/
245
+ ├─ Resume_Scorer.ipynb
246
+ ├─ Resume_Suggestions.ipynb
247
+ └─ src
248
+ ├─ constants.py
249
+ ├─ directory_reader.py
250
+ ├─ embedding_model.py
251
+ ├─ resume_scorer.py
252
+ └─ resume_suggestions.py
253
+
254
+ ```
Code+Folder/requirements.txt ADDED
@@ -0,0 +1,12 @@
1
+ streamlit
2
+ langchain
3
+ langchain-openai
4
+ openai
5
+ streamlit-feedback
6
+ PyPDF2
7
+ pypdf
8
+ tqdm
9
+ numpy
10
+ pandas
11
+ scikit-learn
12
+ python-dotenv
Code+Folder/src/constants.py ADDED
@@ -0,0 +1,41 @@
1
+ JD_PATH = "../jd_data/*"
2
+ RESUME_PATH = "../resume_data/*/*"
3
+ EMBEDDING_MODEL_NAME = "text-embedding-3-large"
4
+ OUTPUT_PATH = "./output/"
5
+
6
+ JD_EMBEDDINGS_FILENAME = "jd_embeddings_large.pkl"
7
+ RESUME_EMBEDDINGS_FILENAME = "resume_embeddings_large.pkl"
8
+ IS_EMBEDDINGS_CREATED = True
9
+ OPENAI_MODEL_NAME = "gpt-4o-mini-2024-07-18"
10
+ TEMPLATE_CONTENT = """You are a helpful assistant. You do not respond as 'User' or pretend to be 'User'. You only
11
+ respond once as 'assistant'.
12
+
13
+ System Role: Resume Reviewer
14
+
15
+ Your role is to act as a resume reviewer. You will assist users in improving their resumes to better align with specific
16
+ job descriptions. Provide professional advice on resume building, interview preparation, and career development.
17
+ Offer constructive feedback and encouragement. Whenever you are given a resume and a job description, there will be
18
+ tokens added before and after the resume and job description. The tokens are as follows: <RESUME STARTS HERE> and
19
+ <RESUME ENDS HERE> for the resume and <JOB DESCRIPTION STARTS HERE> and <JOB DESCRIPTION ENDS HERE> for the job
20
+ description. Utilize these tokens to provide feedback and suggestions and clearly segregate the resume and job
21
+ description. Do not mix up the content of the resume and job description. In case the resume or job requirements
22
+ in the description do not align with each other, do not mix up the content of the resume and job description and
23
+ keep them separate and process them accordingly. Provide feedback based only on the content provided.
24
+ Strictly ONLY answer the question if it is relevant to resume and job description provided otherwise reply with
25
+ 'Please ask a relevant question'. Do not answer general knowledge questions.
26
+ """
27
+
28
+ comparison_prompt = "Compare the resume: <RESUME STARTS HERE> {}. <RESUME ENDS HERE> with the job description: <JOB DESCRIPTION STARTS HERE> {}.<JOB DESCRIPTION ENDS HERE> Do they match? If not, what are the gaps? Do not make any assumptions about the candidate's skills or experience or the job requirements."
29
+ resume_analysis_prompt = "Provide a detailed summary of the candidate's skills, experience, and qualifications based on the content of the following resume: <RESUME STARTS HERE> {}. <RESUME ENDS HERE>"
30
+ job_description_analysis_prompt = "List the key skills, qualifications, and experience required as outlined in the following job description: <JOB DESCRIPTION STARTS HERE> {}. <JOB DESCRIPTION ENDS HERE>"
31
+ gap_analysis_prompt = "Compare the skills and experience detailed in this resume: <RESUME STARTS HERE> {} <RESUME ENDS HERE> with the requirements listed in the job description: <JOB DESCRIPTION STARTS HERE> {}. <JOB DESCRIPTION ENDS HERE> Identify any gaps or mismatches."
32
+ actionable_steps_prompt = "Given the gaps identified between the resume: <RESUME STARTS HERE> {} <RESUME ENDS HERE> and the job description: <JOB DESCRIPTION STARTS HERE> {} <JOB DESCRIPTION ENDS HERE>, suggest actionable steps for the candidate to acquire the necessary skills and experience."
33
+ experience_enhancement_prompt = "Based on the candidate's experience outlined in this resume: <RESUME STARTS HERE> {} <RESUME ENDS HERE>, recommend practical activities or steps to gain or improve the experience aligned with the needs of this role: <JOB DESCRIPTION STARTS HERE> {}. <JOB DESCRIPTION ENDS HERE>"
34
+ additional_qualifications_prompt = "For areas where this resume: <RESUME STARTS HERE> {} <RESUME ENDS HERE> falls short or does not satisfy the requirements of the job role : <JOB DESCRIPTION STARTS HERE> {} <JOB DESCRIPTION ENDS HERE>, suggest specific areas for improvement. Include recommendations for additional qualifications or certifications."
35
+ resume_tailoring_prompt = "Advise on how the candidate can tailor their resume: <RESUME STARTS HERE> {} <RESUME ENDS HERE> to align more closely with this job description: <JOB DESCRIPTION STARTS HERE> {} <JOB DESCRIPTION ENDS HERE>, focusing on emphasizing skills and experiences relevant to the job description."
36
+ relevant_skills_highlight_prompt = "Analyze this resume: <RESUME STARTS HERE> {} <RESUME ENDS HERE> and provide suggestions on restructuring it to foreground skills and experiences pertinent to the job description: <JOB DESCRIPTION STARTS HERE> {}. <JOB DESCRIPTION ENDS HERE>"
37
+ resume_formatting_prompt = "Offer guidance on how the candidate can enhance the formatting of their resume: <RESUME STARTS HERE> {} <RESUME ENDS HERE> to improve visual appeal and readability."
38
+ resume_length_prompt = "Recommend strategies for the candidate to adjust the length of their resume: <RESUME STARTS HERE> {} <RESUME ENDS HERE>, ensuring it is concise while remaining aligned with the requirements in the job description: <JOB DESCRIPTION STARTS HERE> {}. <JOB DESCRIPTION ENDS HERE>"
39
+
40
+ # OPENAI_API_KEY is imported by embedding_model.py; read it from the environment (or a .env file via python-dotenv).
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
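The prompt constants above are plain `str.format` templates. Below is a minimal sketch of how one is filled; the resume and JD strings are placeholders, and the import path assumes the script is run from `Code+Folder/src`.

```python
# Sketch: filling a prompt template; the delimiter tokens keep the resume and JD clearly separated for the model
from constants import comparison_prompt

resume_text = "PLACEHOLDER: extracted resume text"
jd_text = "PLACEHOLDER: job description text"

prompt = comparison_prompt.format(resume_text, jd_text)  # resume first, job description second
print(prompt)
```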
Code+Folder/src/directory_reader.py ADDED
@@ -0,0 +1,143 @@
1
+ try:
2
+ import cv2
3
+ import numpy as np
4
+ from pdf2image import convert_from_path
5
+ import pytesseract
6
+ CV2_AVAILABLE = True
7
+ except ImportError:
8
+ CV2_AVAILABLE = False
9
+
10
+ from glob import glob
11
+ from pypdf import PdfReader
12
+ from tqdm import tqdm
13
+ import os
14
+ import PyPDF2
15
+ import io
16
+
17
+ class DirectoryReader:
18
+ """
19
+ A class to read and process job description (JD) files and resume files from specified directories.
20
+ """
21
+ def __init__(self, path_to_jds, path_to_resumes):
22
+ """
23
+ Initializes the DirectoryReader with paths to job descriptions and resumes.
24
+
25
+ Args:
26
+ path_to_jds (str): Path to the directory containing job description files.
27
+ path_to_resumes (str): Path to the directory containing resume files.
28
+ """
29
+ self.path_to_jds = path_to_jds
30
+ self.path_to_resumes = path_to_resumes
31
+ self.jd_data = {}
32
+ self.resume_data = {}
33
+
34
+ def read_jd_files(self):
35
+ """
36
+ Reads job description files from the specified directory and stores the content in jd_data attribute.
37
+
38
+ Returns:
39
+ dict: A dictionary with job names as keys and the corresponding job descriptions as values.
40
+ """
41
+ file_list = glob(self.path_to_jds, recursive=True)
42
+ for file in tqdm(file_list):
43
+ with open(file, "r", encoding="utf-8") as f:
44
+ data = f.read()
45
+ data = data.strip().lower()
46
+ job_name = os.path.basename(file).replace(".txt", "")  # basename keeps the key correct on Windows paths too
47
+ self.jd_data[job_name] = data
48
+ return self.jd_data
49
+
50
+ @staticmethod
51
+ def extract_text_from_pdf_path(file):  # renamed: the Streamlit-upload variant below would otherwise shadow this method
52
+ reader = PdfReader(file)
53
+ data = ""
54
+ for page in reader.pages:
55
+ data = data + page.extract_text() + "\n"
56
+ data = data.strip().lower()
57
+ return data
58
+
59
+ def extract_text_from_image(self, file):
60
+ if not CV2_AVAILABLE:
61
+ raise ImportError("OpenCV, numpy, pdf2image, or pytesseract is not installed. Install these packages to use image processing features.")
62
+
63
+ pages = convert_from_path(file)
64
+ extracted_text = []
65
+ for page in pages:
66
+ # Step 1: Preprocess the image (deskew)
67
+ preprocessed_image = self.deskew(np.array(page))
68
+ # Step 2: Extract text using OCR
69
+ text = self.get_text_from_image(preprocessed_image)
70
+ extracted_text.append(text)
71
+ return "\n".join(extracted_text).strip().lower()
72
+
73
+ def read_resume_files(self):
74
+ """
75
+ Reads resume files from the specified directory and stores the content in resume_data attribute.
76
+ If the resume file is a PDF containing images, OCR is used to extract text.
77
+
78
+ Returns:
79
+ dict: A dictionary with resume identifiers as keys and the corresponding resume texts as values.
80
+ """
81
+ file_list = glob(self.path_to_resumes, recursive=True)
82
+ for file in tqdm(file_list):
83
+ file_parts = os.path.normpath(file).split(os.sep)
84
+ # The job title would be the name of the directory just before the file name
85
+ job_title = file_parts[-2].replace(" ", "_").lower()
86
+ # The resume name would be the file name without the extension
87
+ resume_name = os.path.basename(file_parts[-1]).replace("-", "_").lower().replace(".pdf", "")
88
+ data = self.extract_text_from_pdf_path(file)
89
+ if len(data) > 1:
90
+ self.resume_data[job_title + "_" + resume_name] = data
91
+ else: # to solve for incorrect startxref pointer(3), since they are images in pdf
92
+ self.resume_data[job_title + "_" + resume_name] = self.extract_text_from_image(file)
93
+ return self.resume_data
94
+
95
+
96
+ @staticmethod
97
+ def deskew(image):
98
+ if not CV2_AVAILABLE:
99
+ raise ImportError("OpenCV is not installed. Install opencv-python to use this feature.")
100
+
101
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
102
+ gray = cv2.bitwise_not(gray)
103
+ coords = np.column_stack(np.where(gray > 0))
104
+ angle = cv2.minAreaRect(coords)[-1]
105
+
106
+ if angle < -45:
107
+ angle = -(90 + angle)
108
+ else:
109
+ angle = -angle
110
+
111
+ (h, w) = image.shape[:2]
112
+ center = (w // 2, h // 2)
113
+ M = cv2.getRotationMatrix2D(center, angle, 1.0)
114
+ rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
115
+ return rotated
116
+
117
+ @staticmethod
118
+ def get_text_from_image(image):
119
+ if not CV2_AVAILABLE:
120
+ raise ImportError("Pytesseract is not installed. Install pytesseract to use this feature.")
121
+
122
+ text = pytesseract.image_to_string(image)
123
+ return text
124
+
125
+ def extract_text_from_pdf(self, pdf_file):
126
+ """Extract text from a PDF file uploaded via Streamlit"""
127
+ try:
128
+ # Check file size
129
+ file_size = len(pdf_file.getvalue()) / (1024 * 1024) # Size in MB
130
+ if file_size > 5:
131
+ return "Error: File size exceeds 5MB limit. Please upload a smaller file."
132
+
133
+ pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file.getvalue()))
134
+ text = ""
135
+ for page in pdf_reader.pages:
136
+ text += page.extract_text() or "" # Handle None returns
137
+
138
+ if not text.strip():
139
+ return "Error: Could not extract text from PDF. The file might be scanned or image-based."
140
+
141
+ return text
142
+ except Exception as e:
143
+ return f"Error processing PDF: {str(e)}"
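A minimal usage sketch for `DirectoryReader` (the glob patterns come from `constants.py`; the example dictionary keys are illustrative):

```python
# Sketch: read the bundled JDs and resumes into dictionaries keyed by file-derived names
from constants import JD_PATH, RESUME_PATH
from directory_reader import DirectoryReader

reader = DirectoryReader(JD_PATH, RESUME_PATH)
jd_data = reader.read_jd_files()          # e.g. {"ds1_volvo": "...lower-cased jd text...", ...}
resume_data = reader.read_resume_files()  # e.g. {"data_scientist_resume_1": "...lower-cased resume text...", ...}
print(len(jd_data), "JDs,", len(resume_data), "resumes")
```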
Code+Folder/src/embedding_model.py ADDED
@@ -0,0 +1,59 @@
1
+ from langchain_openai import OpenAIEmbeddings
2
+ import pickle
3
+ from constants import EMBEDDING_MODEL_NAME, OPENAI_API_KEY, OUTPUT_PATH
4
+
5
+
6
+ class EmbeddingModel:
7
+ """
8
+ A class to handle the creation, saving, and reading of embeddings using OpenAI's embedding model.
9
+ """
10
+ def __init__(self):
11
+ """
12
+ Initializes the EmbeddingModel with the specified OpenAI embedding model and API key.
13
+ """
14
+ self.embedding_model = OpenAIEmbeddings(api_key=OPENAI_API_KEY, model=EMBEDDING_MODEL_NAME)
15
+
16
+ def get_embeddings(self, data_dict):
17
+ """
18
+ Generates embeddings for the given data.
19
+
20
+ Args:
21
+ data_dict (dict): A dictionary where the keys are identifiers and the values are the texts to be embedded.
22
+
23
+ Returns:
24
+ dict: A dictionary with the same keys as the input and the corresponding embeddings as values.
25
+ """
26
+ output_dict = {}
27
+ keys = list(data_dict.keys())
28
+ values = list(data_dict.values())
29
+ embeddings = self.embedding_model.embed_documents(values)
30
+ for i in range(0, len(keys)):
31
+ output_dict[keys[i]] = embeddings[i]
32
+ return output_dict
33
+
34
+ @staticmethod
35
+ def save_embeddings(embedding, file_name):
36
+ """
37
+ Saves the given embeddings to a file.
38
+
39
+ Args:
40
+ embedding (dict): The embeddings to be saved.
41
+ file_name (str): The name of the file to save the embeddings to.
42
+ """
43
+ with open(OUTPUT_PATH + file_name, 'wb') as handle:
44
+ pickle.dump(embedding, handle, protocol=pickle.HIGHEST_PROTOCOL)
45
+
46
+ @staticmethod
47
+ def read_embeddings(file_name):
48
+ """
49
+ Reads embeddings from a file.
50
+
51
+ Args:
52
+ file_name (str): The name of the file to read the embeddings from.
53
+
54
+ Returns:
55
+ dict: The embeddings read from the file.
56
+ """
57
+ with open(OUTPUT_PATH + file_name, 'rb') as handle:
58
+ output_dict = pickle.load(handle)
59
+ return output_dict
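A minimal usage sketch for `EmbeddingModel`; it requires a valid `OPENAI_API_KEY`, and the file name below is illustrative (it is written under `OUTPUT_PATH`):

```python
# Sketch: embed two short documents, persist them, and read them back
from embedding_model import EmbeddingModel

model = EmbeddingModel()
embeddings = model.get_embeddings({"doc_a": "first document", "doc_b": "second document"})
EmbeddingModel.save_embeddings(embeddings, "example_embeddings.pkl")
restored = EmbeddingModel.read_embeddings("example_embeddings.pkl")
print(len(restored["doc_a"]))  # text-embedding-3-large vectors have 3072 dimensions
```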
Code+Folder/src/resume_scorer.py ADDED
@@ -0,0 +1,117 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import re
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+ from constants import RESUME_PATH, JD_PATH, JD_EMBEDDINGS_FILENAME, RESUME_EMBEDDINGS_FILENAME, \
6
+ IS_EMBEDDINGS_CREATED
7
+ from directory_reader import DirectoryReader
8
+ from embedding_model import EmbeddingModel
9
+
10
+
11
+ def create_embeddings():
12
+ """
13
+ Reads job descriptions (JDs) and resumes, generates embeddings for each, and saves the embeddings to disk.
14
+ """
15
+ dir_reader = DirectoryReader(JD_PATH, RESUME_PATH)
16
+ print("Reading JDs........")
17
+ jd_data = dir_reader.read_jd_files()
18
+ print("Reading Resumes........")
19
+ resume_data = dir_reader.read_resume_files()
20
+ print("Number of JDs -> ", len(jd_data))
21
+ print("Number of Resumes -> ", len(resume_data))
22
+
23
+ # Generate and Save JD Embeddings
24
+ print("Generating embeddings for JDs........")
25
+ embedding_model = EmbeddingModel()
26
+ jd_embeddings = embedding_model.get_embeddings(jd_data)
27
+ embedding_model.save_embeddings(jd_embeddings, JD_EMBEDDINGS_FILENAME)
28
+ print("Embeddings generated from JDs")
29
+
30
+ # Generate and Save Resume Embeddings
31
+ print("Generating embeddings for Resumes........")
32
+ resume_embeddings = embedding_model.get_embeddings(resume_data)
33
+ embedding_model.save_embeddings(resume_embeddings, RESUME_EMBEDDINGS_FILENAME)
34
+ print("Embeddings generated for Resumes")
35
+
36
+
37
+ def read_embeddings():
38
+ """
39
+ Reads job description and resume embeddings from disk.
40
+
41
+ Returns:
42
+ tuple: A tuple containing dictionaries of job description embeddings and resume embeddings.
43
+ """
44
+ embedding_model = EmbeddingModel()
45
+ jd_embeddings = embedding_model.read_embeddings(JD_EMBEDDINGS_FILENAME)
46
+ resume_embeddings = embedding_model.read_embeddings(RESUME_EMBEDDINGS_FILENAME)
47
+ return jd_embeddings, resume_embeddings
48
+
49
+
50
+ def get_similarity_dict(jd_embeddings, resume_embeddings):
51
+ """
52
+ Computes cosine similarity between job description embeddings and resume embeddings.
53
+
54
+ Args:
55
+ jd_embeddings (dict): A dictionary of job description embeddings.
56
+ resume_embeddings (dict): A dictionary of resume embeddings.
57
+
58
+ Returns:
59
+ dict: A dictionary where keys are resume names and values are dictionaries with job description names and their similarity scores.
60
+ """
61
+ resume_jd_combi_to_match = {"data_engineer": "de", "data_analyst": "dataanalyst",
62
+ "big_data_analyst": "bigdataanalyst",
63
+ "mlops_engineer": "mlops", "data_scientist": "ds", "data_architect": "da",
64
+ "machine_learning_engineer": "mle", "business_intelligence_analyst": "bianalyst"}
65
+
66
+ jd_pattern = re.compile(r'\d+_[a-z]+$')
67
+ resume_pattern = re.compile(r'_resume_\d+$')
68
+ similarity_dict = {}
69
+ for key1 in jd_embeddings.keys():
70
+ for key2 in resume_embeddings.keys():
71
+ cleaned_jd_category = jd_pattern.sub('', key1).replace('jd_data\\', '').replace('jd_data/', '')
72
+ cleaned_resume_category = resume_pattern.sub('', key2)
73
+ if resume_jd_combi_to_match[cleaned_resume_category] == cleaned_jd_category:
74
+ sim_score = cosine_similarity(np.array(jd_embeddings[key1]).reshape(1, -1),
75
+ np.array(resume_embeddings[key2]).reshape(1, -1))[0][0]
76
+ if key2 not in similarity_dict.keys():
77
+ similarity_dict[key2] = {}
78
+ similarity_dict[key2][key1] = {"score": sim_score}
79
+ else:
80
+ continue
81
+ return similarity_dict
82
+
83
+
84
+ def get_top_matching_job(resume_name):
85
+ """
86
+ Finds the top matching job description for a given resume based on similarity scores.
87
+
88
+ Args:
89
+ resume_name (str): The name of the resume.
90
+
91
+ Returns:
92
+ list: A list containing the job description name and the matching score.
93
+ """
94
+ score_list = [[key, SIMILARITY_DICT[resume_name][key]['score']]
95
+ for key in SIMILARITY_DICT[resume_name].keys()]
96
+ score_list = sorted(score_list, key=lambda x: x[1], reverse=True)
97
+ return score_list[0]
+
+
+ if not IS_EMBEDDINGS_CREATED:
+     create_embeddings()
+
+ jd_embeddings, resume_embeddings = read_embeddings()
+ SIMILARITY_DICT = get_similarity_dict(jd_embeddings, resume_embeddings)
+ output = []
+ for key in SIMILARITY_DICT.keys():
+     top_matching_job = get_top_matching_job(key)
+     output.append([key, top_matching_job[0], int(round(top_matching_job[1] * 100.0))])
+     print("Resume Name: ", key, "\nJD Name: ", top_matching_job[0],
+           "\nMatching Score: ", int(round(top_matching_job[1] * 100.0)))
+     print("----------")
+
+ match_df = pd.DataFrame(output, columns=["resume_name", "jd_name", "matching_score"])
+ # head() alone has no effect in a plain script, so print the preview explicitly
+ print(match_df.head(100))
+
+
+ print(get_top_matching_job("big_data_analyst_resume_1"))
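+
+ # Example console output for the call above (illustrative values only; actual
+ # scores depend on the embedding model and the input files):
+ #   ['jd_data/bigdataanalyst2_maveric', 0.78]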
Code+Folder/src/resume_suggestions.py ADDED
@@ -0,0 +1,229 @@
+ import os
+ import streamlit as st
+ from streamlit_feedback import streamlit_feedback
+ from langchain.chains import ConversationChain
+ from langchain.memory import ConversationBufferWindowMemory
+ from langchain_core.messages import SystemMessage
+ from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
+ from langchain_openai import ChatOpenAI
+ from constants import OPENAI_MODEL_NAME, TEMPLATE_CONTENT, comparison_prompt, resume_analysis_prompt, \
+     job_description_analysis_prompt, gap_analysis_prompt, actionable_steps_prompt, experience_enhancement_prompt, \
+     additional_qualifications_prompt, resume_tailoring_prompt, relevant_skills_highlight_prompt, \
+     resume_formatting_prompt, resume_length_prompt
+ from directory_reader import DirectoryReader
+
+ st.set_page_config(page_title="Resume Reviewer")
+
+ # Initialize llm as None at the top level
+ llm = None
+ resume_chain = None
+
+ # Add API key input in the sidebar
+ with st.sidebar:
+     st.title('Resume Reviewer')
+     st.write("Upload your resume and JD for my recommendations.")
+
+     # API key handling
+     if 'OPENAI_API_KEY' in st.secrets:
+         api_key = st.secrets['OPENAI_API_KEY']
+         os.environ['OPENAI_API_KEY'] = api_key
+         llm = ChatOpenAI(temperature=0.0, model=OPENAI_MODEL_NAME)
+     else:
+         # Fallback to manual input for local development
+         api_key = st.text_input("Enter your OpenAI API Key", type="password")
+         if api_key:
+             os.environ['OPENAI_API_KEY'] = api_key
+             llm = ChatOpenAI(temperature=0.0, model=OPENAI_MODEL_NAME)
+         else:
+             st.warning("Please enter your OpenAI API key to use this application")
+             llm = None
+
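+     # Note (deployment assumption): st.secrets is populated from
+     # .streamlit/secrets.toml or the hosting platform's secrets manager; for
+     # local runs a minimal secrets.toml would hold a single line such as
+     #   OPENAI_API_KEY = "sk-..."   (placeholder value)
+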
+     # Add upload method selection
+     upload_method = st.radio("Choose input method:", ["File Upload", "Text Input"])
+
+     # Default to no input until one of the methods below provides it
+     resume_file = None
+     jd_file = None
+     resume_content = None
+     job_description_content = None
+
+     if upload_method == "File Upload":
+         st.write("Note: File size should be less than 5MB")
+         resume_file = st.file_uploader("Upload your resume (pdf file only)", type=["pdf"], accept_multiple_files=False)
+         jd_file = st.file_uploader("Upload your JD (txt file only)", type=["txt"], accept_multiple_files=False)
+     else:
+         # Text input alternative
+         resume_text = st.text_area("Paste your resume content here:", height=200)
+         jd_text = st.text_area("Paste your job description here:", height=200)
+
+         # Use the pasted text directly in place of uploaded files
+         if resume_text and jd_text:
+             resume_content = resume_text
+             job_description_content = jd_text
+             st.sidebar.success("Text content received successfully!")
+
+ if resume_file is not None and jd_file is not None and api_key:
+     try:
+         with st.spinner("Processing resume file..."):
+             directory_reader = DirectoryReader("", "")
+             resume_content = directory_reader.extract_text_from_pdf(resume_file)
+             st.sidebar.success("Resume processed successfully!")
+
+         with st.spinner("Processing job description file..."):
+             if jd_file.type == 'text/plain':
+                 try:
+                     from io import StringIO
+                     stringio = StringIO(jd_file.getvalue().decode('utf-8'))
+                     read_data = stringio.read()
+                     job_description_content = read_data
+                     st.sidebar.success("JD processed successfully!")
+                 except Exception as e:
+                     st.sidebar.error(f"Error processing JD file: {str(e)}")
+                     job_description_content = None
+     except Exception as e:
+         st.sidebar.error(f"Error processing resume file: {str(e)}")
+         resume_content = None
+         job_description_content = None
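+
+ # Note: DirectoryReader("", "") is constructed with empty path arguments here,
+ # presumably because only its extract_text_from_pdf helper is needed for the
+ # uploaded file rather than its directory-scanning behavior.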
+
+ SYSTEM_PROMPT = "\n\n" + TEMPLATE_CONTENT + "<RESUME STARTS HERE> {}. <RESUME ENDS HERE> with the job description: <JOB DESCRIPTION STARTS HERE> {}.<JOB DESCRIPTION ENDS HERE>\n\nBe crisp and clear in the response. DO NOT provide the resume and job description in the response.\n\n".format(resume_content, job_description_content)
+
+
+ # Store LLM-generated responses
+ if "messages" not in st.session_state:
+     st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]
+
+ # Display chat messages (feedback entries are internal and not rendered)
+ for message in st.session_state.messages:
+     if message["role"] != "feedback":
+         with st.chat_message(message["role"]):
+             st.write(message["content"])
+
+
+ def clear_chat_history():
+     st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]
+     # Reset the shared conversation memory so the chain starts fresh
+     memory.clear()
+
+
+ def generate_report():
+     user_message = {"role": "user", "content": "Generate a Report!"}
+     st.session_state.messages.append(user_message)
+     # Check the extracted contents so this works for both file upload and text input
+     if resume_content is not None and job_description_content is not None:
+         with st.chat_message("assistant"):
+             with st.spinner("Just a moment..."):
+                 comparison_analysis = generate_response(comparison_prompt.format(resume_content, job_description_content))
+                 resume_analysis = generate_response(resume_analysis_prompt.format(resume_content))
+                 job_description_analysis = generate_response(
+                     job_description_analysis_prompt.format(job_description_content))
+                 gap_analysis = generate_response(gap_analysis_prompt.format(resume_content,
+                                                                             job_description_content))
+                 actionable_steps_analysis = generate_response(actionable_steps_prompt.format(
+                     resume_content, job_description_content))
+                 experience_enhancement_analysis = generate_response(
+                     experience_enhancement_prompt.format(resume_content, job_description_content))
+                 additional_qualifications_analysis = generate_response(
+                     additional_qualifications_prompt.format(resume_content, job_description_content))
+                 resume_tailoring_analysis = generate_response(
+                     resume_tailoring_prompt.format(resume_content, job_description_content))
+                 relevant_skills_highlight_analysis = generate_response(
+                     relevant_skills_highlight_prompt.format(resume_content, job_description_content))
+                 resume_formatting_analysis = generate_response(
+                     resume_formatting_prompt.format(resume_content, job_description_content))
+                 resume_length_analysis = generate_response(
+                     resume_length_prompt.format(resume_content, job_description_content))
+
+                 # Compile the report
+                 report = f"Comparison Analysis:\n{comparison_analysis}\n\n" \
+                          f"Resume Analysis:\n{resume_analysis}\n\n" \
+                          f"Job Description Analysis:\n{job_description_analysis}\n\n" \
+                          f"Gap Analysis:\n{gap_analysis}\n\n" \
+                          f"Actionable Steps:\n{actionable_steps_analysis}\n\n" \
+                          f"Experience Enhancement:\n{experience_enhancement_analysis}\n\n" \
+                          f"Additional Qualifications:\n{additional_qualifications_analysis}\n\n" \
+                          f"Resume Tailoring:\n{resume_tailoring_analysis}\n\n" \
+                          f"Relevant Skills Highlight:\n{relevant_skills_highlight_analysis}\n\n" \
+                          f"Resume Formatting:\n{resume_formatting_analysis}\n\n" \
+                          f"Resume Length:\n{resume_length_analysis}"
+
+                 report_message = {"role": "assistant", "content": report}
+                 st.session_state.messages.append(report_message)
+     else:
+         st.error("Please provide a resume and a job description!")
+
+
+ # Set up the system message and prompt template
+ system_message = SystemMessage(content=TEMPLATE_CONTENT)
+ human_message = HumanMessagePromptTemplate.from_template("{history} User:{input} Assistant:")
+ prompt_template = ChatPromptTemplate(messages=[system_message, human_message], validate_template=True)
+ memory = ConversationBufferWindowMemory(k=2)
+
+ # Initialize the chain if llm is available
+ if llm is not None:
+     resume_chain = ConversationChain(
+         llm=llm,
+         prompt=prompt_template,
+         memory=memory,
+         verbose=False
+     )
+
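+ # For context: ConversationBufferWindowMemory(k=2) keeps only the last two
+ # user/assistant exchanges in {history}. Illustrative sketch (hypothetical turns):
+ #   memory.save_context({"input": "turn 1"}, {"output": "reply 1"})
+ #   memory.save_context({"input": "turn 2"}, {"output": "reply 2"})
+ #   memory.save_context({"input": "turn 3"}, {"output": "reply 3"})
+ #   memory.load_memory_variables({})  # only turns 2 and 3 remain in history
+
+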
+ def generate_response(prompt_input):
+     if resume_chain is None:
+         return "Please enter your OpenAI API key to use this application"
+     output = resume_chain.predict(input=prompt_input)
+     return output
+
+
+ st.sidebar.button('Clear Chat History', on_click=clear_chat_history)
+ st.sidebar.button('Generate Report', on_click=generate_report)
+
+
+ def get_feedback():
+     st.session_state.messages.append({"role": "feedback", "content": st.session_state.fbk})
+
+
+ # Initialize the current prompt in session state
+ if "current_prompt" not in st.session_state:
+     st.session_state.current_prompt = ""
+
+ # When the user enters a prompt
+ if prompt := st.chat_input():
+     st.session_state.current_prompt = prompt
+     st.session_state.messages.append({"role": "user", "content": prompt})
+     with st.chat_message("user"):
+         st.write(prompt)
+
+
+ def get_llm_response():
+     with st.chat_message("assistant"):
+         with st.spinner("Thinking..."):
+             response = generate_response(st.session_state.current_prompt + SYSTEM_PROMPT)
+             placeholder = st.empty()
+             placeholder.markdown(response)
+     message = {"role": "assistant", "content": response}
+     st.session_state.messages.append(message)
+     with st.form("form"):
+         streamlit_feedback(feedback_type="thumbs", optional_text_label="[Optional] Please provide an explanation", key="fbk")
+         st.form_submit_button('Save feedback', on_click=get_feedback)
+
+
+ # Generate a new response if the last message is not from the assistant
+ if st.session_state.messages[-1]["role"] not in ["assistant", "feedback"]:
+     get_llm_response()
+
+ # On a thumbs-down, regenerate the previous answer using the feedback text
+ if st.session_state.messages[-1]["role"] == "feedback":
+     try:
+         feedback_response = st.session_state.messages[-1]["content"]
+         score_mappings = {
+             "thumbs": {"👍": 1, "👎": 0},
+         }
+         score = score_mappings[feedback_response["type"]][feedback_response["score"]]
+         if score == 0:
+             feedback = feedback_response['text']
+             # Route the rewritten request through the prompt used by get_llm_response()
+             st.session_state.current_prompt = \
+                 "Please respond according to feedback '{0}' on the previous response on \n".format(feedback) \
+                 + st.session_state.messages[-3]["content"]
+             get_llm_response()
+     except (KeyError, TypeError, IndexError):
+         # Ignore malformed or missing feedback payloads
+         pass
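+
+ # For reference (payload shape assumed from the keys used above):
+ # streamlit_feedback(feedback_type="thumbs", ...) is expected to return something
+ # like {"type": "thumbs", "score": "👍", "text": "optional explanation"}, which is
+ # what score_mappings and the thumbs-down branch index into.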