XSL Transformation Scripts and Examples
Script to transform a file in NHS0098 format into a csv format suitable for use with the People bulk load template.
Script
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:fn="http://www.w3.org/2005/xpath-functions" xmlns:math="http://www.w3.org/2005/xpath-functions/math" xmlns:array="http://www.w3.org/2005/xpath-functions/array" xmlns:map="http://www.w3.org/2005/xpath-functions/map" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:err="http://www.w3.org/2005/xqt-errors" xmlns:loc="http://tis.nhs.gov.uk" exclude-result-prefixes="array fn map math xhtml xs err loc" version="3.0">
<!--
-->
<xsl:output method="text" version="1.0" encoding="UTF-8" indent="yes"/>
<!--
-->
<xsl:function name="loc:human_date_from_digits" as="xs:string">
<xsl:param name="p_digits" as="xs:string"/>
<xsl:sequence select="fn:substring($p_digits, 7,2) || '/' || fn:substring($p_digits, 5, 2) || '/' || fn:substring($p_digits, 1, 4)"/>
</xsl:function>
<!--
Global variables
-->
<xsl:variable name="g-columns">
<columnTitle>Forenames *</columnTitle>
<columnTitle>Surname *</columnTitle>
<columnTitle>OrielApplicantID (NEW)</columnTitle>
<columnTitle>GMC Number</columnTitle>
<columnTitle>GDC Number</columnTitle>
<columnTitle>Public Health Number</columnTitle>
<columnTitle>Role</columnTitle>
<columnTitle>Record Status</columnTitle>
<columnTitle>Programme Name</columnTitle>
<columnTitle>Programme Number</columnTitle>
<columnTitle>Programme Membership Type</columnTitle>
<columnTitle>Programme End Date</columnTitle>
<columnTitle>Rotation</columnTitle>
<columnTitle>Curriculum #1</columnTitle>
<columnTitle>Curriculum #1 End Date</columnTitle>
<columnTitle>Curriculum #1 Start Date</columnTitle>
<columnTitle>Curriculum #2</columnTitle>
<columnTitle>Curriculum #2 End Date</columnTitle>
<columnTitle>Curriculum #2 Start Date</columnTitle>
<columnTitle>Curriculum #3</columnTitle>
<columnTitle>Curriculum #3 End Date</columnTitle>
<columnTitle>Curriculum #3 Start Date</columnTitle>
<columnTitle>Title</columnTitle>
<columnTitle>Known As</columnTitle>
<columnTitle>Date of Birth</columnTitle>
<columnTitle>NI number</columnTitle>
<columnTitle>Email Address</columnTitle>
<columnTitle>Mobile</columnTitle>
<columnTitle>Telephone</columnTitle>
<columnTitle>Address 1</columnTitle>
<columnTitle>Address 2</columnTitle>
<columnTitle>Address 3</columnTitle>
<columnTitle>Post Code</columnTitle>
<columnTitle>Gender</columnTitle>
<columnTitle>Nationality</columnTitle>
<columnTitle>Marital Status</columnTitle>
<columnTitle>Religious Belief</columnTitle>
<columnTitle>Ethnic Origin</columnTitle>
<columnTitle>Sexual Orientation</columnTitle>
<columnTitle>Disability</columnTitle>
<columnTitle>Disability Details</columnTitle>
<columnTitle>EEA Resident</columnTitle>
<columnTitle>Permit to Work</columnTitle>
<columnTitle>Settled, Permit to Work</columnTitle>
<columnTitle>Visa Details/Number</columnTitle>
<columnTitle>Visa Issued</columnTitle>
<columnTitle>Visa Valid To</columnTitle>
<columnTitle>Qualification</columnTitle>
<columnTitle>Medical School</columnTitle>
<columnTitle>Country of Qualification</columnTitle>
<columnTitle>Date Attained</columnTitle>
</xsl:variable>
<xsl:variable name="g-nl" as="xs:string" select="'
'"/>
<!--
Start of templates
-->
<xsl:template match="/" name="xsl:initial-template">
<xsl:message select="'Building XML representation of Nino file...'"/>
<xsl:variable name="nino-xml">
<xsl:element name="NINOS">
<xsl:apply-templates/>
</xsl:element>
</xsl:variable>
<xsl:message select="'Done'"/>
<xsl:message select="'Flattening Nino file...'"/>
<xsl:variable name="flattened">
<xsl:apply-templates select="$nino-xml/*"/>
</xsl:variable>
<xsl:message select="'Done'"/>
<xsl:message select="'Building CSV file...'"/>
<xsl:text>"</xsl:text>
<xsl:value-of select="$g-columns/*" separator='","'/>
<xsl:text>"</xsl:text>
<xsl:value-of select="$g-nl"/>
<xsl:apply-templates select="$flattened/*"/>
<xsl:message select="'Done'"/>
</xsl:template>
<!-- -->
<xsl:template match="xml">
<xsl:analyze-string select="." regex="^HDR.*|^TRL.*" flags="m">
<xsl:matching-substring>
<xsl:choose>
<xsl:when test="fn:tokenize(., ',')[1] = 'HDR'">
<xsl:message select="'File name in header record: ' || fn:tokenize(., ',')[2]"/>
</xsl:when>
<xsl:when test="fn:tokenize(., ',')[1] = 'TRL'">
<xsl:message select="'Trailer claims there are ' || xs:int(fn:tokenize(., ',')[2]) - 2 || ' data records on the file'"/>
<xsl:message select="'There should be ' || (xs:int(fn:tokenize(., ',')[2]) - 2) div 2 || ' data records in the output file'"/>
</xsl:when>
</xsl:choose>
</xsl:matching-substring>
</xsl:analyze-string>
<xsl:analyze-string select="." regex="^HDR.*|^PER.*|^PBD.*|^TRL.*" flags="m">
<xsl:matching-substring>
<xsl:element name="{fn:substring(.,1,3)}">
<xsl:value-of select="."/>
</xsl:element>
</xsl:matching-substring>
<xsl:non-matching-substring>
<xsl:if test="fn:normalize-space(.) != ''">
<xsl:message select="'Unmatched record type !' || . || '!'"/>
</xsl:if>
</xsl:non-matching-substring>
</xsl:analyze-string>
</xsl:template>
<!-- -->
<xsl:template match="NINOS">
<xsl:for-each-group select="./PER|PBD" group-starting-with="PER">
<xsl:element name="row">
<xsl:attribute name="no" select="position()"/>
<xsl:if test="fn:count(fn:current-group()) gt 2">
<xsl:message select="(fn:count(fn:current-group()) - 1) || ' PBD records found for ' || fn:current-group()[1]"/>
</xsl:if>
<xsl:if test="fn:count(fn:current-group()) lt 2">
<xsl:message select="'*** No PBD records found for ' || fn:current-group()[1] || ' ***'"/>
</xsl:if>
<xsl:apply-templates select="."/>
<xsl:for-each select="fn:current-group() except .">
<xsl:apply-templates select="."/>
</xsl:for-each>
</xsl:element>
</xsl:for-each-group>
</xsl:template>
<!-- -->
<xsl:template match="PER">
<xsl:variable name="per-tokens" select="fn:tokenize(., ',')"/>
<xsl:element name="lastName">
<xsl:attribute name="colId" select="2"></xsl:attribute>
<xsl:value-of select="$per-tokens[8]"/>
</xsl:element>
<xsl:element name="firstName">
<xsl:attribute name="colId" select="1"></xsl:attribute>
<xsl:value-of select="fn:normalize-space($per-tokens[9] || ' ' || $per-tokens[10])"/>
</xsl:element>
<xsl:element name="nino">
<xsl:attribute name="colId" select="26"></xsl:attribute>
<xsl:value-of select="$per-tokens[7]"/>
<xsl:if test="not(fn:matches($per-tokens[7], '^[A-CEGHJ-PRSTW-Z][A-CEGHJ-NPRSTW-Z][0-9]{6}[A-D]?$'))">
<xsl:message select="'Nino ''' || $per-tokens[7] || ''' doesn''t match HMRC format for ' || ."/>
</xsl:if>
</xsl:element>
<!-- <xsl:element name="dob">
<xsl:attribute name="colId" select="25"></xsl:attribute>
<xsl:value-of select="loc:human_date_from_digits( $per-tokens[14])"/>
</xsl:element> -->
</xsl:template>
<!-- -->
<xsl:template match="PBD">
<xsl:variable name="pbd-tokens" select="fn:tokenize(., ',')"/>
<xsl:element name="body">
<xsl:choose>
<xsl:when test="$pbd-tokens[5] = 'General Medical Council'">
<xsl:attribute name="colId" select="4"></xsl:attribute>
<xsl:attribute name="body" select="$pbd-tokens[5]"></xsl:attribute>
<xsl:value-of select="$pbd-tokens[7]"/>
</xsl:when>
<xsl:when test="$pbd-tokens[5] = 'General Dental Council'">
<xsl:attribute name="colId" select="5"></xsl:attribute>
<xsl:attribute name="body" select="$pbd-tokens[5]"></xsl:attribute>
<xsl:value-of select="$pbd-tokens[7]"/>
</xsl:when>
<xsl:otherwise>
<xsl:message select="'Unrecognised professinonal body - ' || $pbd-tokens[5]"/>
<xsl:attribute name="colId" select="4"></xsl:attribute>
<xsl:attribute name="body" select="'Unrecognised Professinonal Body'"></xsl:attribute>
</xsl:otherwise>
</xsl:choose>
</xsl:element>
</xsl:template>
<!-- -->
<xsl:template match="row">
<!-- <xsl:sequence select="'Row: '"/>
<xsl:value-of select="." separator='","'/>
<xsl:sequence select="$g-nl"/> -->
<!-- Line up the ducks -->
<xsl:variable name="sorted-cols">
<xsl:perform-sort select="./*">
<xsl:sort select="xs:int(./@colId)"/>
</xsl:perform-sort>
</xsl:variable>
<!-- -->
<xsl:for-each select="$sorted-cols/*">
<xsl:for-each select="xs:int(preceding-sibling::*[1]/@colId) to xs:int(./@colId) - 1">
<xsl:text>,</xsl:text>
</xsl:for-each>
<xsl:value-of select='"""" || . ||""""'/>
</xsl:for-each>
<xsl:for-each select="1 to count($g-columns/*) - xs:int($sorted-cols/*[last()]/@colId) + 1">
<xsl:text>,</xsl:text>
</xsl:for-each>
<xsl:value-of select="$g-nl"/>
</xsl:template>
<!-- -->
</xsl:stylesheet>
Sample Input
Note, any text files supplied from ESR require wrapping as below:
<?xml version="1.0" encoding="UTF-8"?>
<xml>
:
:
</xml>
<?xml version="1.0" encoding="UTF-8"?>
<xml>
HDR,DE_EMD_RNI_20191009_00900003.DAT,20191011 134315,20191008 000000,20191009 235959
PER,9988771,19876543,,,,SJ352089A,Ahmed,Abiy,,,,,19780211,EMD/RX1RA1/080/HT/001,,,,,,,,,,,,,,
PBD,9988771,19876543,,General Medical Council,,7678912,,
PER,9988772,19876544,,,,JY698251B,Abdullah,Mohammed,,,,,19930507,EMD/RX1RA1/080/HT/002,,,,,,,,,,,,,,
PBD,9988772,19876544,,General Medical Council,,7789123,,
PER,9988773,19876545,,,,JZ999522C,Cuthmore,Victoria,Anne,,,,19911217,EMD/RX1RA1/080/F1/003,,,,,,,,,,,,,,
PBD,9988773,19876545,,General Medical Council,,7912345,,
PER,9988774,19876546,,,,JT118449A,Odell,Julie,,,,,19831021,EMD/RX1RA1/011/F1/004,,,,,,,,,,,,,,
PBD,9988774,19876546,,General Medical Council,,6789612,,
PER,9988775,19876547,,,,JM844898C,Gomez,Jennifer,Ava,,,,19871123,EMD/RX1RA1/012/F1/001,,,,,,,,,,,,,,
PBD,9988775,19876547,,General Medical Council,,7234567,,
TRL,12
</xml>
Sample Output
"Forenames *","Surname *","OrielApplicantID (NEW)","GMC Number","GDC Number","Public Health Number","Role","Record Status","Programme Name","Programme Number","Programme Membership Type","Programme End Date","Rotation","Curriculum #1","Curriculum #1 End Date","Curriculum #1 Start Date","Curriculum #2","Curriculum #2 End Date","Curriculum #2 Start Date","Curriculum #3","Curriculum #3 End Date","Curriculum #3 Start Date","Title","Known As","Date of Birth","NI number","Email Address","Mobile","Telephone","Address 1","Address 2","Address 3","Post Code","Gender","Nationality","Marital Status","Religious Belief","Ethnic Origin","Sexual Orientation","Disability","Disability Details","EEA Resident","Permit to Work","Settled, Permit to Work","Visa Details/Number","Visa Issued","Visa Valid To","Qualification","Medical School","Country of Qualification","Date Attained"
"Ably","Ahmed",,"7611912",,,,,,,,,,,,,,,,,,,,,,"SJ356784A",,,,,,,,,,,,,,,,,,,,,,,,,,
"Monty","Abdullah",,"7711123",,,,,,,,,,,,,,,,,,,,,,"JY696295B",,,,,,,,,,,,,,,,,,,,,,,,,,
"jonno","Cuthmore",,"7911345",,,,,,,,,,,,,,,,,,,,,,"JZ912342C",,,,,,,,,,,,,,,,,,,,,,,,,,
"Jammy","Odell",,"6781112",,,,,,,,,,,,,,,,,,,,,,"JT134589A",,,,,,,,,,,,,,,,,,,,,,,,,,
"Joshua","Gomez",,"7231167",,,,,,,,,,,,,,,,,,,,,,"JM844657C",,,,,,,,,,,,,,,,,,,,,,,,,,
Slack: https://hee-nhs-tis.slack.com/
Jira issues: https://hee-tis.atlassian.net/issues/?filter=14213