Skip to end of metadata
Go to start of metadata

You are viewing an old version of this page. View the current version.

Compare with Current View Page History

« Previous Version 3 Current »

Script to transform a file in NHS0098 format into a csv format suitable for use with the People bulk load template.

Script

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:fn="http://www.w3.org/2005/xpath-functions" xmlns:math="http://www.w3.org/2005/xpath-functions/math" xmlns:array="http://www.w3.org/2005/xpath-functions/array" xmlns:map="http://www.w3.org/2005/xpath-functions/map" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:err="http://www.w3.org/2005/xqt-errors" xmlns:loc="http://tis.nhs.gov.uk" exclude-result-prefixes="array fn map math xhtml xs err loc" version="3.0">
	<!-- 

	-->
	<xsl:output method="text" version="1.0" encoding="UTF-8" indent="yes"/>
	<!-- 

	-->
	<xsl:function name="loc:human_date_from_digits" as="xs:string">
		<xsl:param name="p_digits" as="xs:string"/>
		<xsl:sequence select="fn:substring($p_digits, 7,2) || '/' || fn:substring($p_digits, 5, 2) || '/' || fn:substring($p_digits, 1, 4)"/>
	</xsl:function>
	<!--
		Global variables
	-->
	<xsl:variable name="g-columns">
		<columnTitle>Forenames *</columnTitle>
		<columnTitle>Surname *</columnTitle>
		<columnTitle>OrielApplicantID (NEW)</columnTitle>
		<columnTitle>GMC Number</columnTitle>
		<columnTitle>GDC Number</columnTitle>
		<columnTitle>Public Health Number</columnTitle>
		<columnTitle>Role</columnTitle>
		<columnTitle>Record Status</columnTitle>
		<columnTitle>Programme Name</columnTitle>
		<columnTitle>Programme Number</columnTitle>
		<columnTitle>Programme Membership Type</columnTitle>
		<columnTitle>Programme End Date</columnTitle>
		<columnTitle>Rotation</columnTitle>
		<columnTitle>Curriculum #1</columnTitle>
		<columnTitle>Curriculum #1 End Date</columnTitle>
		<columnTitle>Curriculum #1 Start Date</columnTitle>
		<columnTitle>Curriculum #2</columnTitle>
		<columnTitle>Curriculum #2 End Date</columnTitle>
		<columnTitle>Curriculum #2 Start Date</columnTitle>
		<columnTitle>Curriculum #3</columnTitle>
		<columnTitle>Curriculum #3 End Date</columnTitle>
		<columnTitle>Curriculum #3 Start Date</columnTitle>
		<columnTitle>Title</columnTitle>
		<columnTitle>Known As</columnTitle>
		<columnTitle>Date of Birth</columnTitle>
		<columnTitle>NI number</columnTitle>
		<columnTitle>Email Address</columnTitle>
		<columnTitle>Mobile</columnTitle>
		<columnTitle>Telephone</columnTitle>
		<columnTitle>Address 1</columnTitle>
		<columnTitle>Address 2</columnTitle>
		<columnTitle>Address 3</columnTitle>
		<columnTitle>Post Code</columnTitle>
		<columnTitle>Gender</columnTitle>
		<columnTitle>Nationality</columnTitle>
		<columnTitle>Marital Status</columnTitle>
		<columnTitle>Religious Belief</columnTitle>
		<columnTitle>Ethnic Origin</columnTitle>
		<columnTitle>Sexual Orientation</columnTitle>
		<columnTitle>Disability</columnTitle>
		<columnTitle>Disability Details</columnTitle>
		<columnTitle>EEA Resident</columnTitle>
		<columnTitle>Permit to Work</columnTitle>
		<columnTitle>Settled, Permit to Work</columnTitle>
		<columnTitle>Visa Details/Number</columnTitle>
		<columnTitle>Visa Issued</columnTitle>
		<columnTitle>Visa Valid To</columnTitle>
		<columnTitle>Qualification</columnTitle>
		<columnTitle>Medical School</columnTitle>
		<columnTitle>Country of Qualification</columnTitle>
		<columnTitle>Date Attained</columnTitle>
	</xsl:variable>
	<xsl:variable name="g-nl" as="xs:string" select="'&#x0d;&#x0a;'"/>
	<!--
		Start of templates
	-->
	<xsl:template match="/" name="xsl:initial-template">
		<xsl:message select="'Building XML representation of Nino file...'"/>
		<xsl:variable name="nino-xml">
			<xsl:element name="NINOS">
				<xsl:apply-templates/>
			</xsl:element>
		</xsl:variable>
		<xsl:message select="'Done'"/>
		<xsl:message select="'Flattening Nino file...'"/>
		<xsl:variable name="flattened">
			<xsl:apply-templates select="$nino-xml/*"/>
		</xsl:variable>
		<xsl:message select="'Done'"/>
		<xsl:message select="'Building CSV file...'"/>
		<xsl:text>"</xsl:text>
		<xsl:value-of select="$g-columns/*" separator='","'/>
		<xsl:text>"</xsl:text>
		<xsl:value-of select="$g-nl"/>
		<xsl:apply-templates select="$flattened/*"/>
		<xsl:message select="'Done'"/>
	</xsl:template>
	<!-- -->
	<xsl:template match="xml">
		<xsl:analyze-string select="." regex="^HDR.*|^TRL.*" flags="m">
			<xsl:matching-substring>
				<xsl:choose>
					<xsl:when test="fn:tokenize(., ',')[1] = 'HDR'">
						<xsl:message select="'File name in header record: ' || fn:tokenize(., ',')[2]"/>
					</xsl:when>
					<xsl:when test="fn:tokenize(., ',')[1] = 'TRL'">
						<xsl:message select="'Trailer claims there are ' || xs:int(fn:tokenize(., ',')[2]) - 2 || ' data records on the file'"/>
						<xsl:message select="'There should be ' || (xs:int(fn:tokenize(., ',')[2]) - 2) div 2 || ' data records in the output file'"/>
					</xsl:when>
				</xsl:choose>
			</xsl:matching-substring>
		</xsl:analyze-string>
		<xsl:analyze-string select="." regex="^HDR.*|^PER.*|^PBD.*|^TRL.*" flags="m">
			<xsl:matching-substring>
				<xsl:element name="{fn:substring(.,1,3)}">
					<xsl:value-of select="."/>
				</xsl:element>
			</xsl:matching-substring>
			<xsl:non-matching-substring>
				<xsl:if test="fn:normalize-space(.) != ''">
					<xsl:message select="'Unmatched record type !' || . || '!'"/>
				</xsl:if>
			</xsl:non-matching-substring>
		</xsl:analyze-string>
	</xsl:template>
	<!-- -->
	<xsl:template match="NINOS">
		<xsl:for-each-group select="./PER|PBD" group-starting-with="PER">
			<xsl:element name="row">
				<xsl:attribute name="no" select="position()"/>
				<xsl:if test="fn:count(fn:current-group()) gt 2">
					<xsl:message select="(fn:count(fn:current-group()) - 1) || ' PBD records found for ' || fn:current-group()[1]"/>
				</xsl:if>
				<xsl:if test="fn:count(fn:current-group()) lt 2">
					<xsl:message select="'*** No PBD records found for ' || fn:current-group()[1] || ' ***'"/>
				</xsl:if>
				<xsl:apply-templates select="."/>
				<xsl:for-each select="fn:current-group() except .">
					<xsl:apply-templates select="."/>
				</xsl:for-each>
			</xsl:element>
		</xsl:for-each-group>
	</xsl:template>
	<!-- -->
	<xsl:template match="PER">
		<xsl:variable name="per-tokens" select="fn:tokenize(., ',')"/>
		<xsl:element name="lastName">
			<xsl:attribute name="colId" select="2"></xsl:attribute>
			<xsl:value-of select="$per-tokens[8]"/>
		</xsl:element>
		<xsl:element name="firstName">
			<xsl:attribute name="colId" select="1"></xsl:attribute>
			<xsl:value-of select="fn:normalize-space($per-tokens[9] || ' ' || $per-tokens[10])"/>
		</xsl:element>
		<xsl:element name="nino">
			<xsl:attribute name="colId" select="26"></xsl:attribute>
			<xsl:value-of select="$per-tokens[7]"/>
			<xsl:if test="not(fn:matches($per-tokens[7], '^[A-CEGHJ-PRSTW-Z][A-CEGHJ-NPRSTW-Z][0-9]{6}[A-D]?$'))">
				<xsl:message select="'Nino ''' || $per-tokens[7] || ''' doesn''t match HMRC format for ' || ."/>
			</xsl:if>
		</xsl:element>
		<!--		<xsl:element name="dob">
			<xsl:attribute name="colId" select="25"></xsl:attribute>
			<xsl:value-of select="loc:human_date_from_digits( $per-tokens[14])"/>
		</xsl:element> -->
	</xsl:template>
	<!-- -->
	<xsl:template match="PBD">
		<xsl:variable name="pbd-tokens" select="fn:tokenize(., ',')"/>
		<xsl:element name="body">
			<xsl:choose>
				<xsl:when test="$pbd-tokens[5] = 'General Medical Council'">
					<xsl:attribute name="colId" select="4"></xsl:attribute>
					<xsl:attribute name="body" select="$pbd-tokens[5]"></xsl:attribute>
					<xsl:value-of select="$pbd-tokens[7]"/>
				</xsl:when>
				<xsl:when test="$pbd-tokens[5] = 'General Dental Council'">
					<xsl:attribute name="colId" select="5"></xsl:attribute>
					<xsl:attribute name="body" select="$pbd-tokens[5]"></xsl:attribute>
					<xsl:value-of select="$pbd-tokens[7]"/>
				</xsl:when>
				<xsl:otherwise>
					<xsl:message select="'Unrecognised professinonal body - ' || $pbd-tokens[5]"/>
					<xsl:attribute name="colId" select="4"></xsl:attribute>
					<xsl:attribute name="body" select="'Unrecognised Professinonal Body'"></xsl:attribute>
				</xsl:otherwise>
			</xsl:choose>
		</xsl:element>
	</xsl:template>
	<!-- -->
	<xsl:template match="row">
		<!--		<xsl:sequence select="'Row: '"/>
		<xsl:value-of select="." separator='","'/>
		<xsl:sequence select="$g-nl"/> -->
		<!-- Line up the ducks -->
		<xsl:variable name="sorted-cols">
			<xsl:perform-sort select="./*">
				<xsl:sort select="xs:int(./@colId)"/>
			</xsl:perform-sort>
		</xsl:variable>
		<!--  -->
		<xsl:for-each select="$sorted-cols/*">
			<xsl:for-each select="xs:int(preceding-sibling::*[1]/@colId) to xs:int(./@colId) - 1">
				<xsl:text>,</xsl:text>
			</xsl:for-each>
			<xsl:value-of select='"""" || . ||""""'/>
		</xsl:for-each>
		<xsl:for-each select="1 to count($g-columns/*) - xs:int($sorted-cols/*[last()]/@colId) + 1">
			<xsl:text>,</xsl:text>
		</xsl:for-each>
		<xsl:value-of select="$g-nl"/>
	</xsl:template>
	<!-- -->
</xsl:stylesheet>

Sample Input

Note, any text files supplied from ESR require wrapping as below:

<?xml version="1.0" encoding="UTF-8"?>
<xml>
:
:
</xml>

<?xml version="1.0" encoding="UTF-8"?>
<xml>
HDR,DE_EMD_RNI_20191009_00900003.DAT,20191011 134315,20191008 000000,20191009 235959
PER,9988771,19876543,,,,SJ352089A,Ahmed,Abiy,,,,,19780211,EMD/RX1RA1/080/HT/001,,,,,,,,,,,,,,
PBD,9988771,19876543,,General Medical Council,,7678912,,
PER,9988772,19876544,,,,JY698251B,Abdullah,Mohammed,,,,,19930507,EMD/RX1RA1/080/HT/002,,,,,,,,,,,,,,
PBD,9988772,19876544,,General Medical Council,,7789123,,
PER,9988773,19876545,,,,JZ999522C,Cuthmore,Victoria,Anne,,,,19911217,EMD/RX1RA1/080/F1/003,,,,,,,,,,,,,,
PBD,9988773,19876545,,General Medical Council,,7912345,,
PER,9988774,19876546,,,,JT118449A,Odell,Julie,,,,,19831021,EMD/RX1RA1/011/F1/004,,,,,,,,,,,,,,
PBD,9988774,19876546,,General Medical Council,,6789612,,
PER,9988775,19876547,,,,JM844898C,Gomez,Jennifer,Ava,,,,19871123,EMD/RX1RA1/012/F1/001,,,,,,,,,,,,,,
PBD,9988775,19876547,,General Medical Council,,7234567,,
TRL,12
</xml>

Sample Output

"Forenames *","Surname *","OrielApplicantID (NEW)","GMC Number","GDC Number","Public Health Number","Role","Record Status","Programme Name","Programme Number","Programme Membership Type","Programme End Date","Rotation","Curriculum #1","Curriculum #1 End Date","Curriculum #1 Start Date","Curriculum #2","Curriculum #2 End Date","Curriculum #2 Start Date","Curriculum #3","Curriculum #3 End Date","Curriculum #3 Start Date","Title","Known As","Date of Birth","NI number","Email Address","Mobile","Telephone","Address 1","Address 2","Address 3","Post Code","Gender","Nationality","Marital Status","Religious Belief","Ethnic Origin","Sexual Orientation","Disability","Disability Details","EEA Resident","Permit to Work","Settled, Permit to Work","Visa Details/Number","Visa Issued","Visa Valid To","Qualification","Medical School","Country of Qualification","Date Attained"
"Abiy","Ahmed",,"7678912",,,,,,,,,,,,,,,,,,,,,,"SJ352089A",,,,,,,,,,,,,,,,,,,,,,,,,,
"Mohammed","Abdullah",,"7789123",,,,,,,,,,,,,,,,,,,,,,"JY698251B",,,,,,,,,,,,,,,,,,,,,,,,,,
"Victoria Anne","Cuthmore",,"7912345",,,,,,,,,,,,,,,,,,,,,,"JZ999522C",,,,,,,,,,,,,,,,,,,,,,,,,,
"Julie","Odell",,"6789612",,,,,,,,,,,,,,,,,,,,,,"JT118449A",,,,,,,,,,,,,,,,,,,,,,,,,,
"Jennifer Ava","Gomez",,"7234567",,,,,,,,,,,,,,,,,,,,,,"JM844898C",,,,,,,,,,,,,,,,,,,,,,,,,,
  • No labels