Tests that ALTO XML output works.
Test tesseract-004.xml is expected to pass.
<p:declare-step name="main"
                version="3.0"
                xmlns:p="http://www.w3.org/ns/xproc"
                xmlns:cx="http://xmlcalabash.com/ns/extensions">
  <!-- Pipeline under test: rasterize a PDF, then run OCR on the result
       and request ALTO-format output. -->

  <!-- Extension step libraries that declare the cx:* steps used below. -->
  <p:import href="https://xmlcalabash.com/ext/library/pdf-steps.xpl"/>
  <p:import href="https://xmlcalabash.com/ext/library/tesseract.xpl"/>

  <p:output port="result"/>

  <!-- Render the sample PDF to images at 300 dpi. The document is bound
       with an explicit p:document child, which is the long form of the
       href shortcut on p:with-input. -->
  <cx:pdf-to-images dpi="300">
    <p:with-input port="source">
      <p:document href="../documents/example.pdf"/>
    </p:with-input>
  </cx:pdf-to-images>

  <!-- No explicit input binding here, so the step relies on XProc's
       default readable port (the output of the preceding step).
       NOTE(review): debug-output="/dev/null" presumably discards the
       step's debug output; confirm against the tesseract step library. -->
  <cx:tesseract output-format="alto"
                language="eng"
                debug-output="/dev/null"/>
</p:declare-step>
<s:schema queryBinding="xslt2"
          xmlns:s="http://purl.oclc.org/dsdl/schematron">
  <!-- Assertions applied to the pipeline result. Element names in the
       tests are unprefixed, i.e. they match elements in no namespace. -->

  <!-- The document element must be Page. -->
  <s:pattern>
    <s:rule context="/">
      <s:assert test="Page">Wrong document element</s:assert>
    </s:rule>
  </s:pattern>

  <!-- The page must contain a PrintSpace, and at least one String at the
       ComposedBlock/TextBlock/TextLine nesting beneath it. -->
  <s:pattern>
    <s:rule context="/Page">
      <s:assert test="PrintSpace">No PrintSpace</s:assert>
      <s:assert test="PrintSpace/ComposedBlock/TextBlock/TextLine/String">No string</s:assert>
    </s:rule>
  </s:pattern>
</s:schema>
Created test.