<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
<Rule xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="Rule_Axmedis.xsd">
  <Header>
    <Rule_Name>TextProcessing_DIPITA</Rule_Name>
    <AXRID>axcprule:e3cb5098-08b0-4905-b017-165f8d289f6c</AXRID>
    <Rule_Version></Rule_Version>
    <Rule_Type>AXCP</Rule_Type>
    <Software_Name></Software_Name>
    <Version_of_software></Version_of_software>
    <Date_of_production>2006-11-29</Date_of_production>
    <Author></Author>
    <Affiliation></Affiliation>
    <URL></URL>
    <Comment></Comment>
    <Last_Modifications>2006-12-13</Last_Modifications>
    <Terminal_ID></Terminal_ID>
    <Cost></Cost>
    <Work_Item_ID></Work_Item_ID>
  </Header>
  <Schedule>
    <Run>
      <Date>2006-11-29</Date>
      <Time>09:21:10</Time>
      <Periodicity Unit="Day">0</Periodicity>
      <Expiration_Date>2006-11-29</Expiration_Date>
      <Expiration_Time>09:21:10</Expiration_Time>
    </Run>
    <Status>Inactive</Status>
  </Schedule>
  <Definition>
    <AXCP_Rule>
      <Arguments/>
      <Rule_Body>
        <JS_Script name="JScript(0)"><![CDATA[var path="G:\\axmedis\\Ev\\";

// loading raw resource - PDF
var pdfRes = new AxResource();
//pdfRes.load("I:\\axmedis\\Ev\\axmedis-pres-eng-v1-7-short.pdf");
print("Reading pdf file");
pdfRes.load(path + "Zini-et-al_AXMEDIS2006.pdf");
// embedding the resource in a new object
print("Embedding the resource in a new Axmedis Object");
var obj = new AxmedisObject();
obj.addContent(pdfRes);

// Document Adaptation
// 1. Transcoding to HTML
var htmlRes = new AxResource();
htmlRes.contentID = "HTMLResource";
print("Document Adaptation. Transcoding to HTML");
TextDocsAdaptation.DocumentConversion(pdfRes,"text/html",htmlRes);
// 2. Embedding the HTML resource in the object
obj.addContent(htmlRes);
var textRes = new AxResource();
textRes.contentID = "PlainTextResource";
// 3. Transcoding to plain text
print("Document Adaptation. Transcoding to plain text");
TextDocsAdaptation.DocumentConversion(pdfRes,"text/plain",textRes);
// 4. Embedding the plain text resource in the object
obj.addContent(textRes);

// Language Guessing
// 5. Guessing resource language
var language = new Array(1);
language[0]="";
print("Guessing resource language");
LanguageGuesser.LanguageGuesser(textRes,language);
// 6. Adding language to Dublin Core metadata
var dublinCore = obj.getDublinCore();
dublinCore.addDCElement("language", language[0]);

// Keyword Extraction
// 7. Retrieving keyowrds
var keywords = new Array(1);
keywords[0]="";
print("Extracting keywords");
TextDescriptors.KWFromComparisons(textRes,4,false,keywords);
//print(keywords[0]);
var commaSepKeywords = keywords[0].replace(/[\n\r]+/g, ", ");
commaSepKeywords = commaSepKeywords.replace(/_{3}/g," ");
// 8. Adding keywords to Dublin Core metadata
print("Adding keywords to Dublin Core metadata");
dublinCore.addDCElement("subject", commaSepKeywords);

// 9. saving object to file
print("Saving object to file");
obj.save(path + "textproc.axm");
true;]]>
        </JS_Script>
      </Rule_Body>
      <Dependencies>
        <!-- Plug-ins called by the script in Rule_Body:
             TextDescriptors.KWFromComparisons, TextDocsAdaptation.DocumentConversion
             and LanguageGuesser.LanguageGuesser. -->
        <Dependency>
          <Plug_In_name>TextDescriptors</Plug_In_name>
          <Version>1.001</Version>
        </Dependency>
        <Dependency>
          <Plug_In_name>TextDocsAdaptation</Plug_In_name>
          <Version>1.001</Version>
        </Dependency>
        <Dependency>
          <Plug_In_name>LanguageGuesser</Plug_In_name>
          <Version>1.001</Version>
        </Dependency>
      </Dependencies>
    </AXCP_Rule>
  </Definition>
</Rule>

