====== Manage Dublin Core by CSV, Bash and CRUD ======
\\
**CSV -> DC.xml -> update datastream**
  * Prepare a spreadsheet:
    * the first row holds the column labels: PID, dc:title, dc:date, ...
    * one object per row; multiple values of the same element go into the same cell, separated by |
  * Export the spreadsheet to CSV using § as the field separator and no text delimiter
  * Run the CSV2DC.sh script to convert the data into files ready for the CRUD "replace datastream" step:


#!/bin/bash
#
# CSV2DC.sh - convert a '§'-separated spreadsheet export into one Dublin Core
# XML file per row, ready to be pushed as DC datastreams.
#
# Usage: CSV2DC.sh INPUT.csv
#
# Input:
#   row 1      column labels, e.g. PID§dc:title§dc:date§...
#   rows 2..n  one object per row; column 1 holds the PID, and several values
#              of the same element share one cell, separated by '|'.
#
# Output:
#   For each row with a non-empty PID, a file <PID>_DC.xml (every ':' of the
#   PID replaced by '_') is written to the current directory. The index/label
#   table of the header row and each file name are printed to stdout.
#
# Exit status: 0 on success, 99 on a usage or input error.

readonly FIELD_SEP='§'   # column separator of the CSV export
readonly VALUE_SEP='|'   # separator between repeated values inside one cell

# Single-byte stand-in for FIELD_SEP while splitting. '§' is a multi-byte
# character in UTF-8; splitting on it directly through IFS can also split on
# other non-ASCII characters that share a byte with it (e.g. 'ç' when no
# UTF-8 locale is set), so each line is converted to this separator first.
readonly SPLIT_CHAR=$'\x1f'

# Root element wrapping the generated elements; it declares the dc: prefix
# used by the column labels.
# NOTE(review): this is the usual OAI-DC wrapper - confirm it matches what the
# target repository expects.
readonly DC_OPEN='<oai_dc:dc'\
' xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"'\
' xmlns:dc="http://purl.org/dc/elements/1.1/"'\
' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'\
' xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/'\
' http://www.openarchives.org/OAI/2.0/oai_dc.xsd">'
readonly DC_CLOSE='</oai_dc:dc>'

# Print a message on stderr and abort with status 99.
die() {
  printf '%s\n' "$*" >&2
  exit 99
}

# Print $1 without leading and trailing whitespace (this also removes the
# carriage return left at the end of a line by CRLF exports).
trim() {
  local s=$1
  s="${s#"${s%%[![:space:]]*}"}"
  s="${s%"${s##*[![:space:]]}"}"
  printf '%s' "$s"
}

# Print $1 with '&', '<' and '>' escaped for use as XML text.
# Cells that already contain the predefined entity references (&amp; &lt;
# &gt; &quot; &apos;) are not escaped a second time, so a CSV that was
# produced from existing XML can be converted back unchanged.
xml_escape() {
  local s=$1 ref
  # Replacements are kept in quoted variables: newer bash versions treat an
  # unquoted '&' in a replacement string as "the matched text".
  local amp='&amp;' lt='&lt;' gt='&gt;'
  s=${s//'&'/"$amp"}
  s=${s//'<'/"$lt"}
  s=${s//'>'/"$gt"}
  for ref in 'amp;' 'lt;' 'gt;' 'quot;' 'apos;'; do
    s=${s//"&amp;$ref"/"&$ref"}
  done
  printf '%s' "$s"
}

main() {
  (( $# >= 1 )) || die "Usage: ${0##*/} INPUT.csv"
  local input=$1
  [[ -f "$input" ]] || die "$input file not found"

  local line row index label pid fname part
  local -a labels=() values=() parts=()

  {
    # Header row: the column labels become the XML element names.
    IFS= read -r line || [[ -n "$line" ]] || die "$input is empty"
    row=${line//"$FIELD_SEP"/$SPLIT_CHAR}
    IFS=$SPLIT_CHAR read -r -a labels <<< "$row"
    for index in "${!labels[@]}"; do
      labels[index]=$(trim "${labels[index]}")
      printf '%s/%s\n' "$index" "${labels[index]}"
    done

    # Data rows. The "|| [[ -n ... ]]" keeps a last row that has no
    # terminating newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      row=${line//"$FIELD_SEP"/$SPLIT_CHAR}
      IFS=$SPLIT_CHAR read -r -a values <<< "$row"

      pid=$(trim "${values[0]}")
      [[ -n "$pid" ]] || continue   # blank line or row without a PID

      fname="${pid//:/_}_DC.xml"
      printf '%s\n' "$fname"

      {
        printf '%s\n' "$DC_OPEN"
        for index in "${!values[@]}"; do
          (( index == 0 )) && continue        # column 0 is the PID itself
          label=${labels[index]}
          [[ -n "$label" ]] || continue       # cell without a header label

          IFS=$VALUE_SEP read -r -a parts <<< "${values[index]}"
          for part in "${parts[@]}"; do
            part=$(trim "$part")
            [[ -n "$part" ]] || continue
            printf '\t<%s>%s</%s>\n' "$label" "$(xml_escape "$part")" "$label"
          done
        done
        printf '%s\n' "$DC_CLOSE"
      } > "$fname"
    done
  } < "$input"

  return 0
}

main "$@"

This creates as many files as there are data rows (all rows except the header row), e.g. smtextlib_17052_DC.xml, smtextlib_17053_DC.xml, ...
  * Move the XML files to an empty directory such as ~/book/DC, then run CRUD to replace the DC datastreams:


drush -u 1 -v islandora_datastream_crud_push_datastreams --datastreams_source_directory=/home/giancarlo/book/DC --datastreams_crud_log=/home/giancarlo/crud.log --update_object_label

\\
**Retrieve multiple DC.xml -> CSV**
  * Run CRUD to list the PIDs of a specific collection:


drush -u 1 -v islandora_datastream_crud_fetch_pids --collection=smarch:lds2002 --pid_file=/home/giancarlo/book/PID/pids.txt

  * Run CRUD to extract the DC datastreams of the listed PIDs:


drush -u 1 -v islandora_datastream_crud_fetch_datastreams --pid_file=/home/giancarlo/book/PID/pids.txt --dsid=DC --datastreams_directory=/home/giancarlo/book/DC

  * Create DCElements.txt, a single line listing the DC elements to export, separated by §:


dc:title§dc:date§dc:coverage§dc:description§dc:source§dc:subject§dc:contributor§dc:identifier§dc:creator§dc:publisher§dc:type§dc:language§dc:format§dc:relation§dc:rights

  * Create DCXML2CSV.sh, a Bash script that converts the DC of a single object into one CSV row:


#!/bin/bash
#
# DCXML2CSV.sh - append the content of one Dublin Core XML file to a
# '§'-separated CSV file, as a single row.
#
# Usage: DCXML2CSV.sh ELEMENTS XMLFILE OUTFILE
#   ELEMENTS  file whose first line lists the wanted elements, separated by
#             '§' (e.g. dc:title§dc:date§...)
#   XMLFILE   DC XML file to read
#   OUTFILE   CSV file the row is appended to
#
# The row starts with the first dc:identifier, followed by one cell per
# listed element. Repeated elements are joined with '|'; empty elements are
# skipped. Whitespace inside a value (including line breaks) is collapsed to
# single spaces so that a value never spans several CSV lines.
# The index/label table of ELEMENTS is printed to stdout.
#
# Requires xmlstarlet. The element names are used as XPath steps, so their
# namespace prefix has to be resolvable by xmlstarlet for XMLFILE.
#
# Exit status: 0 on success, 99 on a usage or input error.

readonly FIELD_SEP='§'   # column separator of the CSV
readonly VALUE_SEP='|'   # separator between repeated values inside one cell

# Single-byte stand-in for FIELD_SEP while splitting. '§' is a multi-byte
# character in UTF-8; splitting on it directly through IFS can also split on
# other non-ASCII characters that share a byte with it.
readonly SPLIT_CHAR=$'\x1f'

# Print a message on stderr and abort with status 99.
die() {
  printf '%s\n' "$*" >&2
  exit 99
}

# Print $1 without leading and trailing whitespace (this also removes the
# carriage return left at the end of a line by CRLF files).
trim() {
  local s=$1
  s="${s#"${s%%[![:space:]]*}"}"
  s="${s%"${s##*[![:space:]]}"}"
  printf '%s' "$s"
}

# Print the whitespace-normalised text of every node matching XPath $1 in
# file $2, one value per line. Prints nothing when there is no match.
xpath_values() {
  xmlstarlet sel -t -m "$1" -v 'normalize-space(.)' -n "$2"
}

main() {
  (( $# >= 3 )) || die "Usage: ${0##*/} ELEMENTS XMLFILE OUTFILE"
  local elements=$1 xmlfile=$2 outfile=$3
  [[ -f "$elements" ]] || die "$elements file not found"
  [[ -f "$xmlfile" ]] || die "$xmlfile file not found"
  command -v xmlstarlet > /dev/null || die "xmlstarlet not found in PATH"

  local line index label value cell csv_row
  local -a labels=()

  # Only the first line of ELEMENTS is used.
  IFS= read -r line < "$elements" || [[ -n "$line" ]] \
    || die "$elements is empty"
  line=${line//"$FIELD_SEP"/$SPLIT_CHAR}
  IFS=$SPLIT_CHAR read -r -a labels <<< "$line"
  (( ${#labels[@]} > 0 )) || die "$elements lists no elements"

  for index in "${!labels[@]}"; do
    labels[index]=$(trim "${labels[index]}")
    printf '%s/%s\n' "$index" "${labels[index]}"
  done

  # First cell: the object identifier.
  csv_row=$(xmlstarlet sel -t -v 'normalize-space(//dc:identifier[1])' \
    "$xmlfile")

  # One cell per listed element, in the order given by ELEMENTS.
  for label in "${labels[@]}"; do
    cell=''
    if [[ -n "$label" ]]; then
      while IFS= read -r value; do
        [[ -n "$value" ]] || continue
        cell+="${cell:+$VALUE_SEP}$value"
      done < <(xpath_values "//$label" "$xmlfile")
    fi
    csv_row+="$FIELD_SEP$cell"
  done

  # The row is written in one go, so a failure half-way through never leaves
  # a partial line in OUTFILE.
  printf '%s\n' "$csv_row" >> "$outfile"
}

main "$@"

  * Use extract.sh to convert multiple DC files into a single CSV file:


#!/bin/bash
#
# extract.sh - collect the DC XML files of a directory into a single
# '§'-separated CSV file.
#
# Usage: extract.sh ELEMENTS DIR OUTPUT [PREFIX]
#   $1 ELEMENTS  file whose first line lists the DC elements, separated by '§'
#   $2 DIR       directory holding the DC XML files
#   $3 OUTPUT    CSV file to create (an existing file is overwritten)
#   $4 PREFIX    optional: only entries of DIR whose name starts with PREFIX
#                are read; defaults to "smtextlib_"
#
# OUTPUT starts with the header row "PID§<first line of ELEMENTS>". Then
# ./DCXML2CSV.sh (looked up in the current directory) is run once per file and
# appends one row each. The name of every processed file is printed to stdout.
#
# Exit status: 0 on success, 1 if at least one file could not be converted,
# 99 on a usage or input error.

# Print a message on stderr and abort with status 99.
die() {
  printf '%s\n' "$*" >&2
  exit 99
}

main() {
  (( $# >= 3 )) || die "Usage: ${0##*/} ELEMENTS DIR OUTPUT [PREFIX]"
  local elements=$1 dcdir=$2 outfile=$3 prefix=${4:-smtextlib_}

  [[ -f "$elements" ]] || die "$elements file not found"
  [[ -d "$dcdir" ]] || die "$dcdir directory not found"
  [[ -x ./DCXML2CSV.sh ]] \
    || die "./DCXML2CSV.sh not found or not executable in $PWD"

  # Header row. It is written with an explicit newline so the first data row
  # starts on its own line even if ELEMENTS has no trailing newline.
  local header
  IFS= read -r header < "$elements" || [[ -n "$header" ]] \
    || die "$elements is empty"
  header=${header%$'\r'}
  printf 'PID§%s\n' "$header" > "$outfile"

  # Entries of DIR selected by PREFIX; nullglob makes "no match" an empty
  # list instead of the literal pattern.
  local -a roots
  shopt -s nullglob
  roots=("$dcdir"/"$prefix"*)
  shopt -u nullglob
  (( ${#roots[@]} > 0 )) || die "no ${prefix}* entries found in $dcdir"

  # NUL-delimited file names keep names with spaces or newlines intact.
  local dc failed=0
  while IFS= read -r -d '' dc; do
    printf '%s\n' "$dc"
    # stdin is detached so the converter can never consume the file list.
    if ! ./DCXML2CSV.sh "$elements" "$dc" "$outfile" < /dev/null; then
      printf 'conversion failed: %s\n' "$dc" >&2
      failed=1
    fi
  done < <(find "${roots[@]}" -type f -print0)

  return "$failed"
}

main "$@"


./extract.sh DCElements.txt DC DC/2002.csv