logo
Welcome Guest! To enable all features please Login or Register.

Notification

Icon
Error

Options
Go to last post Go to first unread
Paul Rayman  
#1 Posted : Saturday, November 2, 2019 2:59:28 AM(UTC)
Paul Rayman

Rank: Administration

Groups: Administrators
Joined: 1/5/2016(UTC)
Posts: 1,107

Thanks: 7 times
Was thanked: 130 time(s) in 127 post(s)
Question:
Is there a way to get the embedded files of a PDF document with .NET code?

Answer:
An embedded file stream can be included in a PDF document in the following ways:
  • As a file attachment annotation, which associates the embedded file with a location on a page in the document.
  • As a document-level attachment, where the embedded file stream is associated with the document as a whole
    through the EmbeddedFiles entry in the PDF document’s name dictionary.


Code:

// Entry point: opens the sample PDF and extracts attachments from both places
// this sample looks for them — the document catalog and the page annotations.
static void Main(string[] args)
{
    const string inputPath = @"test.pdf";

    // The document is disposed when the using block exits, even on an exception.
    using (var document = PdfDocument.Load(inputPath))
    {
        // Document-level attachments first, then page-level ones.
        ExtractAttachmentsFromCatalog(document);
        ExtractAttachmentsFromAnnotations(document);
    }
}

/// <summary>
/// Extracts document-level attachments, i.e. the embedded files referenced from the
/// "EmbeddedFiles" entry of the catalog's "Names" dictionary, and writes each one
/// out through <c>WriteExtractedData</c>.
/// </summary>
/// <param name="doc">The loaded PDF document whose catalog is inspected.</param>
private static void ExtractAttachmentsFromCatalog(PdfDocument doc)
{
    if (!doc.Root.ContainsKey("Names"))
    {
        return;
    }

    var names = doc.Root["Names"].As<PdfTypeDictionary>();
    if (!names.ContainsKey("EmbeddedFiles"))
    {
        return;
    }

    ExtractAttachmentsFromNameTree(names["EmbeddedFiles"].As<PdfTypeDictionary>(), 0);
}

// Walks one node of the EmbeddedFiles name tree: writes out the (name, file specification)
// pairs stored in its "Names" array, then recurses into the child nodes listed under "Kids".
private static void ExtractAttachmentsFromNameTree(PdfTypeDictionary node, int depth)
{
    // Guard against malformed files whose "Kids" entries reference each other in a cycle.
    const int maxDepth = 32;
    if (node == null || depth > maxDepth)
    {
        return;
    }

    if (node.ContainsKey("Names"))
    {
        // "Names" is a flat array of alternating keys and values: [name1, spec1, name2, spec2, ...].
        var entries = node["Names"].As<PdfTypeArray>();

        // "i + 1 < Count" skips a dangling key when the array has an odd number of items.
        for (int i = 0; i + 1 < entries.Count; i += 2)
        {
            string attachmentName = entries[i].As<PdfTypeString>().UnicodeString;
            var fileSpec = entries[i + 1].As<PdfTypeDictionary>();

            // A file specification without "EF" has no embedded data to extract.
            if (!fileSpec.ContainsKey("EF"))
            {
                continue;
            }

            var embedded = fileSpec["EF"].As<PdfTypeDictionary>();

            // Prefer "F" (as the original sample did); fall back to "UF" when only that is present.
            string streamKey;
            if (embedded.ContainsKey("F"))
            {
                streamKey = "F";
            }
            else if (embedded.ContainsKey("UF"))
            {
                streamKey = "UF";
            }
            else
            {
                continue;
            }

            var attachmentStream = embedded[streamKey].As<PdfTypeStream>();
            WriteExtractedData(attachmentName, attachmentStream.DecodedData);
        }
    }

    if (node.ContainsKey("Kids"))
    {
        // Intermediate nodes keep their entries in child nodes rather than in "Names".
        var kids = node["Kids"].As<PdfTypeArray>();
        for (int i = 0; i < kids.Count; i++)
        {
            ExtractAttachmentsFromNameTree(kids[i].As<PdfTypeDictionary>(), depth + 1);
        }
    }
}

/// <summary>
/// Extracts the files carried by file attachment annotations on every page of the
/// document and writes each one out through <c>WriteExtractedData</c>.
/// </summary>
/// <param name="doc">The loaded PDF document whose pages are scanned.</param>
private static void ExtractAttachmentsFromAnnotations(PdfDocument doc)
{
    foreach (var page in doc.Pages)
    {
        try
        {
            if (page.Annots == null)
            {
                continue;
            }

            foreach (var annot in page.Annots)
            {
                // Single cast instead of an "is" check followed by a second "as" cast.
                var attachment = annot as PdfFileAttachmentAnnotation;
                if (attachment == null)
                {
                    continue;
                }

                var fileSpec = attachment.FileSpecification;
                var file = fileSpec == null ? null : fileSpec.EmbeddedFile;

                // NOTE(review): presumably these are null when the annotation refers to an
                // external file rather than embedded data; confirm against the SDK docs.
                if (file == null)
                {
                    continue;
                }

                byte[] fileContent = file.Stream.DecodedData;
                WriteExtractedData(fileSpec.FileName, fileContent);
            }
        }
        finally
        {
            // Dispose every page, including pages without annotations (previously skipped
            // by the "continue") and pages where extraction threw.
            page.Dispose();
        }
    }
}

/// <summary>
/// Writes one extracted attachment into the "Out" directory, relative to the current
/// working directory, creating the directory if it does not exist.
/// </summary>
/// <param name="name">
/// Attachment name as stored in the PDF. It is untrusted input, so it is reduced to a bare
/// file name before use; a name with nothing usable left is replaced by "attachment".
/// </param>
/// <param name="data">The decoded attachment bytes to write.</param>
private static void WriteExtractedData(string name, byte[] data)
{
    const string outputDirectory = "Out";

    // Keep only the last path segment so names such as "..\..\x.exe" or "C:\x" cannot
    // escape the output directory. Both separators are handled on every platform.
    string fileName = name ?? string.Empty;
    int lastSeparator = fileName.LastIndexOfAny(new[] { '\\', '/' });
    if (lastSeparator >= 0)
    {
        fileName = fileName.Substring(lastSeparator + 1);
    }

    // Replace characters the file system rejects instead of failing on them.
    foreach (char invalid in System.IO.Path.GetInvalidFileNameChars())
    {
        fileName = fileName.Replace(invalid, '_');
    }

    if (fileName.Trim().Length == 0 || fileName == "." || fileName == "..")
    {
        fileName = "attachment";
    }

    // File.WriteAllBytes does not create missing directories.
    System.IO.Directory.CreateDirectory(outputDirectory);
    System.IO.File.WriteAllBytes(System.IO.Path.Combine(outputDirectory, fileName), data);
}

Edited by user Saturday, November 2, 2019 3:09:11 AM(UTC)  | Reason: Not specified

Users browsing this topic
Forum Jump  
You cannot post new topics in this forum.
You cannot reply to topics in this forum.
You cannot delete your posts in this forum.
You cannot edit your posts in this forum.
You cannot create polls in this forum.
You cannot vote in polls in this forum.