VintaSoft Imaging .NET SDK 14.0: Документация для .NET разработчика
Vintasoft.Imaging.Pdf.Ocr Namespace / SearchablePdfGenerator Class
Члены типа Объект Синтаксис Пример Иерархия Требования Смотрите также
В этом разделе
    Класс SearchablePdfGenerator
    В этом разделе
    Команда обработки, позволяющая добавлять распознанные страницы в PDF документ с возможностью поиска.
    Объектная модель
    ImageCollection OcrEngineManager OcrEngineSettings PdfFormat OcrCleanupSettings OcrTextOverImageSettings PdfCompressionSettings PdfCompressionSettings PdfCompressionSettings PdfCompressionSettings SearchablePdfGenerator

    Вот код на VB.NET и C#, который демонстрирует, как преобразовать файл изображения в PDF документ с возможностью поиска:

    ''' <summary>
    ''' Converts an image file to a searchable PDF document.
    ''' </summary>
    ''' <param name="sourceFilePath">A filename of source image file.</param>
    ''' <param name="pageCreationMode">The PDF page creation mode.</param>
    ''' <param name="ocrLanguage">An OCR language that should be used for text recognition.</param>
    ''' <param name="pdfFilename">A filename of destination PDF file.</param>
    ''' <remarks>
    ''' NOTE(review): some statements of this listing are missing (each place is marked
    ''' with a NOTE(review) comment in the body); restore them before using the sample.
    ''' </remarks>
    Public Shared Sub ConvertImagesToSearchablePdf(sourceFilePath As String, pageCreationMode As Vintasoft.Imaging.Pdf.Ocr.PdfPageCreationMode, ocrLanguage As Vintasoft.Imaging.Ocr.OcrLanguage, pdfFilename As String)
        ' create image collection
        Using images As New Vintasoft.Imaging.ImageCollection()
            ' add images to the image collection
            ' NOTE(review): the statement that adds sourceFilePath to "images" is missing from this listing - TODO restore

            Try
                ' create Tesseract OCR engine
                Using tesseractOcrEngine As New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr()
                    ' create OCR engine manager
                    Dim ocrEngineManager As New Vintasoft.Imaging.Ocr.OcrEngineManager(tesseractOcrEngine)

                    ' create PDF generator
                    Dim pdfGenerator As New Vintasoft.Imaging.Pdf.Ocr.SearchablePdfGenerator(ocrEngineManager)
                    ' set source images in PDF generator
                    pdfGenerator.SourceImages = images
                    ' set PDF page creation mode in PDF generator
                    pdfGenerator.PageCreationMode = pageCreationMode

                    ' create Tesseract OCR settings
                    Dim tesseractOcrSettings As New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcrSettings(ocrLanguage)
                    tesseractOcrSettings.RecognitionRegionType = Vintasoft.Imaging.Ocr.RecognitionRegionType.RecognizePageWithPageSegmentationAndOrientationDetection

                    ' use symbol regions correction only if PDF pages must be created in "TextOverImage" mode
                    ' (without the Else branch the value would always end up False)
                    If pageCreationMode = Vintasoft.Imaging.Pdf.Ocr.PdfPageCreationMode.TextOverImage Then
                        tesseractOcrSettings.UseSymbolRegionsCorrection = True
                    Else
                        tesseractOcrSettings.UseSymbolRegionsCorrection = False
                    End If

                    ' set Tesseract OCR settings
                    pdfGenerator.OcrEngineSettings = tesseractOcrSettings

                    ' if PDF pages must be created in "Text" mode
                    If pageCreationMode = Vintasoft.Imaging.Pdf.Ocr.PdfPageCreationMode.Text Then
                        ' set text color
                        pdfGenerator.TextColor = System.Drawing.Color.Black
                        ' set text font name
                        pdfGenerator.TextOnlyFontName = "Arial"
                    End If

                    ' subscribe to the PDF page added event for saving changes to PDF document
                    AddHandler pdfGenerator.PdfPageAdded, AddressOf Command_PdfPageAdded
                    ' subscribe to the image processing started event for preprocessing image
                    AddHandler pdfGenerator.ImageProcessingStarted, AddressOf PdfGenerator_ImageProcessingStarted

                    ' create PDF document
                    Using document As New Vintasoft.Imaging.Pdf.PdfDocument(pdfFilename, System.IO.FileMode.Create, Vintasoft.Imaging.Pdf.PdfFormat.Pdf_16)
                        ' generate PDF document pages
                        ' NOTE(review): the statement that runs pdfGenerator against "document" is missing from this listing - TODO restore

                        ' save PDF document
                        ' NOTE(review): the statement that saves "document" is missing from this listing - TODO restore
                    End Using
                End Using
            Finally
                ' remove images
                ' NOTE(review): the cleanup statement for "images" is missing from this listing;
                ' presumably it ran in a Finally block so that images are released even on failure - TODO restore
            End Try
        End Using
    End Sub
    ''' <summary>
    ''' Handles the <see cref="Vintasoft.Imaging.Pdf.Ocr.SearchablePdfGenerator.ImageProcessingStarted"/> event.
    ''' Replaces the image to process with a preprocessed copy, or skips the image if preprocessing fails.
    ''' </summary>
    ''' <param name="sender">The event source.</param>
    ''' <param name="e">The event data; <c>e.Image</c> is read and then replaced by this handler.</param>
    Private Shared Sub PdfGenerator_ImageProcessingStarted(sender As Object, e As Vintasoft.Imaging.Pdf.Ocr.OcrImageProcessingEventArgs)
        ' create image copy
        Dim image As Vintasoft.Imaging.VintasoftImage = DirectCast(e.Image.Clone(), Vintasoft.Imaging.VintasoftImage)
        Try
            ' preprocess image
            Dim deskewCommand As New Vintasoft.Imaging.ImageProcessing.Document.DeskewCommand()
            ' NOTE(review): the statement that applies deskewCommand to "image" is missing from this listing - TODO restore

            Dim autoTextOrientationCommand As New Vintasoft.Imaging.ImageProcessing.Document.AutoTextOrientationCommand()
            ' NOTE(review): the statement that applies autoTextOrientationCommand to "image" is missing from this listing - TODO restore

            ' change the processing image (the processing command will dispose image after use)
            e.Image = image
        Catch
            ' preprocessing failed: the copy was never handed over, so release it here
            image.Dispose()
            ' skip image
            e.Image = Nothing
        End Try
    End Sub
    ''' <summary>
    ''' Handles the <see cref="Vintasoft.Imaging.Pdf.Ocr.SearchablePdfGenerator.PdfPageAdded"/> event.
    ''' </summary>
    ''' <param name="sender">The event source.</param>
    ''' <param name="e">The event data; the page count of <c>e.Document</c> is checked.</param>
    Private Shared Sub Command_PdfPageAdded(sender As Object, e As Vintasoft.Imaging.Pdf.Ocr.PdfPageAddedEventArgs)
        ' save PDF document every 10 pages
        If e.Document.Pages.Count Mod 10 = 0 Then
            ' NOTE(review): the body of this If is empty in this listing; the statement that
            ' saves the document appears to be missing - TODO restore
        End If
    End Sub
    /// <summary>
    /// Converts an image file to a searchable PDF document.
    /// </summary>
    /// <param name="sourceFilePath">A filename of source image file.</param>
    /// <param name="pageCreationMode">The PDF page creation mode.</param>
    /// <param name="ocrLanguage">An OCR language that should be used for text recognition.</param>
    /// <param name="pdfFilename">A filename of destination PDF file.</param>
    /// <remarks>
    /// NOTE(review): some statements of this listing are missing (each place is marked
    /// with a NOTE(review) comment in the body); restore them before using the sample.
    /// </remarks>
    public static void ConvertImagesToSearchablePdf(
        string sourceFilePath,
        Vintasoft.Imaging.Pdf.Ocr.PdfPageCreationMode pageCreationMode,
        Vintasoft.Imaging.Ocr.OcrLanguage ocrLanguage,
        string pdfFilename)
    {
        // create image collection
        using (Vintasoft.Imaging.ImageCollection images = new Vintasoft.Imaging.ImageCollection())
        {
            // add images to the image collection
            // NOTE(review): the statement that adds sourceFilePath to "images" is missing from this listing - TODO restore

            try
            {
                // create Tesseract OCR engine
                using (Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr tesseractOcrEngine = new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr())
                {
                    // create OCR engine manager
                    Vintasoft.Imaging.Ocr.OcrEngineManager ocrEngineManager = new Vintasoft.Imaging.Ocr.OcrEngineManager(tesseractOcrEngine);

                    // create PDF generator
                    Vintasoft.Imaging.Pdf.Ocr.SearchablePdfGenerator pdfGenerator = new Vintasoft.Imaging.Pdf.Ocr.SearchablePdfGenerator(ocrEngineManager);
                    // set source images in PDF generator
                    pdfGenerator.SourceImages = images;
                    // set PDF page creation mode in PDF generator
                    pdfGenerator.PageCreationMode = pageCreationMode;

                    // create Tesseract OCR settings
                    Vintasoft.Imaging.Ocr.Tesseract.TesseractOcrSettings tesseractOcrSettings = new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcrSettings(ocrLanguage);
                    tesseractOcrSettings.RecognitionRegionType =
                        Vintasoft.Imaging.Ocr.RecognitionRegionType.RecognizePageWithPageSegmentationAndOrientationDetection;

                    // use symbol regions correction only if PDF pages must be created in "TextOverImage" mode
                    // (without the else branch the value would always end up false)
                    if (pageCreationMode == Vintasoft.Imaging.Pdf.Ocr.PdfPageCreationMode.TextOverImage)
                    {
                        tesseractOcrSettings.UseSymbolRegionsCorrection = true;
                    }
                    else
                    {
                        tesseractOcrSettings.UseSymbolRegionsCorrection = false;
                    }

                    // set Tesseract OCR settings
                    pdfGenerator.OcrEngineSettings = tesseractOcrSettings;

                    // if PDF pages must be created in "Text" mode
                    if (pageCreationMode == Vintasoft.Imaging.Pdf.Ocr.PdfPageCreationMode.Text)
                    {
                        // set text color
                        pdfGenerator.TextColor = System.Drawing.Color.Black;
                        // set text font name
                        pdfGenerator.TextOnlyFontName = "Arial";
                    }

                    // subscribe to the PDF page added event for saving changes to PDF document
                    pdfGenerator.PdfPageAdded += Command_PdfPageAdded;
                    // subscribe to the image processing started event for preprocessing image
                    pdfGenerator.ImageProcessingStarted += PdfGenerator_ImageProcessingStarted;

                    // create PDF document
                    using (Vintasoft.Imaging.Pdf.PdfDocument document = new Vintasoft.Imaging.Pdf.PdfDocument(
                        pdfFilename, System.IO.FileMode.Create, Vintasoft.Imaging.Pdf.PdfFormat.Pdf_16))
                    {
                        // generate PDF document pages
                        // NOTE(review): the statement that runs pdfGenerator against "document" is missing from this listing - TODO restore

                        // save PDF document
                        // NOTE(review): the statement that saves "document" is missing from this listing - TODO restore
                    }
                }
            }
            finally
            {
                // remove images
                // NOTE(review): the cleanup statement for "images" is missing from this listing;
                // presumably it ran in a finally block so that images are released even on failure - TODO restore
            }
        }
    }
    /// <summary>
    /// Handles the <see cref="Vintasoft.Imaging.Pdf.Ocr.SearchablePdfGenerator.ImageProcessingStarted"/> event.
    /// Replaces the image to process with a preprocessed copy, or skips the image if preprocessing fails.
    /// </summary>
    /// <param name="sender">The event source.</param>
    /// <param name="e">The event data; <c>e.Image</c> is read and then replaced by this handler.</param>
    private static void PdfGenerator_ImageProcessingStarted(object sender, Vintasoft.Imaging.Pdf.Ocr.OcrImageProcessingEventArgs e)
    {
        // create image copy
        Vintasoft.Imaging.VintasoftImage image = (Vintasoft.Imaging.VintasoftImage)e.Image.Clone();
        try
        {
            // preprocess image
            Vintasoft.Imaging.ImageProcessing.Document.DeskewCommand deskewCommand =
                new Vintasoft.Imaging.ImageProcessing.Document.DeskewCommand();
            // NOTE(review): the statement that applies deskewCommand to "image" is missing from this listing - TODO restore

            Vintasoft.Imaging.ImageProcessing.Document.AutoTextOrientationCommand autoTextOrientationCommand =
                new Vintasoft.Imaging.ImageProcessing.Document.AutoTextOrientationCommand();
            // NOTE(review): the statement that applies autoTextOrientationCommand to "image" is missing from this listing - TODO restore

            // change the processing image (the processing command will dispose image after use)
            e.Image = image;
        }
        catch
        {
            // preprocessing failed: the copy was never handed over, so release it here
            image.Dispose();
            // skip image
            e.Image = null;
        }
    }
    /// <summary>
    /// Handles the <see cref="Vintasoft.Imaging.Pdf.Ocr.SearchablePdfGenerator.PdfPageAdded"/> event.
    /// </summary>
    /// <param name="sender">The event source.</param>
    /// <param name="e">The event data; the page count of <c>e.Document</c> is checked.</param>
    private static void Command_PdfPageAdded(object sender, Vintasoft.Imaging.Pdf.Ocr.PdfPageAddedEventArgs e)
    {
        // save PDF document every 10 pages
        if (e.Document.Pages.Count % 10 == 0)
        {
            // NOTE(review): the body of this if is missing from this listing; the statement that
            // saves the document appears to have been lost - TODO restore
        }
    }

    Иерархия наследования



    Целевые платформы: .NET 8; .NET 7; .NET 6; .NET Framework 4.8, 4.7, 4.6, 4.5, 4.0, 3.5

    Смотрите также