VintaSoft Imaging .NET SDK 14.0: Документация для .NET разработчика
В этом разделе
    Обработка изображений: Как определить ориентацию изображения документа?
    В этом разделе
    VintaSoft Imaging .NET SDK и VintaSoft Document Cleanup .NET Plug-in предоставляют команды для обработки и очистки изображений документов.
    Команда GetTextOrientationCommand предназначена для определения ориентации изображений, содержащих латинский текст, повернутый на 90, 180 или 270 градусов.
    GetTextOrientationCommand не подходит для обработки изображений, не содержащих латинский текст.

    VintaSoft Imaging .NET SDK и VintaSoft OCR .NET Plug-in предоставляют команду GetTesseractOcrTextOrientationCommand, которая предназначена для определения ориентации изображения документа с помощью OCR-движка Tesseract. Команда GetTesseractOcrTextOrientationCommand может определить ориентацию любого изображения с текстом (не имеет ограничений, как команда GetTextOrientationCommand), но команда GetTesseractOcrTextOrientationCommand работает до 5 раз медленнее, чем команда GetTextOrientationCommand.


    Исходя из вышеизложенного, необходимо использовать команду GetTextOrientationCommand или GetTesseractOcrTextOrientationCommand в зависимости от типа изображений входного документа.

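    Также можно использовать обе команды вместе, а именно:
      1. Сначала определить ориентацию изображения с помощью быстрой команды GetTextOrientationCommand.
      2. Если ориентация не определена или достоверность результата низкая (в примере ниже - не более 0.3), определить ориентацию с помощью команды GetTesseractOcrTextOrientationCommand.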
    Совместное использование обеих команд позволяет добиться максимальной эффективности и качества определения ориентации изображения документа.


    Вот пример, показывающий, как определить ориентацию изображения документа с помощью команды GetTextOrientationCommand:
    /// <summary>
    /// Determines the orientation of every image loaded from a document image file
    /// using statistics for Latin symbols and writes the results to the console.
    /// </summary>
    /// <remarks>
    /// The method does not return a value: for each image it prints the file name,
    /// the zero-based image index, the detected orientation and the confidence.
    /// </remarks>
    /// <param name="filename">The path to a file with document image.</param>
    public static void GetDocumentImageOrientationUsingLatinSymbolStat(string filename)
    {
        // create an image collection
        using (Vintasoft.Imaging.ImageCollection images = new Vintasoft.Imaging.ImageCollection())
        {
            try
            {
                // add images from file to the image collection
                images.Add(filename);

                // create an instance of GetTextOrientationCommand class
                Vintasoft.Imaging.ImageProcessing.Info.GetTextOrientationCommand getTextOrientationCommand1 =
                    new Vintasoft.Imaging.ImageProcessing.Info.GetTextOrientationCommand();

                // for each image in image collection
                for (int i = 0; i < images.Count; i++)
                {
                    // get image
                    Vintasoft.Imaging.VintasoftImage image = images[i];

                    // determine orientation of document image using statistics for Latin symbols
                    getTextOrientationCommand1.ExecuteInPlace(image);

                    // write result to the console
                    System.Console.WriteLine(string.Format("Filename: {0}, page: {1}, page orientation: {2}, confidence: {3}",
                        System.IO.Path.GetFileName(filename),
                        i,
                        getTextOrientationCommand1.Orientation,
                        getTextOrientationCommand1.Confidence));
                }
            }
            finally
            {
                // free images even when loading or detection throws;
                // NOTE(review): the sample disposes the items explicitly, presumably because
                // disposing the collection alone does not dispose its images - confirm in SDK docs
                images.ClearAndDisposeItems();
            }
        }
    }
    
    ''' <summary>
    ''' Determines the orientation of every image loaded from a document image file
    ''' using statistics for Latin symbols and writes the results to the console.
    ''' </summary>
    ''' <remarks>
    ''' The method does not return a value: for each image it prints the file name,
    ''' the zero-based image index, the detected orientation and the confidence.
    ''' </remarks>
    ''' <param name="filename">The path to a file with document image.</param>
    Public Shared Sub GetDocumentImageOrientationUsingLatinSymbolStat(filename As String)
        ' create an image collection
        Using images As New Vintasoft.Imaging.ImageCollection()
            Try
                ' add images from file to the image collection
                images.Add(filename)

                ' create an instance of GetTextOrientationCommand class
                Dim getTextOrientationCommand1 As New Vintasoft.Imaging.ImageProcessing.Info.GetTextOrientationCommand()

                ' for each image in image collection
                For i As Integer = 0 To images.Count - 1
                    ' get image
                    Dim image As Vintasoft.Imaging.VintasoftImage = images(i)

                    ' determine orientation of document image using statistics for Latin symbols
                    getTextOrientationCommand1.ExecuteInPlace(image)

                    ' write result to the console
                    System.Console.WriteLine(String.Format("Filename: {0}, page: {1}, page orientation: {2}, confidence: {3}", System.IO.Path.GetFileName(filename), i, getTextOrientationCommand1.Orientation, getTextOrientationCommand1.Confidence))
                Next
            Finally
                ' free images even when loading or detection throws;
                ' NOTE(review): the sample disposes the items explicitly, presumably because
                ' disposing the collection alone does not dispose its images - confirm in SDK docs
                images.ClearAndDisposeItems()
            End Try
        End Using
    End Sub
    


    Вот пример, показывающий, как определить ориентацию изображения документа с помощью команды GetTesseractOcrTextOrientationCommand:
    /// <summary>
    /// Determines the orientation of every image loaded from a document image file
    /// using Tesseract OCR and writes the results to the console.
    /// </summary>
    /// <remarks>
    /// The method does not return a value: for each image it prints the file name,
    /// the zero-based image index and the detected orientation.
    /// </remarks>
    /// <param name="filename">The path to a file with document image.</param>
    /// <param name="tesseractOcrDllDirectory">A path to a directory, where Tesseract5.Vintasoft.xXX.dll files are located.</param>
    public static void GetDocumentImageOrientationUsingTesseractOCR(string filename, string tesseractOcrDllDirectory)
    {
        // create an image collection
        using (Vintasoft.Imaging.ImageCollection images = new Vintasoft.Imaging.ImageCollection())
        {
            try
            {
                // add images from file to the image collection
                images.Add(filename);

                // create an instance of GetTesseractOcrTextOrientationCommand class
                using (Vintasoft.Imaging.ImageProcessing.Ocr.Tesseract.GetTesseractOcrTextOrientationCommand getTextOrientationCommand =
                    new Vintasoft.Imaging.ImageProcessing.Ocr.Tesseract.GetTesseractOcrTextOrientationCommand())
                {
                    // specify path to a directory, where Tesseract5.Vintasoft.xXX.dll files are located
                    getTextOrientationCommand.TesseractOcrDllDirectory = tesseractOcrDllDirectory;

                    // for each image in image collection
                    for (int i = 0; i < images.Count; i++)
                    {
                        // get image
                        Vintasoft.Imaging.VintasoftImage image = images[i];

                        // determine orientation of document image using Tesseract OCR
                        getTextOrientationCommand.ExecuteInPlace(image);

                        // write result to the console
                        System.Console.WriteLine(string.Format("Filename: {0}, page: {1}, page orientation: {2}",
                            System.IO.Path.GetFileName(filename),
                            i,
                            getTextOrientationCommand.Orientation));
                    }
                }
            }
            finally
            {
                // free images even when loading or detection throws; this still runs after
                // the OCR command has been disposed, same order as on the success path.
                // NOTE(review): the sample disposes the items explicitly, presumably because
                // disposing the collection alone does not dispose its images - confirm in SDK docs
                images.ClearAndDisposeItems();
            }
        }
    }
    
    ''' <summary>
    ''' Determines the orientation of every image loaded from a document image file
    ''' using Tesseract OCR and writes the results to the console.
    ''' </summary>
    ''' <remarks>
    ''' The method does not return a value: for each image it prints the file name,
    ''' the zero-based image index and the detected orientation.
    ''' </remarks>
    ''' <param name="filename">The path to a file with document image.</param>
    ''' <param name="tesseractOcrDllDirectory">A path to a directory, where Tesseract5.Vintasoft.xXX.dll files are located.</param>
    Public Shared Sub GetDocumentImageOrientationUsingTesseractOCR(filename As String, tesseractOcrDllDirectory As String)
        ' create an image collection
        Using images As New Vintasoft.Imaging.ImageCollection()
            Try
                ' add images from file to the image collection
                images.Add(filename)

                ' create an instance of GetTesseractOcrTextOrientationCommand class
                Using getTextOrientationCommand As New Vintasoft.Imaging.ImageProcessing.Ocr.Tesseract.GetTesseractOcrTextOrientationCommand()
                    ' specify path to a directory, where Tesseract5.Vintasoft.xXX.dll files are located
                    getTextOrientationCommand.TesseractOcrDllDirectory = tesseractOcrDllDirectory

                    ' for each image in image collection
                    For i As Integer = 0 To images.Count - 1
                        ' get image
                        Dim image As Vintasoft.Imaging.VintasoftImage = images(i)

                        ' determine orientation of document image using Tesseract OCR
                        getTextOrientationCommand.ExecuteInPlace(image)

                        ' write result to the console
                        System.Console.WriteLine(String.Format("Filename: {0}, page: {1}, page orientation: {2}", System.IO.Path.GetFileName(filename), i, getTextOrientationCommand.Orientation))
                    Next
                End Using
            Finally
                ' free images even when loading or detection throws; this still runs after
                ' the OCR command has been disposed, same order as on the success path.
                ' NOTE(review): the sample disposes the items explicitly, presumably because
                ' disposing the collection alone does not dispose its images - confirm in SDK docs
                images.ClearAndDisposeItems()
            End Try
        End Using
    End Sub
    


    Вот пример, показывающий, как определить ориентацию изображения документа с помощью команд GetTextOrientationCommand и GetTesseractOcrTextOrientationCommand:
    /// <summary>
    /// Determines the orientation of every image loaded from a document image file
    /// using statistics for Latin symbols first and Tesseract OCR as a fallback,
    /// and writes the results to the console.
    /// </summary>
    /// <remarks>
    /// The method does not return a value. For each image the Latin-statistics command
    /// is executed first; the Tesseract OCR command is executed only if the first command
    /// reports an undefined orientation or a confidence that is not greater than 0.3.
    /// </remarks>
    /// <param name="filename">The path to a file with document image.</param>
    /// <param name="tesseractOcrDllDirectory">A path to a directory, where Tesseract5.Vintasoft.xXX.dll files are located.</param>
    public static void GetDocumentImageOrientationUsingLatinSymbolStatAndOcrTesseract(string filename, string tesseractOcrDllDirectory)
    {
        // confidence that the Latin-statistics result must exceed to be accepted
        // without running the slower OCR-based command
        const double minLatinStatConfidence = 0.3;

        // create an image collection
        using (Vintasoft.Imaging.ImageCollection images = new Vintasoft.Imaging.ImageCollection())
        {
            try
            {
                // add images from file to the image collection
                images.Add(filename);

                // create an instance of GetTextOrientationCommand class
                Vintasoft.Imaging.ImageProcessing.Info.GetTextOrientationCommand getTextOrientationCommand1 =
                    new Vintasoft.Imaging.ImageProcessing.Info.GetTextOrientationCommand();

                // create an instance of GetTesseractOcrTextOrientationCommand class
                using (Vintasoft.Imaging.ImageProcessing.Ocr.Tesseract.GetTesseractOcrTextOrientationCommand getTextOrientationCommand2 =
                    new Vintasoft.Imaging.ImageProcessing.Ocr.Tesseract.GetTesseractOcrTextOrientationCommand())
                {
                    // specify path to a directory, where Tesseract5.Vintasoft.xXX.dll files are located
                    getTextOrientationCommand2.TesseractOcrDllDirectory = tesseractOcrDllDirectory;

                    // for each image in image collection
                    for (int i = 0; i < images.Count; i++)
                    {
                        // get image
                        Vintasoft.Imaging.VintasoftImage image = images[i];

                        // determine orientation of document image using statistics for Latin symbols (works for Latin text only)
                        getTextOrientationCommand1.ExecuteInPlace(image);
                        // if orientation is detected and orientation result has high confidence
                        if (getTextOrientationCommand1.Orientation != Vintasoft.Imaging.ImageProcessing.Info.ImageOrthogonalOrientation.Undefined &&
                            getTextOrientationCommand1.Confidence > minLatinStatConfidence)
                        {
                            // write result to the console
                            System.Console.WriteLine(string.Format("Filename: {0}, page: {1}, page orientation: {2}, confidence: {3}",
                                System.IO.Path.GetFileName(filename),
                                i,
                                getTextOrientationCommand1.Orientation,
                                getTextOrientationCommand1.Confidence));
                        }
                        // if orientation is not detected or orientation result has low confidence
                        else
                        {
                            // determine orientation of document image using Tesseract OCR (works for any text)
                            getTextOrientationCommand2.ExecuteInPlace(image);

                            // write result to the console
                            System.Console.WriteLine(string.Format("Filename: {0}, page: {1}, page orientation: {2}, confidence: {3}",
                                System.IO.Path.GetFileName(filename),
                                i,
                                getTextOrientationCommand2.Orientation,
                                getTextOrientationCommand2.Confidence));
                        }
                    }
                }
            }
            finally
            {
                // free images even when loading or detection throws; this still runs after
                // the OCR command has been disposed, same order as on the success path.
                // NOTE(review): the sample disposes the items explicitly, presumably because
                // disposing the collection alone does not dispose its images - confirm in SDK docs
                images.ClearAndDisposeItems();
            }
        }
    }
    
    ''' <summary>
    ''' Determines the orientation of every image loaded from a document image file
    ''' using statistics for Latin symbols first and Tesseract OCR as a fallback,
    ''' and writes the results to the console.
    ''' </summary>
    ''' <remarks>
    ''' The method does not return a value. For each image the Latin-statistics command
    ''' is executed first; the Tesseract OCR command is executed only if the first command
    ''' reports an undefined orientation or a confidence that is not greater than 0.3.
    ''' </remarks>
    ''' <param name="filename">The path to a file with document image.</param>
    ''' <param name="tesseractOcrDllDirectory">A path to a directory, where Tesseract5.Vintasoft.xXX.dll files are located.</param>
    Public Shared Sub GetDocumentImageOrientationUsingLatinSymbolStatAndOcrTesseract(filename As String, tesseractOcrDllDirectory As String)
        ' confidence that the Latin-statistics result must exceed to be accepted
        ' without running the slower OCR-based command
        Const minLatinStatConfidence As Double = 0.3

        ' create an image collection
        Using images As New Vintasoft.Imaging.ImageCollection()
            Try
                ' add images from file to the image collection
                images.Add(filename)

                ' create an instance of GetTextOrientationCommand class
                Dim getTextOrientationCommand1 As New Vintasoft.Imaging.ImageProcessing.Info.GetTextOrientationCommand()

                ' create an instance of GetTesseractOcrTextOrientationCommand class
                Using getTextOrientationCommand2 As New Vintasoft.Imaging.ImageProcessing.Ocr.Tesseract.GetTesseractOcrTextOrientationCommand()
                    ' specify path to a directory, where Tesseract5.Vintasoft.xXX.dll files are located
                    getTextOrientationCommand2.TesseractOcrDllDirectory = tesseractOcrDllDirectory

                    ' for each image in image collection
                    For i As Integer = 0 To images.Count - 1
                        ' get image
                        Dim image As Vintasoft.Imaging.VintasoftImage = images(i)

                        ' determine orientation of document image using statistics for Latin symbols (works for Latin text only)
                        getTextOrientationCommand1.ExecuteInPlace(image)
                        ' if orientation is detected and orientation result has high confidence
                        If getTextOrientationCommand1.Orientation <> Vintasoft.Imaging.ImageProcessing.Info.ImageOrthogonalOrientation.Undefined AndAlso getTextOrientationCommand1.Confidence > minLatinStatConfidence Then
                            ' write result to the console
                            System.Console.WriteLine(String.Format("Filename: {0}, page: {1}, page orientation: {2}, confidence: {3}", System.IO.Path.GetFileName(filename), i, getTextOrientationCommand1.Orientation, getTextOrientationCommand1.Confidence))
                        Else
                            ' if orientation is not detected or orientation result has low confidence
                            ' determine orientation of document image using Tesseract OCR (works for any text)
                            getTextOrientationCommand2.ExecuteInPlace(image)

                            ' write result to the console
                            System.Console.WriteLine(String.Format("Filename: {0}, page: {1}, page orientation: {2}, confidence: {3}", System.IO.Path.GetFileName(filename), i, getTextOrientationCommand2.Orientation, getTextOrientationCommand2.Confidence))
                        End If
                    Next
                End Using
            Finally
                ' free images even when loading or detection throws; this still runs after
                ' the OCR command has been disposed, same order as on the success path.
                ' NOTE(review): the sample disposes the items explicitly, presumably because
                ' disposing the collection alone does not dispose its images - confirm in SDK docs
                images.ClearAndDisposeItems()
            End Try
        End Using
    End Sub