Confidence Свойство (OcrObject)
В этом разделе
Возвращает достоверность (в процентах) этого объекта.
Синтаксис
'Declaration
Public ReadOnly Property Confidence As Single
public float Confidence { get; }
public: __property float get_Confidence();
public:
property float Confidence {
float get();
}
Пример
Пример кода на C# и VB.NET, который демонстрирует, как отфильтровать результаты распознавания по достоверности (удалить слова с низкой достоверностью).
''' <summary>
''' Recognizes text in every image of the specified file,
''' removes words whose recognition confidence is below a fixed threshold and
''' returns the remaining recognized text.
''' </summary>
''' <param name="filename">The name of the file containing image to OCR.</param>
''' <returns>
''' The filtered text of all recognized pages; a line terminator is appended
''' after the text of each page.
''' </returns>
Public Function RecognizeTextAndFilterRecognitionResult(filename As String) As String
    ' minimum confidence (in percent) a word must have to stay in the result
    Const MinConfidence As Single = 75F

    ' create image collection
    Using images As New Vintasoft.Imaging.ImageCollection()
        Try
            ' add images from file to image collection
            images.Add(filename)

            ' create tesseract OCR engine
            Using tesseractOcr As New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr()
                ' initialize the engine for English text
                Dim settings As New Vintasoft.Imaging.Ocr.OcrEngineSettings(Vintasoft.Imaging.Ocr.OcrLanguage.English)
                tesseractOcr.Init(settings)

                ' create result builder
                Dim result As New System.Text.StringBuilder()

                ' for each image in image collection
                For Each image As Vintasoft.Imaging.VintasoftImage In images
                    ' recognize the image
                    Dim page As Vintasoft.Imaging.Ocr.Results.OcrPage = tesseractOcr.Recognize(image)

                    ' get all words in recognized text
                    Dim ocrObjects As Vintasoft.Imaging.Ocr.Results.OcrObject() = page.GetObjects(Vintasoft.Imaging.Ocr.OcrObjectType.Word)

                    ' collect the words whose confidence is below the threshold
                    Dim removeObjects As New System.Collections.Generic.List(Of Vintasoft.Imaging.Ocr.Results.OcrObject)()
                    For Each word As Vintasoft.Imaging.Ocr.Results.OcrObject In ocrObjects
                        If word.Confidence < MinConfidence Then
                            removeObjects.Add(word)
                        End If
                    Next

                    ' remove the low-confidence words from the page and validate the edited results
                    Dim editor As New Vintasoft.Imaging.Ocr.Results.OcrResultsEditor(page)
                    editor.RemoveObjects(removeObjects.ToArray())
                    editor.ValidateResults()

                    ' add the filtered page text, followed by a line terminator, to the result
                    result.AppendLine(page.GetText())
                Next

                ' return result
                Return result.ToString()
            End Using
        Finally
            ' dispose images and clear image collection;
            ' done in Finally so the loaded images are released even when
            ' engine initialization or recognition throws
            images.ClearAndDisposeItems()
        End Try
    End Using
End Function
/// <summary>
/// Recognizes text in every image of the specified file,
/// removes words whose recognition confidence is below a fixed threshold and
/// returns the remaining recognized text.
/// </summary>
/// <param name="filename">The name of the file containing image to OCR.</param>
/// <returns>
/// The filtered text of all recognized pages; a line terminator is appended
/// after the text of each page.
/// </returns>
public string RecognizeTextAndFilterRecognitionResult(string filename)
{
    // minimum confidence (in percent) a word must have to stay in the result
    const float MinConfidence = 75.0f;

    // create image collection
    using (Vintasoft.Imaging.ImageCollection images = new Vintasoft.Imaging.ImageCollection())
    {
        try
        {
            // add images from file to image collection
            images.Add(filename);

            // create tesseract OCR engine
            using (Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr tesseractOcr =
                new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr())
            {
                // initialize the engine for English text
                Vintasoft.Imaging.Ocr.OcrEngineSettings settings =
                    new Vintasoft.Imaging.Ocr.OcrEngineSettings(Vintasoft.Imaging.Ocr.OcrLanguage.English);
                tesseractOcr.Init(settings);

                // create result builder
                System.Text.StringBuilder result = new System.Text.StringBuilder();

                // for each image in image collection
                foreach (Vintasoft.Imaging.VintasoftImage image in images)
                {
                    // recognize the image
                    Vintasoft.Imaging.Ocr.Results.OcrPage page = tesseractOcr.Recognize(image);

                    // get all words in recognized text
                    Vintasoft.Imaging.Ocr.Results.OcrObject[] ocrObjects = page.GetObjects(
                        Vintasoft.Imaging.Ocr.OcrObjectType.Word);

                    // collect the words whose confidence is below the threshold
                    System.Collections.Generic.List<Vintasoft.Imaging.Ocr.Results.OcrObject> removeObjects =
                        new System.Collections.Generic.List<Vintasoft.Imaging.Ocr.Results.OcrObject>();
                    foreach (Vintasoft.Imaging.Ocr.Results.OcrObject word in ocrObjects)
                    {
                        if (word.Confidence < MinConfidence)
                        {
                            removeObjects.Add(word);
                        }
                    }

                    // remove the low-confidence words from the page and validate the edited results
                    Vintasoft.Imaging.Ocr.Results.OcrResultsEditor editor =
                        new Vintasoft.Imaging.Ocr.Results.OcrResultsEditor(page);
                    editor.RemoveObjects(removeObjects.ToArray());
                    editor.ValidateResults();

                    // add the filtered page text, followed by a line terminator, to the result
                    result.AppendLine(page.GetText());
                }

                // return result
                return result.ToString();
            }
        }
        finally
        {
            // dispose images and clear image collection;
            // done in finally so the loaded images are released even when
            // engine initialization or recognition throws
            images.ClearAndDisposeItems();
        }
    }
}
Требования
Целевые платформы: .NET 8; .NET 7; .NET 6; .NET Framework 4.8, 4.7, 4.6, 4.5, 4.0, 3.5
Смотрите также