|
|
|
|
#include "pch.h"
|
|
|
|
|
#include "OCRCharset.h"
|
|
|
|
|
#include "EasyOCR_Recognizer.h"
|
|
|
|
|
|
|
|
|
|
/// Copy-assignment for the 3-component size triple.
/// Copies d0, d1 and d2 from `obj` and returns the updated object
/// (by value, as the return type of this definition specifies).
uns::EasyOCR_Recognizer::NormalizePAD::Size3i uns::EasyOCR_Recognizer::NormalizePAD::Size3i::operator=(const Size3i& obj)
{
    this->d0 = obj.d0;
    this->d1 = obj.d1;
    this->d2 = obj.d2;
    return *this;
}
|
|
|
|
|
|
|
|
|
|
/// Builds a normalize-and-pad transform.
/// @param max_size  Target size triple; d1 and d2 are used as the rows and
///                  columns of the padded output in operator().
/// @param PAD_type  Padding mode label; only stored by this constructor.
uns::EasyOCR_Recognizer::NormalizePAD::NormalizePAD(Size3i max_size, const std::string& PAD_type)
{
    // The parameters shadow the members of the same name, hence the explicit `this->`.
    this->PAD_type = PAD_type;
    this->max_size = max_size;

    // Half of the target width, cached as a member.
    this->max_width_half = max_size.d2 / 2;
}
|
|
|
|
|
|
|
|
|
|
/// Normalizes an image and right-pads it to max_size.d1 x max_size.d2.
///
/// Steps:
///   1. Convert to 32-bit float scaled by 1/255, then apply (x - 0.5) / 0.5.
///   2. Copy the result into the top-left corner of a zero-filled canvas.
///   3. If the canvas width differs from the image width, fill the columns to the
///      right of the image by repeating the image's last column.
///
/// NOTE(review): the ROI copies assume the input is not larger than the canvas
/// (cols <= max_size.d2, rows <= max_size.d1) and has at least one column; the
/// visible caller resizes to satisfy this - confirm for any other caller.
///
/// @param input_img  Source image.
/// @return The padded floating-point image.
cv::Mat uns::EasyOCR_Recognizer::NormalizePAD::operator()(const cv::Mat& input_img) const
{
    // Scale by 1/255 into float, then shift/scale with (x - 0.5) / 0.5.
    cv::Mat normalized;
    input_img.convertTo(normalized, CV_32F, 1.0 / 255);
    normalized = (normalized - 0.5f) / 0.5f;

    const int src_rows = normalized.rows;
    const int src_cols = normalized.cols;
    const int channels = normalized.channels();

    // Zero-initialised canvas of the target size with the same channel count.
    cv::Mat canvas = cv::Mat::zeros(max_size.d1, max_size.d2, CV_32FC(channels));

    // Place the normalized image at the top-left corner.
    normalized.copyTo(canvas(cv::Rect(0, 0, src_cols, src_rows)));

    // Extend the right-hand side by replicating the last image column.
    if (max_size.d2 != src_cols)
    {
        const int pad_cols = max_size.d2 - src_cols;
        cv::Mat right_fill;
        cv::repeat(normalized.col(src_cols - 1), 1, pad_cols, right_fill);
        right_fill.copyTo(canvas(cv::Rect(src_cols, 0, pad_cols, src_rows)));
    }

    return canvas;
}
|
|
|
|
|
|
|
|
|
|
/// Stretches the contrast of a grey image when its measured contrast is below `target`.
///
/// The contrast and the low/high reference levels come from ContrastGrey(). When the
/// contrast is already at least `target`, a plain copy of the input is returned.
/// Otherwise each pixel is mapped to (pixel - low + 25) * 200 / max(10, high - low),
/// limited to [0, 255] and converted back to 8-bit.
///
/// @param img_in  Grey input image.
/// @param target  Contrast threshold below which the stretch is applied.
/// @return A new image (never aliases `img_in`).
cv::Mat uns::EasyOCR_Recognizer::AlignCollate::AdjustContrastGrey(const cv::Mat& img_in, double target) const
{
    double measured = 0.0;
    int level_high = 0;
    int level_low = 0;
    ContrastGrey(img_in, measured, level_high, level_low);

    cv::Mat result = img_in.clone();
    if (!(measured < target))
        return result;

    // Gain that spreads the low..high span over roughly 200 grey levels;
    // the span is floored at 10 to avoid extreme gains on flat images.
    const double gain = 200.0 / std::max(10, level_high - level_low);

    // Work in 32-bit integers so intermediate values may leave [0, 255].
    cv::Mat stretched;
    result.convertTo(stretched, CV_32S);
    stretched = (stretched - level_low + 25) * gain;

    // Limit every value to the 8-bit range before narrowing.
    stretched.forEach<int>([] (int& px, const int*)
    {
        px = std::max(0, std::min(255, px));
    });
    stretched.convertTo(result, CV_8U);

    return result;
}
|
|
|
|
|
|
|
|
|
|
/// Measures the contrast of a grey image from its 10th and 90th percentile levels.
///
/// The percentiles are taken by rank without interpolation: the value at index
/// floor(p * (count - 1)) of the pixel values in ascending order. The ranks are read
/// from a 256-bin histogram, which yields the same values as sorting all pixels.
///
/// Each row is read as `cols` unsigned bytes, i.e. the image is treated as
/// single-channel 8-bit (NOTE(review): callers in this file pass grey images - confirm
/// that no multi-channel or non-8-bit image can reach this function).
///
/// @param img       Grey input image.
/// @param contrast  Out: (high - low) / max(10, high + low).
/// @param high      Out: 90th-percentile grey level.
/// @param low       Out: 10th-percentile grey level.
///
/// For an empty image all three outputs are set to 0 (previously this case indexed
/// an empty buffer).
void uns::EasyOCR_Recognizer::AlignCollate::ContrastGrey(const cv::Mat& img, double& contrast, int& high, int& low) const
{
    contrast = 0.0;
    high = 0;
    low = 0;
    if (img.empty())
        return;

    // Count occurrences of every grey level.
    std::array<size_t, 256> histogram{};
    for (int r = 0; r < img.rows; ++r)
    {
        const uchar* row_ptr = img.ptr<uchar>(r);
        for (int c = 0; c < img.cols; ++c)
            ++histogram[row_ptr[c]];
    }

    const size_t total = static_cast<size_t>(img.rows) * static_cast<size_t>(img.cols);
    const size_t idx90 = static_cast<size_t>(0.9 * static_cast<double>(total - 1));
    const size_t idx10 = static_cast<size_t>(0.1 * static_cast<double>(total - 1));

    // Grey level found at position `rank` of the ascending pixel sequence.
    const auto value_at_rank = [&histogram](size_t rank)
    {
        size_t seen = 0;
        for (int level = 0; level < 256; ++level)
        {
            seen += histogram[level];
            if (seen > rank)
                return level;
        }
        return 255;
    };

    high = value_at_rank(idx90);
    low = value_at_rank(idx10);

    // The denominator is floored at 10 so very dark images do not blow up the ratio.
    contrast = double(high - low) / double(std::max(10, high + low));
}
|
|
|
|
|
|
|
|
|
|
/// Stores the collation settings.
/// @param imgH                 Height every image is resized to in operator().
/// @param imgW                 Maximum width / padded width used in operator().
/// @param keep_ratio_with_pad  Flag stored as-is by this constructor.
/// @param adjust_contrast      Contrast target; operator() applies the contrast
///                             adjustment only when this is greater than 0.
uns::EasyOCR_Recognizer::AlignCollate::AlignCollate(int imgH, int imgW, bool keep_ratio_with_pad, double adjust_contrast)
{
    // Parameters shadow the members, so each assignment is qualified with `this->`.
    this->imgW = imgW;
    this->imgH = imgH;
    this->keep_ratio_with_pad = keep_ratio_with_pad;
    this->adjust_contrast = adjust_contrast;
}
|
|
|
|
|
|
|
|
|
|
/// Turns a batch of images into a single network input blob.
///
/// For every image:
///   1. If adjust_contrast > 0, convert to grey (when multi-channel) and run
///      AdjustContrastGrey(); otherwise use the image unchanged.
///   2. Resize to height imgH with bicubic interpolation, keeping the aspect ratio;
///      the width is ceil(imgH * w / h), capped at imgW.
///   3. Normalize and pad to imgH x imgW with NormalizePAD.
/// The processed images are then packed with cv::dnn::blobFromImages.
///
/// @param batch  Input images.
/// @return The blob produced by cv::dnn::blobFromImages.
cv::Mat uns::EasyOCR_Recognizer::AlignCollate::operator()(const std::vector<cv::Mat>& batch) const
{
    // Shared normalizer/padder for the whole batch (single channel, imgH x imgW).
    NormalizePAD transform({ 1, imgH, imgW });

    std::vector<cv::Mat> processed;
    for (const cv::Mat& source : batch)
    {
        cv::Mat working;
        if (adjust_contrast > 0)
        {
            // Contrast adjustment operates on a grey image.
            cv::Mat grey;
            if (source.channels() > 1)
                cv::cvtColor(source, grey, cv::COLOR_BGR2GRAY);
            else
                grey = source;
            working = AdjustContrastGrey(grey, adjust_contrast);
        }
        else
        {
            working = source;
        }

        // Width that preserves the aspect ratio at height imgH, limited to imgW.
        const double aspect = double(working.cols) / working.rows;
        const int target_w = std::min(static_cast<int>(std::ceil(imgH * aspect)), imgW);

        cv::Mat scaled;
        cv::resize(working, scaled, cv::Size(target_w, imgH), 0, 0, cv::INTER_CUBIC);

        processed.push_back(transform(scaled));
    }

    cv::Mat blob;
    cv::dnn::blobFromImages(processed, blob);
    return blob;
}
|
|
|
|
|
|
|
|
|
|
float uns::EasyOCR_Recognizer::CustomMean(const VecFloat& x)
|
|
|
|
|
{
|
|
|
|
|
size_t N = x.size();
|
|
|
|
|
if (N == 0)
|
|
|
|
|
return 0.0f;
|
|
|
|
|
// 1. <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ԫ<EFBFBD>صij˻<C4B3>
|
|
|
|
|
double prod = 1.0;
|
|
|
|
|
for (float v : x)
|
|
|
|
|
if (v != 0)
|
|
|
|
|
prod *= static_cast<double>(v);
|
|
|
|
|
// 2. <20><><EFBFBD><EFBFBD>ָ<EFBFBD><D6B8> 2.0 / sqrt(N)
|
|
|
|
|
double exponent = 2.0 / std::sqrt(static_cast<double>(N));
|
|
|
|
|
// 3. <20><><EFBFBD><EFBFBD> prod <20><> exponent <20><><EFBFBD><EFBFBD>
|
|
|
|
|
return static_cast<float>(std::pow(prod, exponent));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Prepares one cropped text image for the recognition model.
///
/// Steps:
///   1. Return an empty Mat for empty input (the caller checks for this).
///   2. Convert 3-channel (BGR) or 4-channel (BGRA) input to grey; any other
///      channel count is used as-is.
///   3. Resize with linear interpolation based on the aspect ratio:
///        - width/height < 1: size becomes 64 wide by int(64 * CalculateRatio) high;
///        - otherwise:        size becomes int(64 * ratio) wide by 64 high.
///   4. Run AlignCollate (height 64, width = the computed model width,
///      contrast target 0.5) to obtain the input blob.
///
/// @param img  Source image.
/// @return The input blob, or an empty Mat when `img` is empty.
cv::Mat uns::EasyOCR_Recognizer::Preprocess(const cv::Mat& img) const
{
    if (img.empty())
        return {};

    // Reduce colour input to a single grey channel.
    cv::Mat gray;
    switch (img.channels())
    {
    case 3:
        cv::cvtColor(img, gray, cv::COLOR_BGR2GRAY);
        break;
    case 4:
        cv::cvtColor(img, gray, cv::COLOR_BGRA2GRAY);
        break;
    default:
        gray = img;
        break;
    }

    const int model_height = 64;
    const int width = gray.cols;
    const int height = gray.rows;
    const float ratio = static_cast<float>(width) / static_cast<float>(height);

    int model_width = 0;
    cv::Mat resized;
    if (ratio < 1.0f)
    {
        // Taller than wide: use the inverted ratio; note the cv::Size arguments are
        // (width = model_height, height = model_width) in this branch.
        model_width = static_cast<int>(model_height * CalculateRatio(width, height));
        cv::resize(gray, resized, cv::Size(model_height, model_width), 0, 0, cv::INTER_LINEAR);
    }
    else
    {
        // Wider than tall: fix the height at model_height and scale the width.
        model_width = static_cast<int>(model_height * ratio);
        cv::resize(gray, resized, cv::Size(model_width, model_height), 0, 0, cv::INTER_LINEAR);
    }

    AlignCollate alignCollate(model_height, model_width, true, 0.5);
    return alignCollate({ resized });
}
|
|
|
|
|
|
|
|
|
|
float uns::EasyOCR_Recognizer::CalculateRatio(int width, int height) const
|
|
|
|
|
{
|
|
|
|
|
float ratio = static_cast<float>(width) / static_cast<float>(height);
|
|
|
|
|
if (ratio < 1.0f)
|
|
|
|
|
ratio = 1.0f / ratio;
|
|
|
|
|
return ratio;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Numerically stable softmax over `C` consecutive logits.
///
/// The maximum logit is subtracted before exponentiation, and the normalising sum
/// is floored at 1e-6 before dividing.
///
/// @param logits  Pointer to the first of `C` floats.
/// @param C       Number of classes.
/// @return `C` probabilities, or an empty vector when `logits` is null or `C <= 0`
///         (previously this case read past the buffer).
uns::VecFloat uns::EasyOCR_Recognizer::SoftMAX(const float* logits, int C) const
{
    if (logits == nullptr || C <= 0)
        return {};

    // Largest logit, used as the shift for numerical stability.
    float max_logit = logits[0];
    for (int i = 1; i < C; ++i)
        max_logit = std::max(max_logit, logits[i]);

    // exp(logit - max) and their running sum.
    std::vector<float> exps(static_cast<size_t>(C));
    float sum = 0.f;
    for (int i = 0; i < C; ++i)
    {
        exps[i] = std::exp(logits[i] - max_logit);
        sum += exps[i];
    }

    // Normalise; the denominator is loop-invariant, so compute it once.
    const float denom = (sum > 1e-6f ? sum : 1e-6f);
    for (int i = 0; i < C; ++i)
        exps[i] /= denom;

    return exps;
}
|
|
|
|
|
|
|
|
|
|
/// Greedy per-step decoding of the model output.
///
/// The tensor data is read as N * T * C contiguous floats; for every (n, t) the C
/// logits are turned into probabilities with SoftMAX, the classes listed in
/// `ignore_idx` are zeroed, the probabilities are re-normalised (when their sum
/// exceeds 1e-6), and the arg-max class is appended to the outputs.
///
/// Reported probability: when a class other than 0 wins, its probability is stored.
/// When class 0 remains the arg-max, the stored probability is left at 0.0f rather
/// than probs[0]; CustomMean in this file skips zero entries, so this behaviour is
/// kept as-is. NOTE(review): presumably class 0 is the blank symbol - confirm.
///
/// @param outputs      Output tensor holding float data.
/// @param N            Batch size.
/// @param T            Number of time steps.
/// @param C            Number of classes.
/// @param out_indices  Out: cleared, then filled with N * T arg-max class indices.
/// @param out_probs    Out: cleared, then filled with N * T probabilities (see above).
/// @param ignore_idx   Class indices to suppress; entries outside [0, C) are skipped
///                     (previously they caused an out-of-bounds write).
void uns::EasyOCR_Recognizer::PostprocessONNXOutput(const Ort::Value& outputs, int N, int T, int C, VecInt& out_indices, VecFloat& out_probs, const VecInt ignore_idx)
{
    const float* data = outputs.GetTensorData<float>();

    out_indices.clear();
    out_probs.clear();

    for (int n = 0; n < N; ++n)
    {
        for (int t = 0; t < T; ++t)
        {
            // Start of the logits for this (n, t): ((n * T) + t) * C.
            const float* logits = data + (static_cast<size_t>(n) * T + t) * C;

            // 1) Softmax.
            auto probs = SoftMAX(logits, C);

            // 2) Suppress ignored classes, skipping indices that are out of range.
            for (const auto& idx : ignore_idx)
            {
                if (idx >= 0 && idx < C)
                    probs[idx] = 0.f;
            }

            // 3) Re-normalise after the suppression.
            float sum = 0.f;
            for (int c = 0; c < C; ++c)
                sum += probs[c];
            if (sum > 1e-6f)
            {
                for (int c = 0; c < C; ++c)
                    probs[c] /= sum;
            }

            // 4) Arg-max. best_prob intentionally stays 0 when class 0 wins.
            int best = 0;
            float best_prob = 0.0f;
            for (int c = 1; c < C; ++c)
            {
                if (probs[c] > probs[best])
                {
                    best = c;
                    best_prob = probs[c];
                }
            }

            out_indices.push_back(best);
            out_probs.push_back(best_prob);
        }
    }
}
|
|
|
|
|
|
|
|
|
|
/// Sets the recognizer to its "not initialised" state: no CPU session, model path
/// taken from the global OCR configuration, and the ONNX Runtime API table fetched
/// for ORT_API_VERSION. No session is created here; see Init().
uns::EasyOCR_Recognizer::EasyOCR_Recognizer()
{
    // Session state.
    this->ort_inited = false;
    this->ort_cpu_session = nullptr;

    // Model location from the global configuration.
    this->model_path = G_OCRConfig.GetRecognizeModelPath();

    // ONNX Runtime C API entry points.
    this->ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
}
|
|
|
|
|
|
|
|
|
|
bool uns::EasyOCR_Recognizer::Init()
|
|
|
|
|
{
|
|
|
|
|
if (ort_inited)
|
|
|
|
|
return true;
|
|
|
|
|
if (!RecheckModelInfo())
|
|
|
|
|
return false;
|
|
|
|
|
try
|
|
|
|
|
{
|
|
|
|
|
ort_env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "EasyOCR_Recognizer");
|
|
|
|
|
bool fallback_to_cpu = false;
|
|
|
|
|
if (!OCRToolBox::AutoSelectEP(ort, ort_session_options, fallback_to_cpu))
|
|
|
|
|
return false;
|
|
|
|
|
OCRToolBox::InitOrtSessionOptions(ort_session_options);
|
|
|
|
|
if ((G_OCRConfig.GetGPUUsage() == easyocr::GPUUsage::CPUOnly) || fallback_to_cpu) //ʹ<><CAB9>CPU<50><55><EFBFBD><EFBFBD>ʼ<EFBFBD><CABC>cpu session
|
|
|
|
|
{
|
|
|
|
|
ort_cpu_session = new Ort::Session(ort_env, model_path.c_str(), ort_session_options);
|
|
|
|
|
//ͨ<><CDA8>CPU session<6F><6E>ȡ<EFBFBD><C8A1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|
|
|
|
OCRToolBox::GetInputOutputNames(ort_cpu_session, input_names, input_ns, output_names, output_ns);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
//ͨ<><CDA8><EFBFBD><EFBFBD>ʱsession<6F><6E>ȡ<EFBFBD><C8A1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>CUDA<44><41><EFBFBD>̲߳<DFB3><CCB2><EFBFBD>ȫ<EFBFBD><C8AB>
|
|
|
|
|
Ort::Session ort_session(ort_env, model_path.c_str(), ort_session_options);
|
|
|
|
|
OCRToolBox::GetInputOutputNames(&ort_session, input_names, input_ns, output_names, output_ns);
|
|
|
|
|
}
|
|
|
|
|
ort_inited = true;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
catch (...)
|
|
|
|
|
{
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool uns::EasyOCR_Recognizer::UnInit()
|
|
|
|
|
{
|
|
|
|
|
try
|
|
|
|
|
{
|
|
|
|
|
if (ort_cpu_session != nullptr)
|
|
|
|
|
delete ort_cpu_session;
|
|
|
|
|
ort_cpu_session = nullptr;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
catch (...)
|
|
|
|
|
{
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool uns::EasyOCR_Recognizer::RecheckModelInfo()
|
|
|
|
|
{
|
|
|
|
|
if (model_path.empty())
|
|
|
|
|
model_path = G_OCRConfig.GetRecognizeModelPath();
|
|
|
|
|
return OCRToolBox::CheckFile(model_path);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Recognises the text in a single (already cropped) image.
///
/// Pipeline: Preprocess() -> ONNX Runtime inference -> greedy decoding with
/// PostprocessONNXOutput() -> OCRCharset::GetString() for the text and CustomMean()
/// for the confidence.
///
/// Inference uses the persistent CPU session when one exists; otherwise a session
/// is created for this call only.
///
/// Result codes carried in the confidence field when no text is produced:
///   -1.0f  model file check failed,
///    0.0f  preprocessing produced an empty input,
///   -2.0f  an exception was caught, or the model returned no output / an output
///          whose shape is not 3-dimensional.
///
/// @param image  Image containing one text region.
/// @return Recognised text and its confidence.
uns::EOCR_Result uns::EasyOCR_Recognizer::operator()(const cv::Mat& image)
{
    try
    {
        if (!RecheckModelInfo())
            return { L"", -1.0f };

        cv::Mat input = Preprocess(image);
        if (input.empty())
            return { L"", 0.0f };

        // Input tensor over the blob's own memory; batch and channel are fixed at 1,
        // the last two extents are taken from the blob.
        std::array<int64_t, 4> inputShape = { 1, 1, input.size[2], input.size[3] };
        Ort::MemoryInfo memInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value inputTensor = Ort::Value::CreateTensor<float>(memInfo, input.ptr<float>(), input.total(), inputShape.data(), inputShape.size());

        std::vector<Ort::Value> outputs;
        if (ort_cpu_session != nullptr)
        {
            outputs = ort_cpu_session->Run(Ort::RunOptions{nullptr}, input_names.data(), &inputTensor, 1, output_names.data(), 1);
        }
        else
        {
            // No persistent session: create one for this call only.
            Ort::Session call_session(ort_env, model_path.c_str(), ort_session_options);
            outputs = call_session.Run(Ort::RunOptions{nullptr}, input_names.data(), &inputTensor, 1, output_names.data(), 1);
        }

        // Validate before indexing: an output must exist and be shaped {N, T, C}.
        if (outputs.empty())
            return { L"", -2.0f };
        auto& outVal = outputs.front();
        const auto shape = outVal.GetTensorTypeAndShapeInfo().GetShape();
        if (shape.size() != 3)
            return { L"", -2.0f };
        const int N = static_cast<int>(shape[0]);
        const int T = static_cast<int>(shape[1]);
        const int C = static_cast<int>(shape[2]);

        // Softmax + greedy pick per time step (the callee clears and fills both vectors).
        std::vector<int> indices;
        std::vector<float> maxProbs;
        PostprocessONNXOutput(outVal, N, T, C, indices, maxProbs);

        // Decode class indices to text and aggregate the confidence.
        std::wstring text = OCRCharset::GetString(indices);
        float conf = CustomMean(maxProbs);
        return { text, conf };
    }
    catch (...)
    {
        return { L"", -2.0f };
    }
}
|
|
|
|
|
|
|
|
|
|
/// Recognises the text inside each detected region of a full image.
///
/// Each entry of `rects` is reduced to its bounding rectangle, clipped to the image,
/// cropped, and passed to the single-image operator(). Regions whose crop is empty
/// are skipped. Results are keyed by the index of the region in `rects`.
///
/// @param full_image  The complete source image.
/// @param rects       Detected text regions.
/// @return Map from region index to (text, confidence); empty when the model file
///         check fails or an exception is caught.
uns::EOCR_ResultSet uns::EasyOCR_Recognizer::operator()(const cv::Mat& full_image, const EOCRD_Rects& rects)
{
    if (!RecheckModelInfo())
        return {};

    try
    {
        // Image extent used to clip every region.
        const cv::Rect image_bounds(0, 0, full_image.cols, full_image.rows);

        EOCR_ResultSet recognised;
        for (size_t index = 0; index < rects.size(); ++index)
        {
            // Axis-aligned bounding box of the region, limited to the image area.
            const cv::Rect roi = cv::boundingRect(rects[index]) & image_bounds;

            cv::Mat crop = full_image(roi);
            if (crop.empty())
                continue;

            auto [text, conf] = (*this)(crop);
            recognised.insert({ index, { text, conf } });
        }
        return recognised;
    }
    catch (...)
    {
        return {};
    }
}
|