Archive for December 15th, 2022

15
Dec

OCR pytesseract and google OCR

   Posted by: admin    in Mẹo vặt của hiếu râu

27  yum install gcc openssl-devel bzip2-devel libffi-devel zlib-devel xz-devel

31  wget https://www.python.org/ftp/python/3.7.11/Python-3.7.11.tgz

32  tar -xvf Python-3.7.11.tgz

33  cd Python-3.7.11

34  ./configure --enable-optimizations

35  make altinstall

37  yum install -y https://repo.ius.io/ius-release-el7.rpm

38  yum install -y python36u python36u-libs python36u-devel python36u-pip

39  yum install epel-release

43  python3.7 --version

44  python3.7 -m pip

47  python3.7 -m ensurepip

49  pip3.7 install pytesseract

50  pip3.7 install tox

70  wget --no-check-certificate  https://download.opensuse.org/repositories/home:/Alexander_Pozdnyakov/CentOS_7/x86_64/tesseract-4.1.3+git4271-3.1.x86_64.rpm

73  wget  --no-check-certificate https://download.opensuse.org/repositories/home:/Alexander_Pozdnyakov/CentOS_7/noarch/tesseract-langpack-eng-4.00~git30-5.5.noarch.rpm

74  rpm -ivh tesseract-langpack-eng-4.00~git30-5.5.noarch.rpm

78  wget --no-check-certificate  https://download.opensuse.org/repositories/home:/Alexander_Pozdnyakov/CentOS_7/noarch/tesseract-langpack-osd-4.00~git30-5.5.noarch.rpm

87  rpm --nodeps -ivh tesseract-langpack-osd-4.00~git30-5.5.noarch.rpm

88  rpm -ivh tesseract-4.1.3+git4271-3.1.x86_64.rpm

90  wget --no-check-certificate  https://download.opensuse.org/repositories/home:/Alexander_Pozdnyakov/CentOS_7/x86_64/leptonica-1.76.0-2.5.x86_64.rpm

92  yum install libjpeg

94  yum install libtiff

96  yum install libwebp

97  rpm -ivh leptonica-1.76.0-2.5.x86_64.rpm

98  rpm -ivh tesseract-4.1.3+git4271-3.1.x86_64.rpm

[root@centos7-min OCR]# tox
ROOT: No tox.ini or setup.cfg or pyproject.toml found, assuming empty tox.ini at /root/OCR
py: OK (0.05 seconds)
congratulations :) (0.15 seconds)
[root@centos7-min OCR]#
=====================
https://github.com/tesseract-ocr/tesseract
https://tesseract-ocr.github.io/tessdoc/Installation.html
https://download.opensuse.org/repositories/home:/Alexander_Pozdnyakov/CentOS_7/
# Minimal OCR example: read the text in example.jpg with Tesseract and print it.
from PIL import Image
import pytesseract

# Tell pytesseract where the tesseract executable lives.
# Alternative path kept from the original (Windows-style bundle):
#pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
pytesseract.pytesseract.tesseract_cmd = r"/usr/bin/tesseract"

# Open the image through PIL; the with-block closes the file once OCR is done.
with Image.open('example.jpg') as image:
    # Run OCR on the image using the English language data.
    image_to_text = pytesseract.image_to_string(image, lang='eng')

# Print the recognized text
print(image_to_text)
=========== Dockerfile =================
# Builds on jdeathe/centos-ssh and adds Apache/PHP, a source-built Python 3.7,
# Tesseract 4.1.3 (from third-party RPMs), pytesseract, tox and OpenCV.
# The container's default command runs detect.py from the build context.
FROM jdeathe/centos-ssh

# Base URL of the repository that hosts the Tesseract/Leptonica RPMs for CentOS 7.
ARG TESS_REPO=https://download.opensuse.org/repositories/home:/Alexander_Pozdnyakov/CentOS_7

# Apache + PHP, the index page, a counter file writable by all users,
# and keep-alive turned off in the httpd configuration.
RUN yum -y install httpd php mod_php
COPY ./index.php /var/www/html/index.php
RUN touch /var/www/html/counter \
 && chmod 666 /var/www/html/counter \
 && echo "KeepAlive Off" >> /etc/httpd/conf/httpd.conf

# Working directory open to every user.
RUN mkdir -p /_100MB/facecrop \
 && chmod 777 /_100MB/facecrop

# Compiler and development headers used to build CPython, plus wget for downloads.
RUN yum install -y gcc openssl-devel bzip2-devel libffi-devel zlib-devel xz-devel wget

# Fetch and unpack the Python 3.7.11 sources under /opt.
WORKDIR /opt
RUN wget https://www.python.org/ftp/python/3.7.11/Python-3.7.11.tgz \
 && tar -xvf Python-3.7.11.tgz

# Directory changes must go through WORKDIR: a `RUN cd ...` only affects its own
# shell and is forgotten by the next instruction.
WORKDIR /opt/Python-3.7.11
# altinstall installs as python3.7 without replacing the system `python` binary.
RUN chmod 755 configure \
 && ./configure --enable-optimizations \
 && make altinstall

# Disabled alternative: Python 3.6 from the IUS repository.
#RUN yum install -y https://repo.ius.io/ius-release-el7.rpm
#RUN yum install -y python36u python36u-libs python36u-devel python36u-pip
#RUN yum install epel-release

# Bootstrap pip for the freshly built interpreter, then install the Python tools.
RUN python3.7 -m ensurepip \
 && pip3.7 install pytesseract \
 && pip3.7 install tox

# Tesseract, its language data and Leptonica from the third-party repository.
# NOTE(review): --no-check-certificate disables TLS verification for these
# downloads; presumably needed because of the base image's CA bundle - confirm,
# and drop the flag (or verify checksums) if possible.
# The osd language pack is installed with --nodeps, and tesseract itself goes
# last, after Leptonica and the image libraries are in place.
RUN wget --no-check-certificate \
      "${TESS_REPO}/x86_64/tesseract-4.1.3+git4271-3.1.x86_64.rpm" \
      "${TESS_REPO}/noarch/tesseract-langpack-eng-4.00~git30-5.5.noarch.rpm" \
      "${TESS_REPO}/noarch/tesseract-langpack-osd-4.00~git30-5.5.noarch.rpm" \
      "${TESS_REPO}/x86_64/leptonica-1.76.0-2.5.x86_64.rpm" \
 && rpm -ivh tesseract-langpack-eng-4.00~git30-5.5.noarch.rpm \
 && rpm --nodeps -ivh tesseract-langpack-osd-4.00~git30-5.5.noarch.rpm \
 && yum -y install libjpeg libtiff libwebp libpng \
 && rpm -ivh leptonica-1.76.0-2.5.x86_64.rpm \
 && rpm -ivh tesseract-4.1.3+git4271-3.1.x86_64.rpm

# OpenCV wheel (pinned) and the libglvnd development package installed alongside it.
RUN yum -y install libglvnd-devel \
 && pip3.7 install opencv-python==4.6.0.66

# Application files are copied from the build context into /opt.
WORKDIR /opt
COPY . /opt

# NOTE(review): port 80 is exposed, but the active CMD below does not start httpd.
EXPOSE 80
CMD ["sh","-c","python3.7 detect.py --image harry.jpg"]
# Disabled alternative command string that starts httpd and tails its access log:
#"/usr/sbin/httpd ; sleep 5; tail -f /var/log/httpd/access_log"]