0 引言
本文整理的是之前为了把文字贴到图片上而编写的代码。默认的场景是发票一类的平面票据,所以并未考虑透视变换等情况。代码采用的是 pygame 的粘贴方式,之前也尝试过 OpenCV 的 seamlessClone 粘贴。
值得注意的是,通过修改参数、增加各种干扰操作(羽化、噪声等),生成的数据集看似丰富,但其内在的数据分布仍然十分单一。也就是说,用该数据集作为 OCR 模型的训练集,得到的模型仍然无法在现实场景中使用。因为在现实世界中,光照角度、拍摄角度、打印机用墨等都是变量,这些变量使得现实票据上文字的内在数据分布十分丰富;而通过简单代码生成的数据分布并不能覆盖它,至多只是与其中一部分重叠。故而,通过代码生成数据集的方式无法解决 OCR 现实数据集不足的问题。
所需要的操作:
1 - 下载; 2 - 将下面两份代码存成对应的get_color.py 和pygame_main.py; 3 - python 运行pygame_main.py即可。# get_color.pyimport cv2import pickle as cpimport numpy as npclass ColorSample(object): def __init__(self): '''colors_new.cp来自https://github.com/JarveeLee/SynthText_Chinese_version/tree/master/data/models/colors_new.cp ''' with open('colors_new.cp','rb') as f: self.colorsRGB = cp.load(f,encoding='latin-1') self.ncol = self.colorsRGB.shape[0]#4941 # convert color-means from RGB to LAB for better nearest neighbour # computations: self.colorsLAB = np.r_[self.colorsRGB[:,0:3], self.colorsRGB[:,6:9]].astype('uint8') self.colorsLAB = np.squeeze(cv2.cvtColor(self.colorsLAB[None,:,:],cv2.COLOR_RGB2Lab)) def sample_normal(self, col_mean, col_std): """ sample from a normal distribution centered around COL_MEAN with standard deviation = COL_STD. """ col_sample = col_mean + col_std * np.random.randn() return np.clip(col_sample, 0, 255).astype('uint8') def sample_from_data(self,bg_mat): """ bg_mat : this is a nxmx3 RGB image. returns a tuple : (RGB_foreground, RGB_background) each of these is a 3-vector. """ bg_orig = bg_mat.copy() bg_mat = cv2.cvtColor(bg_mat, cv2.COLOR_RGB2Lab) bg_mat = np.reshape(bg_mat, (np.prod(bg_mat.shape[:2]),3)) bg_mean = np.mean(bg_mat,axis=0) norms = np.linalg.norm(self.colorsLAB-bg_mean[None,:], axis=1) # choose a random color amongst the top 3 closest matches: #nn = np.random.choice(np.argsort(norms)[:3]) nn = np.argmin(norms) ## nearest neighbour color: data_col = self.colorsRGB[np.mod(nn,self.ncol),:] col1 = self.sample_normal(data_col[:3],data_col[3:6]) col2 = self.sample_normal(data_col[6:9],data_col[9:12]) if nn < self.ncol: return (col2, col1) else: # need to swap to make the second color close to the input backgroun color return (col1, col2)if __name__ =='__main__': fg_col,bg_col = sample_from_data(bgi)
# -*- coding: utf-8 -*-
# pygame replace Image
import os
import cv2
import glob
import math
import random
import numpy as np
import os.path as osp
from xml.dom.minidom import Document
import multiprocessing as mp
import logging
from PIL import Image,ImageDraw,ImageFont
import secrets
import pygame
from pygame.locals import *
from pygame import freetype
import get_color

resultImgsDir = '/home/result_imgs'  # where the generated images are stored
resultXmlDir = '/home/result_xmls'  # where the generated xml files are stored
bgiDir = '/home/background_images'  # directory of background images
gTtf = '/home/ttfs'  # directory of font files
# Text to paste, one sentence (or one word) per line.
totalFile = '/home/zzc/data/synth_recepit_text/result_200.txt'

FORMAT = '%(asctime)-15s [%(processName)s] %(message)s'
logging.basicConfig(format = FORMAT)

gBlockSize = 20  # number of sentences handled by one task
ttfSize = [28,30,35,40,45,50,55,60,65]


def _addSaltNoise(block, level=10):
    """Add salt-and-pepper noise to ``block`` in place and return it.

    Every array element independently gets a random integer in
    ``[0, level]``; elements that draw 0 are set to 0 and elements that draw
    ``level`` are set to 255, so a larger ``level`` means sparser noise.
    """
    # randint's upper bound is exclusive: draw from [0, level + 1) so that the
    # ``== level`` case can actually occur.
    ran = np.random.randint(0, level + 1, block.shape)
    block[ran == 0] = 0
    block[ran == level] = 255
    return block


def _addNoise(block, below=4, high=20):
    """Return a uint8 copy of ``block`` with additive Gaussian noise.

    The noise scale is one random integer in ``[below, high)`` shared by the
    whole block; the result is clamped to 0..255.
    """
    scale = np.random.randint(below, high)
    noisy = block + np.random.randn(*block.shape) * scale
    return np.clip(noisy, 0.0, 255.0).astype('uint8')


def _feather(block, height):
    """Feather (Gaussian-blur) ``block``; taller text gets a stronger blur."""
    # Determine the gaussian-blur std and kernel size from the text height.
    if height <= 30:
        bsz = 0.25
        ksz = 1
    elif 30 < height < 50:
        bsz = max(0.30, 0.5 + 0.1*np.random.randn())
        ksz = 3
    else:
        bsz = max(0.5, 1.5 + 0.5*np.random.randn())
        ksz = 5
    return cv2.GaussianBlur(block, (ksz, ksz), bsz)


def _seamlessClone(obj, dst, center):
    """Blend ``obj`` into ``dst`` around ``center`` with cv2.MIXED_CLONE.

    The shapes are printed before the exception is re-raised to ease
    debugging of out-of-bounds placements.
    """
    mask = 255 * np.ones(obj.shape, obj.dtype)
    try:
        mixed_clone = cv2.seamlessClone(obj, dst, mask, center, cv2.MIXED_CLONE)
    except Exception:
        print('exception:',obj.shape,dst.shape,mask.shape,center)
        raise
    return mixed_clone


def _rander(bgiGame, string, rowStart, font, get_color):
    """Render ``string`` onto the pygame surface at row ``rowStart``.

    The text colour is sampled from ``get_color`` (an object providing
    ``sample_from_data``), the font gets a random oblique flag and a random
    rotation in [-5, 4] degrees, and the column is chosen at random so that
    the text fits. The rendered region is then degraded with noise and blur.

    Returns ``(surface, rowStart, colStart, txtwidth, txtheight)``. When the
    text does not fit or rendering fails, the surface is returned with
    ``colStart == txtwidth == txtheight == 0``.
    """
    failed = (bgiGame, rowStart, 0, 0, 0)
    width, height = bgiGame.get_size()

    # Sample the colour.
    bgiNp = pygame.surfarray.array3d(bgiGame)
    fg_col, bg_col = get_color.sample_from_data(bgiNp)
    # pygame expects a plain colour sequence, not a numpy array.
    fg_col = tuple(int(c) for c in fg_col.squeeze())

    # Change the properties of the font.
    font.oblique = secrets.choice([False, True])
    font.rotation = secrets.choice(range(-5, 5))

    # A throw-away render is used only to measure the text.
    txtwidth, txtheight = font.render(string)[1].size
    if txtwidth > width:
        return failed
    colStart = secrets.randbelow(max(1, width - txtwidth))
    if rowStart + txtheight > height or colStart + txtwidth > width:
        return failed

    # Render the text.
    try:
        font.render_to(bgiGame, (colStart, rowStart), string, fg_col)
    except Exception:
        # Report the failure instead of annotating text that was never drawn.
        logging.exception('render_to failed, fg_col=%s', fg_col)
        return failed

    # Surface -> numpy: pygame arrays are (x, y, c), OpenCV wants (row, col, c).
    bgiNp = pygame.surfarray.array3d(bgiGame)
    bgiNp = cv2.cvtColor(bgiNp.transpose([1,0,2]), cv2.COLOR_RGB2BGR)

    # Add noise and blur to the text region only.
    rows = slice(rowStart, rowStart + txtheight)
    cols = slice(colStart, colStart + txtwidth)
    block = bgiNp[rows, cols, :]
    block = _addNoise(block, 4, 20)
    if secrets.choice(range(4)) == 0:
        block = _addSaltNoise(block, np.random.randint(70, 80))
    block = _feather(block, txtheight)
    block = _addNoise(block, 2, 20)
    if secrets.choice(range(4)) == 0:
        block = _addSaltNoise(block, np.random.randint(70, 80))
    bgiNp[rows, cols, :] = block

    # numpy -> surface.
    bgiNp = cv2.cvtColor(bgiNp.transpose([1,0,2]), cv2.COLOR_BGR2RGB)
    bgiGame = pygame.surfarray.make_surface(bgiNp)
    return bgiGame, rowStart, colStart, txtwidth, txtheight


def _paste(bgiGame, ttf, size, rowStart, curText, cols, get_color):
    """Choose the final 10-character string and render it with font ``ttf``.

    With probability 1/10 the text is replaced by 10 random digits, and with
    probability 1/8 one of those digits is replaced by a dot. Only strings of
    exactly 10 characters are rendered.

    Returns ``(surface, string, rowStart, colStart, txtwidth, txtheight)``.
    ``string`` is ``''`` when the text was rejected for its length;
    ``txtwidth``/``txtheight`` are 0 when rendering did not succeed.
    """
    ttfont = freetype.Font(ttf, size)
    string = curText.strip()

    numberLength = 10
    digits = ['0','1','2','3','4','5','6','7','8','9']
    # Randomly substitute a digit string.
    if secrets.randbelow(numberLength) == 0:
        string = ''.join(secrets.choice(digits) for _ in range(numberLength))
        # Randomly turn it into a decimal number; the dot replaces one digit
        # so the length stays at numberLength.
        if secrets.randbelow(numberLength - 2) == 0:
            dotInd = random.randint(1, numberLength - 2)
            string = string[:dotInd] + '.' + string[dotInd+1:]

    # Skip texts that are not exactly numberLength characters long. The tuple
    # has the same arity as the success path so the caller can always unpack.
    if len(string) != numberLength:
        return bgiGame, '', rowStart, 0, 0, 0

    bgiGame, rowStart, colStart, txtwidth, txtheight = _rander(
        bgiGame, string, rowStart, ttfont, get_color)
    return bgiGame, string, rowStart, colStart, txtwidth, txtheight


def _xml(doc, anno, string, xminT, yminT, xmaxT, ymaxT):
    """Append one ``<object>`` element describing a text box to ``anno``.

    Nothing is added when ``string`` is empty.
    """
    if not string:
        return

    def addLeaf(parent, tag, value):
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(value))
        parent.appendChild(node)

    body = doc.createElement('object')
    anno.appendChild(body)
    addLeaf(body, 'name', 'text')
    addLeaf(body, 'textContent', string)

    bndbox = doc.createElement('bndbox')
    for tag, value in (('xmin', xminT), ('ymin', yminT), ('xmax', xmaxT), ('ymax', ymaxT)):
        addLeaf(bndbox, tag, str(value))
    body.appendChild(bndbox)


def _newAnnotation(imgname, width, height, depth):
    """Create the xml document head (image name and size); return (doc, anno)."""
    doc = Document()
    anno = doc.createElement('Annotations')
    doc.appendChild(anno)

    imgNameNode = doc.createElement('imgName')
    imgNameNode.appendChild(doc.createTextNode(imgname))
    anno.appendChild(imgNameNode)

    sizeNode = doc.createElement('size')
    for tag, value in (('width', width), ('height', height), ('depth', depth)):
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(str(value)))
        sizeNode.appendChild(node)
    anno.appendChild(sizeNode)
    return doc, anno


def paste(imgname, bgi, text, ttf, get_color):
    """Paste random lines of ``text`` onto the background image, row by row.

    imgname   : image file name recorded in the xml.
    bgi       : path of the background image.
    text      : list of candidate strings.
    ttf       : path of the font file.
    get_color : colour sampler providing ``sample_from_data``.

    Returns ``(bgr_image, xml_document, number_of_pasted_lines)``.
    """
    pygame.init()
    bgiGame = pygame.image.load(bgi)
    width, height = bgiGame.get_size()
    depth = bgiGame.get_bitsize() // 8

    # Choose the spacing in front of the first row.
    curRowInter = random.randint(3, 7)
    curRow = curRowInter
    # Randomly choose the font size.
    curTtfSize = random.choice(ttfSize)

    # Create the xml head.
    doc, anno = _newAnnotation(imgname, width, height, depth)

    # Paste the text onto the image one row at a time. curRow is a vertical
    # position, so it is bounded by the image height.
    numTextDone = 0
    while text and curRow + curTtfSize <= height:
        curText = secrets.choice(text)
        bgiGame, string, curRow, colStart, txtwidth, txtheight = _paste(
            bgiGame, ttf, curTtfSize, curRow, curText, width, get_color)
        if not string:
            # Unusable text: retry this row with another candidate.
            continue
        if txtwidth > 0 and txtheight > 0:
            numTextDone += 1
            _xml(doc, anno, string, xminT = colStart, yminT = curRow,
                 xmaxT = colStart+txtwidth, ymaxT = curRow+txtheight)
        # A failed render has txtheight == 0, so the row still moves on by the
        # spacing and the loop always terminates.
        curRow += txtheight + curRowInter
        # Spacing and font size for the next row.
        curRowInter = random.randint(3, 6)
        curTtfSize = random.choice(ttfSize)

    bgi = pygame.surfarray.array3d(bgiGame).transpose([1,0,2])
    bgi = cv2.cvtColor(bgi, cv2.COLOR_RGB2BGR)
    return bgi, doc, numTextDone


def handle(indTexts):
    """Worker: generate one image and its xml from ``(index, texts)``."""
    ind, texts = indTexts
    # Process id, used to keep file names unique across workers.
    pid = os.getpid()
    # Colour sampler.
    getcolor = get_color.ColorSample()

    # Randomly choose one background image.
    bgis = glob.glob( osp.join(bgiDir, '*.jpg') )
    if not bgis:
        raise FileNotFoundError('no *.jpg background image in {}'.format(bgiDir))
    bgipath = random.choice(bgis)

    # Randomly choose a font.
    ttfs = glob.glob( osp.join(gTtf, '*.ttf') )
    if not ttfs:
        raise FileNotFoundError('no *.ttf font in {}'.format(gTtf))
    ttf = random.choice(ttfs)

    # Call paste to do the work.
    imgname = 'bgi{}_ind{}_pid{}_ttf{}.jpg'.format(osp.basename(bgipath),ind,pid,osp.basename(ttf))
    try:
        bgiNp, doc, numTextDone = paste(imgname, bgipath, texts, ttf, getcolor)
    finally:
        # paste() calls pygame.init(); always release it.
        pygame.quit()

    imgnamep = 'bgi{}_ind{}_{}Of{}_ttf{}.jpg'.format(osp.basename(bgipath),ind,numTextDone,len(texts),osp.basename(ttf))
    logging.warning(imgnamep)

    # Write the image and the xml to their directories.
    if not cv2.imwrite(osp.join(resultImgsDir, imgname), bgiNp):
        logging.error('failed to write %s', osp.join(resultImgsDir, imgname))
    xmlFileName = osp.join(resultXmlDir, '{}.xml'.format(imgname[:-4]))
    # toprettyxml(encoding=...) returns bytes; write them unchanged so the file
    # really is in the encoding its xml declaration announces.
    with open(xmlFileName, "wb") as fxml:
        fxml.write(doc.toprettyxml(indent = " ", newl = "\n", encoding = "utf-8"))
    return


if __name__ == '__main__':
    # 1 - Read the text lines, then split them into blocks for the workers.
    # NOTE(review): the text file is assumed to be UTF-8 - confirm.
    with open(totalFile, encoding='utf-8') as ftxt:
        total = [line.strip() for line in ftxt]
    numP = 30
    inter = math.ceil(len(total)/gBlockSize)
    totalSP = [total[i::inter] for i in range(inter)]

    # cv2.imwrite only returns False when the directory is missing.
    os.makedirs(resultImgsDir, exist_ok=True)
    os.makedirs(resultXmlDir, exist_ok=True)

    # 2 - Start the worker processes.
    print('begin',len(totalSP))
    with mp.Pool(numP) as p:
        p.map(handle, enumerate(totalSP))
结果如图:
下面是采用 PIL 绘制文字、再用 OpenCV 的 seamlessClone 进行粘贴的版本。需要注意的是,PIL 在绘制文字时不支持文字旋转;而且对于这种简单的文字粘贴,pygame 的结果和 seamlessClone 的效果看起来差不多。
# -*- coding: utf-8 -*-
import os
import cv2
import glob
import math
import random
import numpy as np
import os.path as osp
from xml.dom.minidom import Document
import multiprocessing as mp
import logging
from PIL import Image,ImageDraw,ImageFont
import pygame
from pygame.locals import *
from pygame import freetype
import get_color

resultImgsDir = 'crnn_result_imgs1'
resultXmlDir = 'crnn_result_xmls1'
bgiDir = 'bgi'
gTtf = 'ttfs'
totalFile = 'texts.txt'

FORMAT = '%(asctime)-15s [%(processName)s] %(message)s'
logging.basicConfig(format = FORMAT)

gBlockSize = 20  # number of sentences handled by one task
ttfSize = [28,30,35,40,45,50,55,60,65]


def _addSaltNoise(block, level=10):
    """Add salt-and-pepper noise to ``block`` in place and return it.

    Every array element independently gets a random integer in
    ``[0, level]``; elements that draw 0 are set to 0 and elements that draw
    ``level`` are set to 255.
    """
    # randint's upper bound is exclusive: draw from [0, level + 1) so that the
    # ``== level`` case can actually occur.
    ran = np.random.randint(0, level + 1, block.shape)
    block[ran == 0] = 0
    block[ran == level] = 255
    return block


def _addNoise(block):
    """Return a uint8 copy of ``block`` with additive Gaussian noise.

    The noise scale is one random integer in [2, 20) shared by the whole
    block; the result is clamped to 0..255.
    """
    noisy = block + np.random.randn(*block.shape) * np.random.randint(2, 20)
    return np.clip(noisy, 0.0, 255.0).astype('uint8')


def _feather(block, height):
    """Gaussian-blur ``block``; taller text gets a stronger blur."""
    # Determine the gaussian-blur std and kernel size from the text height.
    if height <= 30:
        bsz = 0.25
        ksz = 1
    elif 30 < height < 50:
        bsz = max(0.30, 0.5 + 0.1*np.random.randn())
        ksz = 3
    else:
        bsz = max(0.5, 1.5 + 0.5*np.random.randn())
        ksz = 5
    return cv2.GaussianBlur(block, (ksz, ksz), bsz)


def _seamlessClone(obj, dst, center):
    """Blend ``obj`` into ``dst`` around ``center`` with cv2.MIXED_CLONE.

    The shapes are printed before the exception is re-raised to ease
    debugging of out-of-bounds placements.
    """
    mask = 255 * np.ones(obj.shape, obj.dtype)
    try:
        mixed_clone = cv2.seamlessClone(obj, dst, mask, center, cv2.MIXED_CLONE)
    except Exception:
        print('exception:',obj.shape,dst.shape,mask.shape,center)
        raise
    return mixed_clone


def _textSize(font, string):
    """Return ``(width, height)`` of ``string`` measured from the draw origin.

    ``ImageFont.getsize`` was removed in Pillow 10, so ``getbbox`` is used
    when available. The right/bottom edges (not right-left / bottom-top) are
    returned because the caller cuts its block starting at the draw origin.
    """
    if not string:
        return 0, 0
    if hasattr(font, 'getbbox'):
        _, _, right, bottom = font.getbbox(string)
        return int(right), int(bottom)
    return font.getsize(string)


def _rander(rawbgi, string, bgr, point, font, get_color):
    """Draw ``string`` on ``rawbgi`` at ``point`` and blend the noisy patch.

    rawbgi    : H x W x 3 uint8 image array.
    bgr       : unused, kept for interface compatibility.
    point     : (column, row) of the top-left corner of the text.
    font      : PIL font used for drawing.
    get_color : colour sampler providing ``sample_from_data``.

    Returns the resulting image as a numpy array.
    """
    curCol, curRow = point

    fg_col, bg_col = get_color.sample_from_data(rawbgi)
    # Jitter the colour slightly, then clamp: the sum can leave 0..255.
    jitter = np.random.randint(-3, 3, 3)
    fg_col = np.clip(fg_col.squeeze().astype('int16') + jitter, 0, 255)

    bgi = Image.fromarray(rawbgi)
    draw = ImageDraw.Draw(bgi)
    draw.text((curCol, curRow), string, tuple(int(c) for c in fg_col), font=font)
    width, height = _textSize(font, string)

    bgi = np.array(bgi)
    # Clip the text region to the image so that seamlessClone is never asked
    # to blend outside the destination.
    rowEnd = min(curRow + height, bgi.shape[0])
    colEnd = min(curCol + width, bgi.shape[1])
    if rowEnd <= curRow or colEnd <= curCol:
        return bgi

    block = bgi[curRow:rowEnd, curCol:colEnd, :]
    block = _addNoise(block)
    block = _feather(block, height)
    block = _addNoise(block)
    block = _addSaltNoise(block, 50)

    center = (curCol + colEnd)//2, (curRow + rowEnd)//2
    return _seamlessClone(block, bgi, center)


def _paste(bgi, ttf, size, curRow, curCol, curText, cols, get_color):
    """Choose a 10-character string and render it at (curCol, curRow).

    With probability 1/10 (and room for at least 4 characters) the text is
    replaced by 10 random digits, one of which becomes a dot with probability
    1/8. Otherwise a random 10-character window of ``curText`` is used.

    Returns ``(image, string, width, height)``; ``string`` is ``''`` and the
    image is unchanged when nothing was pasted.
    """
    ttfont = ImageFont.truetype(ttf, size)
    # Rough number of characters that fit to the right of curCol.
    maxNumText = math.floor((cols - curCol) / size)
    curText = curText.strip()

    shouldMaxNumTxt = 10
    # Randomly substitute a digit string.
    if random.randint(0, 9) == 9 and maxNumText >= 4:
        string = ''.join([str(random.randint(0, 9)) for _ in range(shouldMaxNumTxt)])
        # Randomly turn it into a decimal number; the dot replaces one digit.
        if random.randint(0, 7) == 7:
            dotInd = random.randint(1, shouldMaxNumTxt - 2)
            string = string[:dotInd] + '.' + string[dotInd+1:]
    else:
        # Random window over the text; the last valid start is len - 10.
        startInd = random.randint(0, max(0, len(curText) - shouldMaxNumTxt))
        string = curText[startInd:startInd+shouldMaxNumTxt].strip()

    # Skip when fewer than 10 characters fit or the string is not 10 long.
    if maxNumText < 10 or len(string) != shouldMaxNumTxt:
        return bgi, '', 0, 0

    bgr = [random.randint(100, 254) for i in range(3)]
    bgi = _rander(bgi, string, bgr, (curCol, curRow), ttfont, get_color)

    # Printed width and height.
    width, height = _textSize(ttfont, string)
    return bgi, string, width, height


def _xml(doc, anno, string, xminT, yminT, xmaxT, ymaxT):
    """Append one ``<object>`` element describing a text box to ``anno``.

    Nothing is added when ``string`` is empty.
    """
    if not string:
        return

    def addLeaf(parent, tag, value):
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(value))
        parent.appendChild(node)

    body = doc.createElement('object')
    anno.appendChild(body)
    addLeaf(body, 'name', 'text')
    addLeaf(body, 'textContent', string)

    bndbox = doc.createElement('bndbox')
    for tag, value in (('xmin', xminT), ('ymin', yminT), ('xmax', xmaxT), ('ymax', ymaxT)):
        addLeaf(bndbox, tag, str(value))
    body.appendChild(bndbox)


def _newAnnotation(imgname, width, height, depth):
    """Create the xml document head (image name and size); return (doc, anno)."""
    doc = Document()
    anno = doc.createElement('Annotations')
    doc.appendChild(anno)

    imgNameNode = doc.createElement('imgName')
    imgNameNode.appendChild(doc.createTextNode(imgname))
    anno.appendChild(imgNameNode)

    sizeNode = doc.createElement('size')
    for tag, value in (('width', width), ('height', height), ('depth', depth)):
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(str(value)))
        sizeNode.appendChild(node)
    anno.appendChild(sizeNode)
    return doc, anno


def paste(imgname, bgi, text, ttf, ttfRandom, get_color):
    """Paste random lines of ``text`` onto the background image, row by row.

    imgname   : image file name recorded in the xml.
    bgi       : path of the background image.
    text      : list of candidate strings.
    ttf       : path of the font file.
    ttfRandom : list of font sizes to choose from.
    get_color : colour sampler providing ``sample_from_data``.

    Returns ``(bgr_image, xml_document)``.
    """
    imgPath = bgi
    bgi = cv2.imread(imgPath)
    if bgi is None:
        raise IOError('cannot read background image: {}'.format(imgPath))
    # The colour model and PIL both work in RGB, while cv2 loads BGR; convert
    # here and back again before returning.
    bgi = cv2.cvtColor(bgi, cv2.COLOR_BGR2RGB)
    rows, cols, depth = bgi.shape

    curRowInter = random.randint(3, 7)
    curRow = curRowInter
    curTtfSize = random.randint(0, len(ttfRandom)-1)

    # Create the xml head.
    doc, anno = _newAnnotation(imgname, cols, rows, depth)

    # Give up on a row after this many rejected candidates, so that an image
    # too narrow for 10 characters at the chosen size cannot loop forever.
    maxAttempts = 50
    failures = 0
    while text and curRow + ttfRandom[curTtfSize] <= rows:
        size = ttfRandom[curTtfSize]
        advance = size
        # Column of the current line.
        curCol = random.randint(0, cols-1)
        # If the column lies beyond 0.9*cols, do not paste this line.
        if curCol < cols*0.9:
            curText = text[random.randint(0, len(text)-1)]
            bgi, string, width, height = _paste(
                bgi, ttf, size, curRow, curCol, curText, cols, get_color)
            if string:
                # Keep the annotated box inside the image.
                _xml(doc, anno, string, xminT = curCol, yminT = curRow,
                     xmaxT = min(curCol+width, cols), ymaxT = min(curRow+height, rows))
                # Advance by the rendered height so that lines do not overlap.
                advance = max(size, height)
            else:
                failures += 1
                if failures < maxAttempts:
                    continue
        failures = 0
        curRow += curRowInter + advance
        # Spacing and font size for the next row.
        curRowInter = random.randint(3, 7)
        curTtfSize = random.randint(0, len(ttfRandom)-1)

    return cv2.cvtColor(np.array(bgi), cv2.COLOR_RGB2BGR), doc


def handle(text):
    """Worker: generate one image and its xml from ``(index, texts)``."""
    ind, text = text
    # Process id, used to keep file names unique across workers.
    pid = os.getpid()
    getcolor = get_color.ColorSample()

    # Select one background image.
    bgis = glob.glob( osp.join(bgiDir,'*.jpg') )
    if not bgis:
        raise FileNotFoundError('no *.jpg background image in {}'.format(bgiDir))
    bgi = bgis[random.randint(0, len(bgis)-1)]

    # Select one font.
    ttfs = glob.glob(osp.join(gTtf,'*.ttf'))
    if not ttfs:
        raise FileNotFoundError('no *.ttf font in {}'.format(gTtf))
    curTtf = random.randint(0, len(ttfs)-1)
    ttf = ttfs[curTtf]

    # Random subset of the font sizes; the first size is always kept so the
    # subset is never empty.
    ttfRandom = [ttfSize[0]] + [size for size in ttfSize[1:] if random.randint(0, 1)]

    imgname = '{}_{}_{}.jpg'.format(ind,pid,curTtf)
    bgi, doc = paste(imgname, bgi, text, ttf, ttfRandom, getcolor)

    if not cv2.imwrite(osp.join(resultImgsDir, imgname), bgi):
        logging.error('failed to write %s', osp.join(resultImgsDir, imgname))
    xmlFileName = osp.join(resultXmlDir, '{}.xml'.format(imgname[:-4]))
    # toprettyxml(encoding=...) returns bytes; write them unchanged so the file
    # really is in the encoding its xml declaration announces.
    with open(xmlFileName, "wb") as fxml:
        fxml.write(doc.toprettyxml(indent = " ", newl = "\n", encoding = "utf-8"))
    logging.warning('{}'.format(ind))
    return


if __name__ == '__main__':
    # NOTE(review): the text file is assumed to be UTF-8 - confirm.
    with open(totalFile, encoding='utf-8') as ftxt:
        total = [line.strip() for line in ftxt]
    numP = 30
    inter = math.ceil(len(total)/gBlockSize)
    totalSP = [total[i::inter] for i in range(inter)]

    # cv2.imwrite only returns False when the directory is missing.
    os.makedirs(resultImgsDir, exist_ok=True)
    os.makedirs(resultXmlDir, exist_ok=True)

    print('begin')
    with mp.Pool(numP) as p:
        p.map(handle, enumerate(totalSP[:1000]))