"""Batch-export finished resume documents for every group.

For each group ID in ``num_groups`` and each resume number in
``RESUMES_PER_GROUP`` this script:

1. copies ``ScrapedData<n>.html`` from the group's CV folder into the oTree
   template data folder,
2. converts ``Resume_<n>_T1_Output.docx`` to PDF through Word (COM),
3. renders that PDF and stores it as a JPEG in the oTree static folder.

Run it as a script; it requires Windows with Microsoft Word installed.
"""

import os
import shutil
import time

import comtypes.client
from pdf2image import convert_from_path

# Value passed as ``FileFormat`` to ``Document.SaveAs`` to export a PDF.
WD_FORMAT_PDF = 17

# range() excludes its stop value: R's seq(2101, 2200) is range(2101, 2201).
num_groups = range(2101, 2201)  # these are now participant IDs
RESUMES_PER_GROUP = range(1, 7)  # resume numbers 1..6

# Absolute paths are needed for Word. Be careful with '\': use '/', '\\'
# or a raw string r"...".
CV_DIR = 'C:/Users/fo06mafa/Documents/AuthenticApplications/finished_cvs/Group{0}'
HTML_OUT_DIR = 'C:/Users/fo06mafa/oTree/cet_pre_screen/templates/data/Group{0}'
JPG_OUT_DIR = 'C:/Users/fo06mafa/oTree/_static/Resumes/Group{0}'

# Resolution handed to pdf2image when rasterising the PDF.
PDF_DPI = 500
# NOTE(review): quality=1 is the value the script has always used; it looks
# like the lowest JPEG quality setting -- confirm this is intended.
JPEG_QUALITY = 1

MULTI_PAGE_WARNING = '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ ERROR more than one page @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'


def _process_resume(word, group, number):
    """Copy the HTML, export the PDF and write the JPEG for one resume.

    Args:
        word: A running ``Word.Application`` COM object.
        group: Group (participant) ID used in the ``Group<id>`` folder names.
        number: Resume number within the group.

    Raises:
        Whatever ``shutil.copyfile``, the Word COM calls or pdf2image raise,
        e.g. when an input file is missing. The opened document is closed
        before the exception propagates.
    """
    cv_dir = CV_DIR.format(group)
    docx_path = '{0}/Resume_{1}_T1_Output.docx'.format(cv_dir, number)
    pdf_path = '{0}/Resume_{1}_T1_Output.pdf'.format(cv_dir, number)
    html_src = '{0}/ScrapedData{1}.html'.format(cv_dir, number)
    html_dst = '{0}/ScrapedData{1}.html'.format(HTML_OUT_DIR.format(group), number)
    jpg_path = '{0}/Resume_{1}_T1_Output.jpg'.format(JPG_OUT_DIR.format(group), number)

    # Move the scraped HTML next to the oTree templates.
    shutil.copyfile(html_src, html_dst)

    print(pdf_path)
    # Remove a PDF left over from an earlier run before exporting again.
    if os.path.isfile(pdf_path):
        os.remove(pdf_path)

    doc = word.Documents.Open(docx_path)
    try:
        doc.SaveAs(pdf_path, FileFormat=WD_FORMAT_PDF)
    finally:
        # Always release the document, even when the export fails.
        doc.Close()

    pages = convert_from_path(pdf_path, PDF_DPI)
    if len(pages) > 1:
        print(MULTI_PAGE_WARNING)
    if pages:
        # All pages share one output filename, so only the last page ever
        # survived; write just that one instead of encoding every page.
        # NOTE(review): for multi-page resumes the first page may be the one
        # actually wanted -- confirm.
        pages[-1].save(jpg_path, 'JPEG', optimize=True, quality=JPEG_QUALITY)


def main():
    """Convert every resume of every group using a single Word instance."""
    word = comtypes.client.CreateObject('Word.Application')
    try:
        # Key point 1: make Word visible before opening a document.
        word.Visible = True
        # Key point 2: give the COM server time to get ready.
        time.sleep(3)

        for group in num_groups:
            print(group)
            for number in RESUMES_PER_GROUP:
                _process_resume(word, group, number)
    finally:
        # Shut Word down even if a conversion failed, so no stray process
        # is left running.
        word.Visible = False
        word.Quit()


if __name__ == '__main__':
    main()