80 lines
2.5 KiB
Python

from io import BytesIO
from openpyxl import load_workbook
def excel_dict_reader(fhand, sheet_name, mandatory_column_name=None):
    """Read one sheet of an Excel file object as a stream of row dicts.

    The file object is rewound and its whole content is copied into an
    in-memory buffer before being handed to openpyxl, so ``fhand`` must be
    seekable and must return bytes from ``read()``.

    Args:
        fhand: Binary, seekable file-like object holding the workbook.
        sheet_name: Name of the sheet to read.
        mandatory_column_name: Forwarded unchanged to
            ``workbook_sheet_reader``.

    Returns:
        The generator produced by ``workbook_sheet_reader`` for the loaded
        workbook.
    """
    fhand.seek(0)
    buffer = BytesIO(fhand.read())
    # data_only / read_only are passed exactly as before: the workbook is
    # opened for reading values only.
    workbook = load_workbook(filename=buffer, data_only=True, read_only=True)
    return workbook_sheet_reader(
        workbook,
        sheet_name,
        mandatory_column_name=mandatory_column_name,
    )
def is_none(value):
    """Return ``True`` only when *value* is the ``None`` singleton."""
    return value is None
def workbook_sheet_reader(workbook, sheet_name, mandatory_column_name=None,
                          allowed_empty_line_slots=5):
    """Yield one dict per data row of a sheet, keyed by the header row.

    The first row of the sheet is taken as the header. Every following row
    is turned into ``dict(zip(header, values))``; string cells (those whose
    ``data_type`` is ``'s'``) are stripped of surrounding whitespace.

    A cell counts as blank only when its value is ``None`` or the empty
    string, so rows holding legitimate falsy data such as ``0`` or ``False``
    are kept.

    Args:
        workbook: Mapping-like object giving access to sheets by name; each
            sheet must expose ``rows``, an iterable of rows of cells with
            ``value`` and ``data_type`` attributes.
        sheet_name: Name of the sheet to read.
        mandatory_column_name: Optional header name. Rows whose value in
            this column is blank are skipped.
        allowed_empty_line_slots: Number of consecutive blank rows after
            which reading stops.

    Yields:
        dict: Header-to-value mapping for each non-blank data row.

    Raises:
        ValueError: If ``sheet_name`` is not in the workbook. Because this
            is a generator, the error surfaces on first iteration.
        KeyError: If ``mandatory_column_name`` is not a key of a row dict
            (for instance when it is absent from the header).
    """
    try:
        sheet = workbook[sheet_name]
    except KeyError as error:
        raise ValueError(f"The '{sheet_name}' sheet is missing.") from error

    header = None
    empty_lines = 0
    for row in sheet.rows:
        values = [
            str(cell.value).strip()
            if cell.value is not None and cell.data_type == 's'
            else cell.value
            for cell in row
        ]

        if header is None:
            header = values
            continue

        # Explicit blank test: a plain truthiness check would also discard
        # rows whose only content is 0 / 0.0 / False.
        if all(value is None or value == "" for value in values):
            empty_lines += 1
            if empty_lines >= allowed_empty_line_slots:
                break
            continue
        empty_lines = 0

        # zip() stops at the shorter sequence, so extra cells beyond the
        # header (or header names beyond a short row) are not included.
        data = dict(zip(header, values))

        if mandatory_column_name is not None:
            mandatory_value = data[mandatory_column_name]
            if mandatory_value is None or mandatory_value == "":
                continue

        yield data
def get_all_cell_data_from_sheet(workbook, sheet_name, allowed_empty_line_slots=5):
    """Return the cell values of all non-blank rows of a sheet as a flat list.

    Rows are visited in order and the values of each non-blank row are
    appended to a single list (blank cells inside a non-blank row are kept
    as they are). String cells (``data_type`` equal to ``'s'``) are stripped
    of surrounding whitespace.

    A cell counts as blank only when its value is ``None`` or the empty
    string, so rows holding legitimate falsy data such as ``0`` or ``False``
    are kept.

    Args:
        workbook: Mapping-like object giving access to sheets by name; each
            sheet must expose ``rows``, an iterable of rows of cells with
            ``value`` and ``data_type`` attributes.
        sheet_name: Name of the sheet to read.
        allowed_empty_line_slots: Number of consecutive blank rows after
            which reading stops.

    Returns:
        list: Cell values of every non-blank row, concatenated row by row.

    Raises:
        ValueError: If ``sheet_name`` is not in the workbook.
    """
    try:
        sheet = workbook[sheet_name]
    except KeyError as error:
        raise ValueError(f"The '{sheet_name}' sheet is missing.") from error

    empty_lines = 0
    all_values = []
    for row in sheet.rows:
        values = [
            str(cell.value).strip()
            if cell.value is not None and cell.data_type == 's'
            else cell.value
            for cell in row
        ]

        # Explicit blank test: a plain truthiness check would also discard
        # rows whose only content is 0 / 0.0 / False.
        if all(value is None or value == "" for value in values):
            empty_lines += 1
            if empty_lines >= allowed_empty_line_slots:
                break
            continue

        empty_lines = 0
        all_values.extend(values)
    return all_values