Working with CSV & Excel Files

  1. CSV

  2. Excel

Reading CSV Files:

Example 1:

# importing csv module 
import csv 

# csv file name 
filename = "C:/Users/Shalki/Desktop/python/Book1.csv"

# initializing the titles and rows list 
fields = [] 
rows = [] 

# reading csv file 
with open(filename, 'r') as csvfile: 
	# creating a csv reader object 
	csvreader = csv.reader(csvfile) 
	
	# extracting field names through first row 
	fields = next(csvreader) 

	# extracting each data row one by one 
	for row in csvreader: 
		rows.append(row) 

	# get total number of rows 
	print("Total no. of rows: %d"%(csvreader.line_num)) 

# printing the field names 
print('Field names are:' + ', '.join(field for field in fields)) 

# printing first 5 rows 
print('\nFirst 5 rows are:\n') 
#print(rows[1])
for row in rows[:5]: 
	# parsing each column of a row 
	for cell in row: 
		print("%10s"%cell, end="\t")
	print('\n') 

Output:

Example 2:

import csv
file1 = "C:/Users/Shalki/Desktop/python/Book1.csv"
with open(file1, 'r') as f:
    reader = csv.reader(f)
    for row in reader:
        rows.append(row)
for row in rows[:5]:
    print(row)

['Sonam', '23', 'gurgaon', 'haryana', '9999222200']

['Arun', '65', 'patna', 'bihar', '7777222211']

['Nitin', '76', 'faridabad', 'haryana', '6666000011']

['Payal', '58', 'noida', 'up', '2222555599']

['Jyoti', '99', 'delhi', 'delhi', '4444555577']

Writing CSV Files:

# importing the csv module 
import csv 

# field names 
fields = ['Name', 'Branch', 'Year', 'CGPA'] 

# data rows of csv file 
rows = [ ['Nikhil', 'COE', '2', '9.0'], 
		['Sanchit', 'COE', '2', '9.1'], 
		['Aditya', 'IT', '2', '9.3'], 
		['Sagar', 'SE', '1', '9.5'], 
		['Prateek', 'MCE', '3', '7.8'], 
		['Sahil', 'EP', '2', '9.1']] 

# name of csv file 
filename = "C:/Users/Shalki/Desktop/python/Book1.csv"

# writing to csv file 
with open(filename, 'w') as csvfile: 
	# creating a csv writer object 
	csvwriter = csv.writer(csvfile) 
	
	# writing the fields 
	csvwriter.writerow(fields) 
	
	# writing the data rows 
	csvwriter.writerows(rows)

Writing Dictionary to csv file

# importing the csv module 
import csv 

# my data rows as dictionary objects 
mydict =[{'branch': 'COE', 'cgpa': '9.0', 'name': 'Nikhil', 'year': '2'}, 
		{'branch': 'COE', 'cgpa': '9.1', 'name': 'Sanchit', 'year': '2'}, 
		{'branch': 'IT', 'cgpa': '9.3', 'name': 'Aditya', 'year': '2'}, 
		{'branch': 'SE', 'cgpa': '9.5', 'name': 'Sagar', 'year': '1'}, 
		{'branch': 'MCE', 'cgpa': '7.8', 'name': 'Prateek', 'year': '3'}, 
		{'branch': 'EP', 'cgpa': '9.1', 'name': 'Sahil', 'year': '2'}] 

# field names 
fields = ['name', 'branch', 'year', 'cgpa'] 

# name of csv file 
filename = "C:/Users/Shalki/Desktop/python/Book1.csv"

# writing to csv file 
with open(filename, 'w') as csvfile: 
	# creating a csv dict writer object 
	writer = csv.DictWriter(csvfile, fieldnames = fields) 
	
	# writing headers (field names) 
	writer.writeheader() 
	
	# writing data rows 
	writer.writerows(mydict) 

Reading an excel file using Python:

# Reading an excel file using Python 
import xlrd 
  
# Give the location of the file 
loc = "C:/Users/Shalki/Desktop/python/display.xlsx"
  
# To open Workbook 
wb = xlrd.open_workbook(loc) 
sheet = wb.sheet_by_index(0) 
  
# For row 0 and column 0 
print(sheet.cell_value(0,0)) 

Output:

'NAME'

Extract number of rows in Excel:

# Program to extract number 
# of rows using Python 
import xlrd 
  
# Give the location of the file 
loc = ("path of file") 
  
wb = xlrd.open_workbook(loc) 
sheet = wb.sheet_by_index(0) 
sheet.cell_value(0, 0) 
  
# Extracting number of rows 
print(sheet.nrows)

Output:

4

Extract number of columns:

#Program to extract number of 
# columns in Python 
import xlrd 
  
loc = ("path of file") 
  
wb = xlrd.open_workbook(loc) 
sheet = wb.sheet_by_index(0) 
  
# For row 0 and column 0 
sheet.cell_value(0, 0) 
  
# Extracting number of columns 
print(sheet.ncols) 

Output:

3

Extract a particular row value:

# Program to extract a particular row value 
import xlrd 
  
loc = ("path of file") 
  
wb = xlrd.open_workbook(loc) 
sheet = wb.sheet_by_index(0) 
  
sheet.cell_value(0, 0) 
  
print(sheet.row_values(1)) 

Output:

['ALEX', 4.0, 2011272.0]]

Extract First Column

# Program extracting first column 
import xlrd 
  
loc = ("C:/Users/Shalki/Desktop/python/display.xlsx") 
  
wb = xlrd.open_workbook(loc) 
sheet = wb.sheet_by_index(0) 
sheet.cell_value(0, 0) 
  
for i in range(sheet.nrows): 
    print(sheet.cell_value(i, 0)) 

Name

Rahul

Kapil

Creating/Writing in New Excel Workbook, Worksheet in Python

from openpyxl import Workbook

workbook = Workbook()
sheet12 = workbook.active

sheet12["A1"] = "hello"
sheet12["B1"] = "world!"

workbook.save(filename="C:/Users/Shalki/Desktop/python/display1.xlsx")

Reading all worksheet names and cells data of existing workbook:

from openpyxl import load_workbook
workbook = load_workbook(filename="C:/Users/Shalki/Desktop/python/display1.xlsx")
a = []
a = workbook.sheetnames

for i in a:
    print(i)
    

sheet = workbook.active

print(sheet["A1"].value)

print(sheet.cell(row=8, column=6).value)

Sheet

Sheet1

Sheet2

London

Australia

Iterating through Data

from openpyxl import load_workbook
workbook = load_workbook(filename="C:/Users/Shalki/Desktop/python/display1.xlsx")
sheet = workbook.active

for row in sheet.iter_rows(min_row=1,max_row=2,min_col=1,max_col=3,values_only=True):
    print(row)

('gyansetu', 46, 976)

(324, 45, 57)

Reading data from a particular worksheet

from openpyxl import load_workbook
workbook = load_workbook(filename="C:/Users/Shalki/Desktop/python/display1.xlsx")
a = []
a = workbook.sheetnames

for s in range(len(workbook.sheetnames)):
    if workbook.sheetnames[s] == 'Sheet1':
        break
    
workbook.active = s
sheet = workbook.active

print(sheet["A1"].value)

print(sheet.cell(row=8, column=6).value)

Sheet

Sheet1

Sheet2

Hello

Students

Putting data in a Dictionary:

import json
from openpyxl import load_workbook

workbook = load_workbook(filename="C:/Users/Shalki/Desktop/python/display1.xlsx")
sheet = workbook.active

products = {}

# Using the values_only because you want to return the cells' values
for row in sheet.iter_rows(min_row=2,
                           min_col=4,
                           max_col=5,
                           values_only=True):
    product_id = row[0]
    product = {
        "parent": row[1],
    }
    products[product_id] = product

# Using json here to be able to format the output for displaying later
print(products)
print("The code returns json format ")
print(json.dumps(products))

{2: {'parent': 'Shalki'}, 3: {'parent': 'Sanchi'}, 4: {'parent': 'Milanz'}, 5: {'parent': 'Anuj'}}

The code returns json format

{"2": {"parent": "Shalki"}, "3": {"parent": "Sanchi"}, "4": {"parent": "Milanz"}, "5": {"parent": "Anuj"}}

Appending Excel/New data

from openpyxl import load_workbook

# Start by opening the spreadsheet and selecting the main sheet
workbook = load_workbook(filename="C:/Users/Shalki/Desktop/python/display1.xlsx")
sheet = workbook.active

# Write what you want into a specific cell
sheet["C1"] = "writing ;)"

# Save the spreadsheet
workbook.save(filename="C:/Users/Shalki/Desktop/python/display1_append.xlsx")

Insert/Delete Row & Column

from openpyxl import load_workbook
workbook = load_workbook(filename="C:/Users/Shalki/Desktop/python/display1.xlsx")
sheet = workbook.active

def print_rows():
    for row in sheet.iter_rows(values_only=True):
        print(row)

print_rows()
('hello', 'world!')

# Insert a column before the existing column 1 ("A")
sheet.insert_cols(idx=1)
print_rows()
(None, 'hello', 'world!')

workbook.save(filename="C:/Users/Shalki/Desktop/python/display2_append.xlsx")

 # Insert 5 columns between column 2 ("B") and 3 ("C")
sheet.insert_cols(idx=3, amount=5)
print_rows()
(None, 'hello', None, None, None, None, None, 'world!')
workbook.save(filename="C:/Users/Shalki/Desktop/python/display3_append.xlsx")

# Delete the created columns
sheet.delete_cols(idx=3, amount=5)
sheet.delete_cols(idx=1)
print_rows()
('hello', 'world!')
workbook.save(filename="C:/Users/Shalki/Desktop/python/display4_append.xlsx")

# Insert a new row in the beginning
sheet.insert_rows(idx=1)
print_rows()
(None, None)
('hello', 'world!')

# Insert 3 new rows in the beginning
sheet.insert_rows(idx=1, amount=3)
print_rows()
(None, None)
(None, None)
(None, None)
(None, None)
('hello', 'world!')

workbook.save(filename="C:/Users/Shalki/Desktop/python/display5_append.xlsx")
# Delete the first 4 rows
sheet.delete_rows(idx=1, amount=4)
print_rows()
('hello', 'world!')

workbook.save(filename="C:/Users/Shalki/Desktop/python/display6_append.xlsx")

Reading/Creating/Renaming Excel Worksheets

from openpyxl import load_workbook
workbook = load_workbook(filename="C:/Users/Shalki/Desktop/python/display1.xlsx")
sheet = workbook.active

workbook.sheetnames
['Products', 'Company Sales']

# You can select a sheet using its title
products_sheet = workbook["Products"]
sales_sheet = workbook["New Sales"]
products_sheet["C3"] = "writing ;)"

#renaming a worksheet
sales_sheet.title = "New Sales1"

# You can also define the position to create the sheet at
hr_sheet = workbook.create_sheet("HR", 2)

workbook.save(filename="C:/Users/Shalki/Desktop/python/display1.xlsx")

Adding Filters:

Filters and sorts can only be configured by openpyxl but will need to be applied in applications like Excel. This is because they actually rearranges or format cells or rows in the range.

from openpyxl import load_workbook
workbook = load_workbook(filename="C:/Users/Shalki/Desktop/python/display1.xlsx")
sheet = workbook.active

workbook.sheetnames
['Products', 'Company Sales']

# You can select a sheet using its title
products_sheet = workbook["New Sales1"]

#'A1:F10'

products_sheet.auto_filter.ref = products_sheet.dimensions
products_sheet.auto_filter.add_filter_column(1, ["Payal", "Kunal"])
workbook.save(filename="C:/Users/Shalki/Desktop/python/display3.xlsx")
products_sheet.auto_filter.add_sort_condition("B2:B15")

workbook.save(filename="C:/Users/Shalki/Desktop/python/display1.xlsx")

Excel Formulas

from openpyxl.utils import FORMULAE
from openpyxl import load_workbook
workbook = load_workbook(filename="C:/Users/Shalki/Desktop/python/display1.xlsx")
HR_sheet = workbook["HR"]
HR_sheet["B10"] = "=AVERAGE(B2:B6)"

#HR_sheet["A7"] = '=SUMIF(A1:A6,"aa",B1:B6)'
HR_sheet["A7"] = '=COUNTIF(B1:B5, ">100")'

workbook.save(filename="C:/Users/Shalki/Desktop/python/display1.xlsx")

Adding Charts

from openpyxl import Workbook
from openpyxl.chart import BarChart, Reference
workbook = load_workbook(filename="C:/Users/Shalki/Desktop/python/display1.xlsx")
HR_sheet = workbook["HR"]
chart = BarChart()
data = Reference(worksheet=HR_sheet,
                 min_row=1,
                 max_row=5,
                 min_col=1,
                 max_col=2)

chart.add_data(data, titles_from_data=False)
HR_sheet.add_chart(chart, "E2")

workbook.save(filename="C:/Users/Shalki/Desktop/python/display1.xlsx")

Ref:- https://realpython.com/openpyxl-excel-spreadsheets-python/

Last updated