Python Tutorial
CS 418/BIO 365
30 August 2016
# This is a comment
""" This is a
multi-
line
comment"""
''' This is also a
multi-
line
comment'''
# How do I declare variables?
# String
my_string = 'Hello, CS 418/BIO 365!!'
my_other_string = "Hello, friends."
# Numbers
my_number = 418
my_other_number = 365.578
# List
my_list = ['this', 'is', 'fun', 56]
# or
my_other_list = []
# or
my_other_list2 = list()
my_other_list.append('bioinformatics')
# Dictionary (Map)
my_dict = {'key': 'value', 'other_key': 'other_value'}
# or
my_other_dict = {}
# or
my_other_dict2 = dict()
my_other_dict['my_key'] = 'my_value'
# How do I make loops?
# NOTE: Whitespace matters!!
# for loop
for i in my_list:
print(i)
# NOTE: "print(i)" is used in Python 3, "print i" is used in Python 2
this
is
fun
56
# for loop over range
for i in range(0, 5):
my_other_list.append(i)
print(my_other_list)
['bioinformatics', 0, 1, 2, 3, 4]
# while loop
while len(my_list) > 0:
del my_list[-1]
print(my_list)
['this', 'is', 'fun']
['this', 'is']
['this']
[]
# break
for val in my_other_list:
if val is 2:
break
else:
print(val)
bioinformatics
0
1
# continue
for val in my_other_list:
if val is not 2:
continue
print(val)
2
# How do I perform file I/O?
# file input
import sys # this imports the library to get command line arguments
with open(sys.argv[1]) as fh: # sys.argv[1] is the first command line argument, sys.argv[2] is the second ... and so on.
my_first_line = next(fh) # Python 2 & 3
my_next_line = fh.next() # Python 2
with open(sys.argv[1]) as other_fh:
# iterate over all lines in file
for line in other_fh:
print(line)
ACGTTGCATGTCGCATGATGCATGAGAGCT
4
# file output
with open('./my_file.txt', 'w') as writable: ''' NOTE: the second
argument ('w') makes this file writable '''
writable.write('This is my test file\n')
writable.write(my_first_line)
# How do I manipulate strings?
# string slice
my_string = 'banana'
print(my_string[3])
print(my_string[0:5])
print(my_string[1:5])
print(my_string[-1])
print(my_string[0:-2])
a
banan
anan
a
bana
Rosalind Tutorial
Use your NetID as your username!!
# What do we need to do in pseudo code?
''' -Read in the sequence and the
length of the kmer (a kmer is essentially a substring)
-Break up the sequence into kmers
-Count each kmer
-Find the kmer(s) with the highest counts
-Print out the highest count kmers'''
# Read in the sequence and the length of the kmer from a file
import sys
with open(sys.argv[1]) as file:
seq = next(file).strip() # REMEMBER: in Python 2 use file.next()
kmer_len = int(next(file).strip())
# Break up the sequence into kmers
for i in range(0, len(seq)):
kmer = seq[ i : i + kmer_len]
# Count each kmer
counts = {}
for i in range(0, len(seq)):
kmer = seq[ i : i + kmer_len]
if kmer not in counts:
counts[kmer] = 1
else:
counts[kmer] += 1
# Find the kmer(s) with the highest counts
max_count = 0
max_kmers = []
for kmer, count in counts.items(): # NOTE: use counts.iteritems() in Python 2
if count > max_count:
max_count = count
max_kmers.clear()
max_kmers.append(kmer)
elif count == max_count:
max_kmers.append(kmer)
# Print out the kmers with the highest counts
print(" ".join(max_kmers))