Mind Map Community: Python Basics (Python基础入门)
Learning path: iFLYTEK AI University (讯飞AI大学) "Python Basics", Coursera "Crash Course on Python" (Google). Please credit the source when reposting. MindMaster @Tracy
Edited 2020-07-07 09:30:52
Python
syntax
comment
#comment here #line by line
''' comment block '''
OR """ """
output
a = 10
b = 1
print(a)
#10
print(a + b - 3)
#8
str1 = "World"
print('Hello' + str1)
#HelloWorld
print('Hello', str1, 123)
#Hello World 123
input
yourName = input()
yourName = input('Please enter your name: ')
variables
bool
a = True
a = False
number
int
x = 1
float
y = x + 10.0
operators
+ - * % (int if both operands are int, else float)
/ #division (always float)
// #floor division / quotient (int if both operands are int, else float)
** #exponentiation (int if both operands are int and the exponent is non-negative, else float)
> < >= <= == != (True or False)
no ++ or -- operators
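A minimal sketch of these operators; the values are chosen only for illustration.
a, b = 7, 2
print(a + b, a - b, a * b, a % b)   # 9 5 14 1  (all int because both operands are int)
print(a / b)                        # 3.5       (/ always returns float)
print(a // b)                       # 3         (// keeps int when both operands are int)
print(a ** b)                       # 49
print(a > b, a == b, a != b)        # True False True
# There is no a++ / a--; use augmented assignment instead.
a += 1
print(a)                            # 8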
str
' ' or " " or ''' '''
str = 'tracy'
str = 'Testing "123"'
#Testing "123"
str = "Testing '123'"
#Testing '123'
str = '1'+'3'+'8'
#138
str = '''Hello, Tracy
Hello, world!'''
#Hello, Tracy
#Hello, world! (triple quotes keep the line break)
str = "Hello, Tracy \
Hello, world!"
#Hello, Tracy Hello, world! (the backslash joins the two physical lines)
param1 = 'thing'
param2 = 'blank'
str = "Some {0} to be filled in this {1}".format(param1, param2)
#Some thing to be filled in this blank
indexing:
[0] to [n-1]
[-n] to [-1]
str[start:end:step]
start:end is a half-open interval: start is included, end is excluded
str[i:]
starting from str[i] to the end
str[::]
entire str
str[::x]
a new string made of str[0] and every str[i] where i is a multiple of x
cannot assign value to str[i] or str[i:j:k]
functions:
len(str)
no. of characters
.upper()
changes to all big cap
.isupper()
.lower()
changes to all small cap
.islower()
.isalpha()
True if letters only (no space etc)
.isdigit()
True if numbers only
.find('x')
the first index saving 'x'
.replace('old', 'new')
replace with auto adjusting length
.count('x')
number of 'x' in the string
.split('char')
split string with 'char' into elements in a list
'char'.join(str/list)
'-'.join('some text')
#s-o-m-e- -t-e-x-t
''.join(list)
packs all elements in a list together as a str
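A minimal sketch of the slicing and string methods above; s is used instead of str so the built-in str() is not shadowed.
s = 'Hello, Tracy'
print(len(s))                        # 12
print(s[0], s[-1])                   # H y
print(s[7:12])                       # Tracy
print(s[::2])                        # Hlo rc
print(s.upper())                     # HELLO, TRACY
print(s.find('Tracy'))               # 7
print(s.replace('Tracy', 'world'))   # Hello, world
print(s.count('l'))                  # 2
words = s.split(', ')                # ['Hello', 'Tracy']
print('-'.join(words))               # Hello-Tracy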
typecasting
float to int
myInt = int(myfloat)
truncates toward zero (rounds down for positive floats)
number to str
strNum = '123'
OR " "
strNum = str(realnum)
str to number
realNum = int(strnum)
ValueError if strnum actually holds a float such as '1.5'
realNum = float(strnum)
ascii to char
char = chr(asciiNum)
Getting the type of variable:
type(/*var*/)
Checking the type of variable (T/F):
isinstance(/*var*/, /*type*/)
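A minimal sketch of the conversions and type checks above; the variable names are illustrative.
myFloat = 3.7
myInt = int(myFloat)                 # 3  (truncates toward zero)
strNum = str(123)                    # '123'
realNum = int('42')                  # 42
realFloat = float('3.14')            # 3.14
char = chr(65)                       # 'A'
print(type(myInt))                   # <class 'int'>
print(isinstance(realFloat, float))  # True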
regular expression
import re
re.findall(myRegex, str)
saves match results as a list
re.sub(myRegex, 'to sub', str)
substitutes all match results with 'to sub'
myRegex = r' '
\d
OR [0-9]
OR [0123456789]
\D
OR [^0-9]
[a-z]
[A-Z]
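A minimal sketch of re.findall and re.sub with the character classes above; the sample text is made up.
import re

text = 'Order 66 shipped to 221B Baker Street'
digits = re.findall(r'\d+', text)         # ['66', '221']
words = re.findall(r'[A-Za-z]+', text)    # ['Order', 'shipped', 'to', 'B', 'Baker', 'Street']
masked = re.sub(r'\d', '#', text)         # 'Order ## shipped to ###B Baker Street'
print(digits, masked)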
data structures
list
list1 = []
list2 = list1
same address
list1 = [1, 'two', 3.0, 'four', ['li1', 'li2'], ('tu1', 'tu2'), {1:'di1', 2:'di2'}, mySet]
can include all data types
list1 = [i for i in range(start, end)]
#[start, start+1, ..., end-1]
list1 = list(range(start, end))
#[start, start+1, ..., end-1]
newList = list1 + list2
join lists without removing duplicates
list1 * n
repeat the list n times (i.e. append n-1 extra copies to the end)
indexing:
[0] to [n-1]
[-n] to [-1]
list1[start:end:step]
start:end is a half-open interval: start is included, end is excluded
negative step: in reversed order
can assign values to list1[i] or list1[i:j:k]
functions:
len(list1)
no. of elements
min/max(list1)
the smallest / greatest element
elements must be mutually comparable (e.g. all numbers)
.extend(list2)
add elements in list2 to the end
.append('element')
add an element to the end
'char'.join(list1)
join elements with 'char' to a string
.insert(i, 'element')
insert element before list1[i]
.pop(i)
remove and return the value of the ith element, default -1
.remove('element')
remove the first matching element
.reverse()
reverse elements
.index('element')
the index of the first matching element
.count('element')
no. of matching elements
.sort(reverse=/*True or False*/)
sort list with numbers descendingly or ascendingly
del list1[i]
remove the ith element
del list1
remove entire list
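A minimal sketch of the list operations above; the values are chosen only for illustration.
list1 = [3, 1, 2]
list1.append(4)            # [3, 1, 2, 4]
list1.extend([5, 6])       # [3, 1, 2, 4, 5, 6]
list1.insert(0, 'zero')    # ['zero', 3, 1, 2, 4, 5, 6]
print(list1.pop())         # 6, list becomes ['zero', 3, 1, 2, 4, 5]
list1.remove('zero')       # [3, 1, 2, 4, 5]
print(list1.index(2))      # 2
list1.sort(reverse=True)   # [5, 4, 3, 2, 1]
del list1[0]               # [4, 3, 2, 1]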
mutable
a mutable object (list/dict/set) declared directly in the class body rather than inside __init__() becomes a class attribute shared by all instances, so a change made through one instance is visible in every instance (see the sketch below)
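A minimal sketch of that shared-class-attribute behaviour; class Bag and its attribute names are made up for illustration.
class Bag:
    shared = []           # class attribute: one list shared by every instance
    def __init__(self):
        self.own = []     # instance attribute: a new list per instance

a, b = Bag(), Bag()
a.shared.append('x')
a.own.append('x')
print(b.shared)           # ['x']  the class-level list changed for b too
print(b.own)              # []     the per-instance list did not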
tuple
tup1 = (1, 'two', 3.0, 'four')
tup1 = ('one element only', )
tup2 = tup1
same address
immutable
dict
dict1 = {}
dict2 = dict1
same address
dict1 = {'key1': 'one', 'key2': 'two'}
keys are unique, assigned with the last value
any data type
dict1 = dict([(1, 'one'), (2, 'two')])
dict(list of tuples)
indexing:
dict1['key1']
functions:
.keys()
a list of keys
#dict_keys(['key1', 'key2', ...])
.values()
a list of values
#dict_values(['one', 'two', ...])
.items()
list of tuples(key, value)
#dict_items([('key1', 'one'), ('key2', 'two'), ...])
.clear()
clear all keys and values i.e. dict1 = {}
.update(dict2)
add keys and values in dict2 to dict1
del dict1['key']
remove matching key and value
del dict1
remove entire dictionary
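A minimal sketch of the dict operations above; keys and values are illustrative.
dict1 = {'key1': 'one', 'key2': 'two'}
print(dict1['key1'])           # one
print(list(dict1.keys()))      # ['key1', 'key2']
print(list(dict1.values()))    # ['one', 'two']
print(list(dict1.items()))     # [('key1', 'one'), ('key2', 'two')]
dict1.update({'key3': 'three'})
del dict1['key1']
print(dict1)                   # {'key2': 'two', 'key3': 'three'}
dict1.clear()                  # {}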
set
set1 = {1,2,4,5,3,5}
duplicates are removed automatically; elements are unordered (small integers may display sorted, but order is not guaranteed)
set1 = set([1,2,4,5,3,5])
set2 = set1
same address
newSet = set1 - set2
difference
newSet = set1 | set2
union
newSet = set1 & set2
intersection
newSet = set1 ^ set2
symmetric difference
set1 </<= set2
True if set1 ⊂/⊆ set2
no indexing
functions:
.add('element')
insert the element
.update(set2)
insert all non-duplicating elements in set2 to set1
.remove('element')
remove the matching element (KeyError if it is not present)
mutable
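A minimal sketch of the set operators and methods above; the values are illustrative.
set1 = {1, 2, 4, 5, 3, 5}      # duplicates are dropped: {1, 2, 3, 4, 5}
set2 = {4, 5, 6}
print(set1 - set2)             # {1, 2, 3}
print(set1 | set2)             # {1, 2, 3, 4, 5, 6}
print(set1 & set2)             # {4, 5}
print(set1 ^ set2)             # {1, 2, 3, 6}
print({4, 5} <= set2)          # True ({4, 5} is a subset of set2)
set1.add(7)
set1.update({8, 9})
set1.remove(7)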
type-casting
one to one
list/tuple/set(collection)
when collection is a dict, only keys are kept
multi to one
1. z = zip(col1, col2, ...)
when col is a dict, only keys are kept
2. list(z)/tuple(z)/set(z)
a list/tuple/set of tuples (col1[i], col2[i], ...), one tuple per position i
OR dict(z) if 2 collections
dict containing col1 as keys, col2 as values
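A minimal sketch of zip() across two collections; the names and ages are made up.
names = ['Ann', 'Bob']
ages = (30, 25)
z = zip(names, ages)
print(list(z))                 # [('Ann', 30), ('Bob', 25)]
print(dict(zip(names, ages)))  # {'Ann': 30, 'Bob': 25}  (zip is single-use, so build it again)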
address
get address:
id(/*var*/)
copy list/tuple/dict:
import copy
col2 = copy.deepcopy(col1)
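A minimal sketch of assignment versus copy.deepcopy for a nested list.
import copy

col1 = [[1, 2], [3, 4]]
alias = col1                   # same address: id(alias) == id(col1)
col2 = copy.deepcopy(col1)     # new object; nested lists are copied too
col1[0].append(99)
print(alias[0])                # [1, 2, 99]  alias follows col1
print(col2[0])                 # [1, 2]      the deep copy is unaffected
print(id(col1) == id(alias), id(col1) == id(col2))  # True False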
flow control
logic operators
and or not
branch
if /*condition*/:
    #actions
elif /*condition*/:
    #actions
else:
    #actions
loop
while /*condition*/:
    #actions
for /*var*/ in /*obj*/:
    #actions
/*obj*/
range(start, end, step)
range is a half-open interval: start is included, end is excluded
defaults: start=0, step=1 (end is required)
str
list
dict.keys()
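A minimal sketch of branching and looping over a range, a counter, and dict keys; the values are illustrative.
for i in range(1, 6):            # 1, 2, 3, 4, 5 (6 is excluded)
    if i % 2 == 0:
        print(i, 'even')
    elif i == 5:
        print(i, 'last odd')
    else:
        print(i, 'odd')

n = 3
while n > 0:                     # prints 3, 2, 1
    print(n)
    n -= 1

for key in {'a': 1, 'b': 2}.keys():
    print(key)                   # a, b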
OOP
class A: pass
class B:
    att1 = ""
    def meth1(self):
        return ...
    def __init__(self, x):
        self.att1 = x
    def __str__(self):
        return "This is class B. It has an attribute and a method."
inheritance
class D(B): att1 = "attribute one of derived class D from B"
composition
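A minimal sketch of inheritance and composition, reusing class B defined just above; class E and its attribute names are made up for illustration.
class D(B):
    att1 = "attribute one of derived class D from B"

class E:
    def __init__(self, x):
        self.part = B(x)         # composition: E has-a B rather than is-a B

d = D(10)                        # D inherits B.__init__, so att1 becomes 10
print(d.att1)                    # 10
print(str(d))                    # This is class B. It has an attribute and a method.
print(E(5).part.att1)            # 5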
standard library
import random
random.randint(1, 100)
import datetime
now = datetime.datetime.now()
#YYYY-MM-DD hh:mm:ss.ffffff (microseconds)
delta = datetime.timedelta(days=28)
.year, .month, .day, etc.
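A minimal sketch of the random and datetime calls above.
import random
import datetime

print(random.randint(1, 100))    # a random int between 1 and 100, inclusive
now = datetime.datetime.now()    # e.g. 2020-07-07 09:30:52.123456
delta = datetime.timedelta(days=28)
print(now.year, now.month, now.day)
print(now + delta)               # the same timestamp 28 days later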
read-write
native
open file
f = open(/*filename*/, /*mode*/, encoding=/**/)
mode
'r'
read-only, default
'w'
write, content overwrites original file
'x'
open a new file and write
error if file exists
'a'
write, content adds to the end of original file
encoding=?
None
default
'utf-8'
read file
f.read()
read entire file in string
f.readline()
read line by line
the file pointer moves to the start of the next line
f.readlines()
read entire file in list
see also:
.strip(chars)
remove any of the characters in chars from both ends
.strip()
remove leading and trailing whitespace
see also:
string functions
write file
f.write(/*str*/)
pointer position
f.seek(offset, pos)
offset
number of bytes to move relative to the reference point (0 up to the file size when pos=0)
pos
default 0
beginning of the file
1
current pointer position
2
end of the file
close file
f.close()
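A minimal sketch of writing and then reading a text file; demo.txt is a made-up filename.
with open('demo.txt', 'w', encoding='utf-8') as f:
    f.write('line one\nline two\n')

f = open('demo.txt', 'r', encoding='utf-8')
print(f.read())              # whole file as one string
f.seek(0)                    # move the pointer back to the beginning
print(f.readline().strip())  # 'line one'
f.seek(0)
print(f.readlines())         # ['line one\n', 'line two\n']
f.close()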
pandas
import pandas as pd
data = pd.read_table/csv/excel( /*filename*/, sep=/*char*/, header=/*None or int*/, encoding=/**/, names=/*list*/ )
sep=?
separate columns by any char
'\t'
table default
','
read_csv default (read_excel does not take sep)
header=?
None
no header row; columns are numbered 0, 1, ... unless names= is given
0 (default)
row 0 is used as the header
any positive int < n
that row is used as the header
encoding=?
'utf-8'
names=?
names of column headers
default [0, 1, ...]
['col 1', 'col 2', ...]
data.to_csv/excel( /*filename*/, sep=/*char*/, header=/*int*/, encoding=/**/, index=/*bool*/ )
index=?
default True
keep the indexes of 0 to n-1 as the leftmost column
False
excel: formats index column and header row with solid border, bold font
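A minimal sketch of read_csv and to_csv; data.csv and the column names are made up, so this only runs if such a file exists.
import pandas as pd

data = pd.read_csv('data.csv', sep=',', header=0, encoding='utf-8',
                   names=['id', 'name', 'score'])
print(data.head())

data.to_csv('data_out.csv', sep=',', encoding='utf-8', index=False)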
pymysql
import pymysql
1. create connection object
conn = pymysql.connect( host=' ', port=/*int*/, user=' ', passwd=' ', db=' ', charset=' ' )
host=?
server address
e.g. '127.0.0.1', 'localhost'
port=?
e.g. 3306
* 80 and 443 are typical web-server ports; 3306 is the default MySQL port
user=?
e.g. 'root'
db=?
name of database
if omitted, no default database is selected (qualify tables as db.table in SQL)
charset=?
e.g. 'utf8'
2. get cursor
a handle used to execute SQL statements and fetch their results
cur = conn.cursor()
SQL functions
sql_select = '''SELECT *
FROM db1.table1
WHERE /*conditions*/ '''
optional
e.g. col1 >= k
and, or
3. cursor processing
i. cur.execute(sql_select)
ii. data = cur.fetchall()
in tuple format
#((0, 'col1 item1', 'col2 item1', ...), (1, 'col1 item2', 'col2 item2', ...), ...)
data = cur.fetchmany(n)
fetch the next n rows
data = cur.fetchone()
4. tuple to data frame
import pandas as pd
df = pd.DataFrame( list(data), index=/*None or int*/, columns=['id', 'col 1', 'col 2', ...])
change to data frame format from pandas
keep original column headers:
columns=[i[0] for i in cur.description]
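A minimal sketch of the whole workflow above; the host, credentials, and db1.table1 are placeholders, so it only runs against a real MySQL server.
import pymysql
import pandas as pd

conn = pymysql.connect(host='127.0.0.1', port=3306, user='root',
                       passwd='secret', db='db1', charset='utf8')
cur = conn.cursor()
cur.execute('SELECT * FROM db1.table1 WHERE col1 >= 10')
data = cur.fetchall()                               # tuple of row tuples
df = pd.DataFrame(list(data),
                  columns=[i[0] for i in cur.description])  # keep original headers
cur.close()
conn.close()
print(df.head())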
SSH connection
from sshtunnel import SSHTunnelForwarder
with SSHTunnelForwarder(
        ssh_address_or_host=('172.16.59.13', int('20222')),
        ssh_username='ssh username',
        ssh_password='ssh password',
        local_bind_address=('127.0.0.1', int('2222')),
        remote_bind_address=('mysql server address', int('port no.'))
) as server:
    conn = pymysql.connect(
        host='127.0.0.1',
        port=2222,
        user='mysql username',
        password='mysql password')
    cur = conn.cursor()
pandas
import pandas as pd
cast to df:
df = pd.DataFrame( )
/*dict*/
key as column names, listed values as rows
{ 'k1': [c1i1, c2i2, ...], 'k2': [c2i1, c2i2, ...], ... }
/*list*/, columns=[]
elements as rows
[ [c1i1, c2i1, ...], [c1i2, c2i2, ...], ...]
/*tuple*/, columns=[]
elements as rows
( (c1i1, c2i1, ...), (c1i2, c2i2, ...), ...)
cast to series:
s = pd.Series([data1, data2, ...], index=["r1", "r2"], name="c1")
get df data type:
df.dtypes
OR df.colname.dtypes
#int64 OR float64 OR bool OR object
change df data type:
df.colname = df.colname.astype( )
int
#int32
float
#float64
bool
object
set index:
df.reset_index()
df.set_index(['col1', 'col2', ...])
df.set_index('Col1')
get index:
df.index
#RangeIndex(start=m, stop=n, step=x)
default df has m=0, n=no. of rows, x=1
list(df.index)
#[0, 1, ...]
get column names:
df.columns
#Index(['col 1', 'col 2', ...], dtype='object')
if column names were never set, this is a RangeIndex like df.index
list(df.columns)
#['col 1', 'col2', ...]
if column names were never set, this returns [0, 1, ...]
rename column names:
df = df.rename(columns={'old c1': 'new c1', ...})
subset observation:
by row
df.head(k)
the first k rows
default 5
df.tail(k)
the last k rows
df.fillna(value)
fill NaN cells with value
df.dropna()
remove rows with NaN cells
df.drop_duplicates()
df[start:end:x]
rows with positional index from start to end-1, taking every x-th row
df[/*conditions*/]
df.isnull().values==True
#[[True True False ...] ...]
df.notnull().values==True
* produces duplicated rows when one row has more than one cell matching the True/False test
(df.col2>=x) & (df.col3<y)
use & and | only; the keywords and / or raise an error here
wrap each condition in ( ) when combining more than one
df.query(/*sql-like expressions*/)
"col2>=x and col3<y"
by column
df[['col1', ...]]
only the listed columns
use df['colname'] to select a single column (returns a Series)
df/*functions*/
.filter(regex=' ')
columns of names matching regex rule
.iloc[:, [i, j, ...]]
ith, jth, ...th column
starts from 0th
.loc[:, 'coli':'colj']
coli to colj
.loc[/*conditions*/, /*col list*/]
df['col1'] > x, ['col1', 'col3']
listed columns with rows satisfying conditions
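A minimal sketch of row and column subsetting; the frame and column names are made up.
import pandas as pd

df = pd.DataFrame({'col1': [1, 2, 2, None],
                   'col2': [10, 20, 20, 40],
                   'col3': ['a', 'b', 'b', 'c']})
print(df.head(2))                           # first 2 rows
print(df.dropna())                          # drop the row with NaN
print(df.drop_duplicates())                 # drop the duplicated row
print(df[(df.col2 >= 20) & (df.col1 < 3)])  # boolean filtering with & and ( )
print(df.query('col2 >= 20 and col1 < 3'))  # same filter, SQL-like
print(df[['col1', 'col3']])                 # column subset
print(df.iloc[:, [0, 2]])                   # columns 0 and 2
print(df.loc[df['col2'] > 10, ['col1', 'col3']])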
reshape data:
pivot
spread rows into columns
dfnew = pd.pivot_table( /*dfname*/, index=' ', columns=' ', values=' ', aggfunc=' ', fill_value=/*int*/ )
index, columns, values = ?
column names
aggfunc=?
used in values column
e.g. 'sum'
.reset_index()
reset the index to 0..n-1; the former index becomes the first ordinary column
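A minimal sketch of pd.pivot_table with made-up sales data.
import pandas as pd

df = pd.DataFrame({'shop': ['A', 'A', 'B', 'B'],
                   'month': ['Jan', 'Feb', 'Jan', 'Jan'],
                   'sales': [10, 20, 30, 40]})
dfnew = pd.pivot_table(df, index='shop', columns='month',
                       values='sales', aggfunc='sum', fill_value=0)
print(dfnew)
# month  Feb  Jan
# shop
# A       20   10
# B        0   70
print(dfnew.reset_index())   # 'shop' becomes an ordinary column again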
combine data:
concat
merge data frames by stacking rows or columns
works best when column names (axis=0) or indexes (axis=1) match; otherwise new columns or rows filled with NaN are created
dfnew = pd.concat( [df1, df2, ...], axis=/*0 or 1*/ )
axis=?
default 0
stack rows
1
stack columns
append
dfnew = df1.append(df2)
append rows of df2 to df1
.reset_index()
merge
dfnew = df1.merge( df2, on/left_on/right_on=' ', how=/**/ )
OR dfnew = pd.merge(df1, df2, ...)
on/left_on/right_on=?
column name used as the join key; with on= it must exist in both tables
e.g. id
on:
the key column has this name in both frames
left_on:
name of the key column in the left frame (df1)
right_on:
name of the key column in the right frame (df2)
how=?
decides which rows appear in the merged result
'inner'
with common keys in df1 and df2
'left'
with keys available in df1
'right'
with keys available in df2
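A minimal sketch of concat and merge with made-up frames.
import pandas as pd

df1 = pd.DataFrame({'id': [1, 2], 'name': ['Ann', 'Bob']})
df2 = pd.DataFrame({'id': [2, 3], 'score': [90, 80]})

stacked = pd.concat([df1, df2], axis=0).reset_index(drop=True)  # stack rows; unmatched columns become NaN
inner = pd.merge(df1, df2, on='id', how='inner')   # only id 2
left = df1.merge(df2, on='id', how='left')         # ids 1 and 2; score is NaN for id 1
print(stacked)
print(inner)
print(left)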
group data:
groupby
group = df.groupby(['coli', 'colj', ...])
data type: pandas data frame group
group according to these column values
['colx', 'coly', ...]
data type: pandas series
columns to be calculated with aggfunc
./*aggfunc*/
e.g. sum()
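A minimal sketch of groupby plus aggregation with made-up data.
import pandas as pd

df = pd.DataFrame({'shop': ['A', 'A', 'B'],
                   'month': ['Jan', 'Feb', 'Jan'],
                   'sales': [10, 20, 30],
                   'cost': [5, 8, 12]})
group = df.groupby(['shop'])               # DataFrameGroupBy object
totals = group[['sales', 'cost']].sum()    # aggregate the selected columns per shop
print(totals)
#       sales  cost
# shop
# A        30    13
# B        30    12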