在数据探查、分析时,经常会将 CSV、XML 或 JSON 等文件加载到 pandas DataFrame 中,
但 DataFrame 不能直接使用 SQL 进行分析。
本文提供两种示例:先将 DataFrame 写入 SQLite,然后再使用 SQL 进行分析。
# Example 1: write a DataFrame to SQLite through SQLAlchemy, then query it
# with both pandas and a SQLAlchemy session.
import pandas as pd
import sqlite3
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker

data = {
    'product_name': ['Computer', 'Tablet', 'Monitor', 'Printer'],
    'price': [900, 300, 450, 150],
}
df = pd.DataFrame(data, columns=['product_name', 'price'])

engine = create_engine('sqlite:///test.db', echo=False)

# Hand pandas the engine itself so it opens, commits and closes its own
# connection; an inline engine.connect() would never be closed.
df.to_sql('products', engine, if_exists='replace', index=False)

# Query the data with pandas.
most_expensive = pd.read_sql(
    "SELECT * FROM products WHERE price = (SELECT max(price) FROM products)",
    engine,
)
# Print the result explicitly; a bare expression shows nothing in a script.
print(most_expensive)

# Query the data with SQLAlchemy.
Session = sessionmaker(bind=engine)
db_session = Session()
try:
    # Raw SQL strings must be wrapped in text(): SQLAlchemy 1.4 deprecates
    # passing a plain str to execute() and 2.0 rejects it.
    for row in db_session.execute(text('select * from products')):
        print(row)
finally:
    # Return the session's connection to the pool, then release the pool.
    db_session.close()
    engine.dispose()
# Example 2: write a DataFrame to SQLite with the standard-library sqlite3
# driver, then read the query result back into a DataFrame.
import sqlite3
from contextlib import closing

import pandas as pd


def query_sql(conn, sql, params=()):
    """Execute a query and return all of its result rows.

    Args:
        conn: An open DB-API connection (here, a ``sqlite3.Connection``).
        sql: The SQL statement to run; may contain ``?`` placeholders.
        params: Values bound to the placeholders. Defaults to none.

    Returns:
        The list of row tuples produced by ``cursor.fetchall()``.
    """
    # closing() releases the cursor even if execute() raises.
    with closing(conn.cursor()) as cursor:
        cursor.execute(sql, params)
        return cursor.fetchall()


def execute_sql(conn, sql, params=()):
    """Execute a statement and commit the transaction.

    Args:
        conn: An open DB-API connection (here, a ``sqlite3.Connection``).
        sql: The SQL statement to run; may contain ``?`` placeholders.
        params: Values bound to the placeholders. Defaults to none.
    """
    with closing(conn.cursor()) as cursor:
        cursor.execute(sql, params)
    # Commit only after the statement succeeded.
    conn.commit()


conn = sqlite3.connect('test_database')
try:
    # NOTE: to_sql(if_exists='replace') below drops and recreates this table,
    # so this statement only demonstrates execute_sql.
    execute_sql(
        conn,
        'CREATE TABLE IF NOT EXISTS products (product_name text, price number)',
    )

    data = {
        'product_name': ['Computer', 'Tablet', 'Monitor', 'Printer'],
        'price': [900, 300, 450, 150],
    }
    df = pd.DataFrame(data, columns=['product_name', 'price'])
    df.to_sql('products', conn, if_exists='replace', index=False)

    # Rebuild a DataFrame from the raw row tuples returned by the query.
    df = pd.DataFrame(
        query_sql(
            conn,
            'SELECT * FROM products WHERE price = (SELECT max(price) FROM products)',
        ),
        columns=['product_name', 'price'],
    )
    print(df)
finally:
    # Always release the database file handle, even when a step above fails.
    conn.close()