数据处理 Sql Basic

SQL(Structured Query Language)==是“结构化查询语言”,它是对关系型数据库的操作语言。它可以应用到所有关系型数据库中,例如:MySQL、Oracle、SQL
虽然 SQL 可以用在所有关系型数据库中,但很多数据库还都有标准之后的一些语法,我们可以称之为“方言”。例如 MySQL 中的 LIMIT 语句就是 MySQL 独有的方言,其它数据库都不支持!当然,Oracle 或 SQL Server 都有自己的方言。
模型训练需要使用大量的数据 使用python读取数据库再用 pandas 进行操作 占用时间/内存资源 并且处理大量数据时效率较低
实际使用时直接在数据库里进行简单的字符串操作/数据简单的加减乘除等 最后把处理后的数据结果表拉近python用于模型训练等操作

sql正则 查找

1
2
3
4
5
6
SELECT name FROM person_tbl WHERE name REGEXP '^st'  			查找name字段中以'st'为开头的所有数据
SELECT name FROM person_tbl WHERE name REGEXP 'ok$' 查找name字段中以'ok'为结尾的所有数据
SELECT name FROM person_tbl WHERE name REGEXP 'mar' 查找name字段中包含'mar'字符串的所有数据
SELECT name FROM person_tbl WHERE name REGEXP '^[aeiou]|ok$' 查找name字段中以元音字符开头或以'ok'字符串结尾的所有数据
SELECT DISTINCT TABLE_NAME FROM information_schema.COLUMNS WHERE COLUMN_NAME = 'description' AND TABLE_SCHEMA='bugs' ;
从整个数据库查找某个字段

基本条件查找

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
/*一般性where语句*/
SELECT population FROM world WHERE name = 'France'
SELECT name, population FROM world WHERE name IN ('Brazil', 'Russia', 'India', 'China');
SELECT name, area FROM world WHERE area BETWEEN 250000 AND 300000

/*字符串模式匹配*/
SELECT name FROM world WHERE name LIKE 'Y%' /*以y开头*/
SELECT name FROM world WHERE name LIKE 'C%ia' /*以C开头ia结尾*/
SELECT name FROM world WHERE name LIKE '_n%' ORDER BY name /*第二个字符是n,下划线是字母通配符*/

SELECT name FROM world WHERE name LIKE '%a%' AND name LIKE '%e%' AND name LIKE '%i%' AND name LIKE '%o%' AND name LIKE '%u%' AND name NOT LIKE '% %'

/*ROUND(*, n), n为正则精确到小数点位数,n为负则精确到相应位数*/
SELECT name, ROUND(population/1000000,2) FROM world WHERE continent='South America'
SELECT name, ROUND(gdp/population,-3) FROM world WHERE gdp>1000000000000

/* LENGTH()*/
SELECT name, capital FROM world WHERE LENGTH(name)=LENGTH(capital)
/* LEFT(*,n)前n个字符,<>,或!=不等于*/
SELECT name,capital FROM world WHERE LEFT(name,1)=LEFT(capital,1) AND name<>capital
/* SUBSTR(str, -2) str最后两个字符 */
select first_name from employees order by substr(first_name, -2)

/*LIMIT n从0开始取n个数据,LIMIT m,n从m开始取n个数据*/
select * from employees order by hire_date desc limit 2,1 /*取第三个数据*/

/* ||连接符 */
select last_name || ' ' || first_name from employees

/* distinct去重,常用去重计算和去重得到值唯一表 */
SELECT title, COUNT(DISTINCT emp_no) AS t FROM titles
GROUP BY title HAVING t >= 2

/* CAST()改变数据类型 */
SELECT CAST(f.population AS FLOAT)

SELECT CONCAT('王','(','ni',')')
FROM hr.ods_employee_delta_1d AS e

SELECT CONCAT(name,'(',ename,')')
FROM user_list

join 语句

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
select s.*, d.dept_no from salaries as s
inner join dept_manager as d on d.emp_no=s.emp_no
where d.to_date='9999-01-01' and s.to_date='9999-01-01'

/* ,号也可以查询两张表,效果同inner join连接两张表*/
SELECT e.emp_no, s.salary FROM employees AS e, salaries AS s
WHERE e.emp_no = s.emp_no AND e.hire_date = s.from_date
ORDER BY e.emp_no DESC

/* GROUP BY聚合后MAX(), MIN(), AVG(),SUM(), COUNT() 等操作*/
select de.dept_no, de.emp_no, max(s.salary)
from dept_emp as de
inner join salaries as s
on s.emp_no = de.emp_no
where de.to_date = '9999-01-01'
group by de.dept_no

/*多层连接*/
select e.emp_no, (s1.salary-s2.salary) growth
from employees as e
inner join salaries as s1
on e.emp_no = s1.emp_no and s1.to_date = '9999-01-01'
inner join salaries as s2
on e.emp_no = s2.emp_no and e.hire_date = s2.from_date
order by growth asc

Powered by Hexo and Hexo-theme-hiker

Copyright © 2013 - 2024 HELLO WORLD All Rights Reserved.

UV : | PV :