2.1.1 문자열에서 단어가 나오는 횟수 세기(LINQ)
/// <summary>
/// Counts how many times a search term occurs in a block of text by using a LINQ query.
/// </summary>
class CountWords
{
    /// <summary>
    /// Splits the sample text into words, counts the words equal to the search term
    /// (ignoring case), and writes the count to the console.
    /// </summary>
    static void Main()
    {
        string text = @"Historically, the world of data and the world of objects" +
            @" have not been well integrated. Programmers work in C# or Visual Basic" +
            @" and also in SQL or XQuery. On the one side are concepts such as classes," +
            @" objects, fields, inheritance, and .NET Framework APIs. On the other side" +
            @" are tables, columns, rows, nodes, and separate languages for dealing with" +
            @" them. Data types often require translation between the two worlds; there are" +
            @" different standard functions. Because the object world has no notion of query, a" +
            @" query can only be represented as a string without compile-time type checking or" +
            @" IntelliSense support in the IDE. Transferring data from SQL tables or XML trees to" +
            @" objects in memory is often tedious and error-prone.";

        string searchTerm = "data";

        // Convert the string into an array of words. Punctuation and spaces act as
        // separators, and empty entries between adjacent separators are dropped.
        string[] source = text.Split(new char[] { '.', '?', '!', ' ', ';', ':', ',' }, StringSplitOptions.RemoveEmptyEntries);

        // Define the query. Nothing runs yet: the query is only executed when it is enumerated.
        // The comparison ignores case so that both "data" and "Data" match.
        var matchQuery = from word in source
                         where string.Equals(word, searchTerm, StringComparison.OrdinalIgnoreCase)
                         select word;

        // Count the matches. Count() produces a single value, so this call is what
        // actually executes the query (immediate execution).
        int wordCount = matchQuery.Count();
        Console.WriteLine("{0} occurrences(s) of the search term \"{1}\" were found.", wordCount, searchTerm);
    }
}
/* Output:
   3 occurrences(s) of the search term "data" were found.
*/
2.1.2 지정된 단어 집합이 들어 있는 문장 쿼리(LINQ)
// Return every sentence that contains all of the words "Historically", "data", and "integrated".
// This fragment reads the 'text' variable, which is not declared here.

// Split the text block into an array of sentences.
string[] sentences = text.Split(new char[] { '.', '?', '!' });

// Define the search terms. This list could also be dynamically populated at runtime.
string[] wordsToMatch = { "Historically", "data", "integrated" };

// For each sentence: 'let' splits it into words, Distinct removes repeated words, and
// Intersect keeps only the words that are also search terms. A sentence is selected when
// the size of that intersection equals the number of search terms, i.e. every term occurs.
// Note that the number of terms to match is not specified at compile time.
var sentenceQuery = from sentence in sentences
                    let w = sentence.Split(new char[] { '.', '?', '!', ' ', ';', ':', ',' }, StringSplitOptions.RemoveEmptyEntries)
                    where w.Distinct().Intersect(wordsToMatch).Count() == wordsToMatch.Length
                    select sentence;

// Execute the query. Note that you can explicitly type the iteration variable here
// even though sentenceQuery was implicitly typed.
foreach (string str in sentenceQuery)
{
    Console.WriteLine(str);
}
2.1.3 문자열의 문자 쿼리(LINQ)
String 클래스는 제네릭 IEnumerable<T> 인터페이스(IEnumerable<char>)를 구현하기 때문에 모든 문자열을 문자 시퀀스로 쿼리할 수 있다.
/// <summary>
/// Queries the characters of a string as a sequence of char values.
/// </summary>
class QueryAString
{
    /// <summary>
    /// Prints the digits contained in a sample string, their count, and the characters
    /// that precede the first '-'.
    /// </summary>
    static void Main()
    {
        string aString = "ABCDE99F-J74-12-89A";

        // Select only those characters that are digits.
        IEnumerable<char> stringQuery =
            from ch in aString
            where Char.IsDigit(ch)
            select ch;

        // Execute the query. Each digit is followed by a space and no newline is written.
        foreach (char c in stringQuery)
        {
            Console.Write(c + " ");
        }

        // Call the Count method on the existing query; this enumerates the query again.
        int count = stringQuery.Count();
        Console.WriteLine("Count = {0}", count);

        // Select all characters before the first '-'.
        IEnumerable<char> stringQuery2 = aString.TakeWhile(c => c != '-');

        // Execute the second query.
        foreach (char c in stringQuery2)
        {
            Console.Write(c);
        }

        Console.WriteLine(System.Environment.NewLine + "Press any key to exit");
    }
}
/* Output:
9 9 7 4 1 2 8 9 Count = 8
ABCDE99F
Press any key to exit
*/
2.1.4. LINQ 쿼리와 정규식 결합
Regex 클래스를 사용하여 텍스트 문자열에서 좀 더 복잡한 비교를 위해 정규식을 만드는 방법
/// <summary>
/// Combines a LINQ query with a regular expression to find matches in the text of .htm files.
/// </summary>
class QueryWithRegEx
{
    /// <summary>
    /// Returns a FileInfo for every file under <paramref name="path"/>, including subfolders.
    /// This method assumes that the application has discovery permissions for all folders
    /// under the specified path.
    /// </summary>
    /// <exception cref="System.IO.DirectoryNotFoundException">The folder does not exist.</exception>
    static IEnumerable<System.IO.FileInfo> GetFiles(string path)
    {
        if (!System.IO.Directory.Exists(path))
        {
            throw new System.IO.DirectoryNotFoundException();
        }

        List<System.IO.FileInfo> files = new List<System.IO.FileInfo>();
        foreach (string name in System.IO.Directory.GetFiles(path, "*.*", System.IO.SearchOption.AllDirectories))
        {
            files.Add(new System.IO.FileInfo(name));
        }
        return files;
    }

    public static void Main()
    {
        // Modify this path as necessary.
        string startFolder = @"c:\program files\Microsoft Visual Studio 9.0\";

        // Take a snapshot of the file system (information about every file in the folder tree).
        IEnumerable<System.IO.FileInfo> fileList = GetFiles(startFolder);

        // Create the regular expression to find all things "Visual".
        System.Text.RegularExpressions.Regex searchTerm =
            new System.Text.RegularExpressions.Regex(@"Visual (Basic|C#|C\+\+|J#|SourceSafe|Studio)");

        // Search the contents of each .htm file.
        // Remove the first where clause to find even more matches!
        // This query produces a list of files where a match was found, and a list of the
        // matches in that file.
        // Note: Explicit typing of "Match" in the inner query is required because
        // MatchCollection is not a generic IEnumerable collection.
        var queryMatchingFiles =
            from file in fileList
            where file.Extension == ".htm"                              // only .htm files
            let fileText = System.IO.File.ReadAllText(file.FullName)   // the whole content of the file
            let matches = searchTerm.Matches(fileText)                 // every occurrence of the pattern
            // Regex.Matches never returns null; it returns an empty collection when nothing
            // matches, so this filter only drops files without any match. The collection
            // computed above is reused instead of running the regex a second time.
            where matches.Count > 0
            select new
            {
                name = file.FullName,
                matchedValues = from System.Text.RegularExpressions.Match match in matches
                                select match.Value
            };

        // Execute the query.
        Console.WriteLine("The term \"{0}\" was found in:", searchTerm.ToString());

        foreach (var v in queryMatchingFiles)
        {
            // Trim the path a bit, then write the file name in which a match was found.
            string s = v.name.Substring(startFolder.Length - 1);
            Console.WriteLine(s);

            // For this file, write out all the matching strings.
            foreach (var v2 in v.matchedValues)
            {
                Console.WriteLine(" " + v2);
            }
        }
    }
}
2.1.5. 두 목록 간의 차집합 구하기(LINQ)
names1.txt에 있지만 names2.txt에는 없는 해당 줄을 출력
/// <summary>
/// Prints the lines that appear in names1.txt but not in names2.txt (set difference).
/// </summary>
class CompareLists
{
    static void Main()
    {
        // Create the IEnumerable data sources.
        string[] names1 = System.IO.File.ReadAllLines(@"../../../names1.txt");
        string[] names2 = System.IO.File.ReadAllLines(@"../../../names2.txt");

        // Create the query. Note that method syntax must be used here.
        // Except yields the lines of names1 that do not occur in names2.
        IEnumerable<string> differenceQuery = names1.Except(names2);

        // Execute the query.
        Console.WriteLine("The following lines are in names1.txt but not names2.txt");
        foreach (string s in differenceQuery)
        {
            Console.WriteLine(s);
        }
    }
}
2.1.6. 단어 또는 필드에 따라 텍스트 데이터 정렬 또는 필터링(LINQ)
쉼표로 구분된 값과 같은 구조화된 텍스트 줄을 해당 줄에서 임의의 필드를 기준으로 정렬하는 방법
scores.csv의 필드는 학생의 ID 번호와 4개의 테스트 점수를 나타낸다고 가정
/// <summary>
/// Sorts the lines of a comma-separated score file by one of its fields.
/// </summary>
public class SortLines
{
    static void Main()
    {
        // Create an IEnumerable data source.
        string[] scores = System.IO.File.ReadAllLines(@"../../../scores.csv");

        // Change this to any value from 0 to 4.
        int sortField = 1; // the second field (indexes start at 0)

        Console.WriteLine("Sorted highest to lowest by field [{0}]:", sortField);

        // Demonstrates how to return a query from a method.
        // The query is executed here, when the foreach enumerates it.
        foreach (string str in RunQuery(scores, sortField))
        {
            Console.WriteLine(str);
        }
    }

    /// <summary>
    /// Builds a query that orders <paramref name="source"/> lines from highest to lowest
    /// by the comma-separated field at index <paramref name="num"/>.
    /// </summary>
    /// <param name="source">Lines whose comma-separated fields are integers.</param>
    /// <param name="num">Zero-based index of the field to sort on.</param>
    /// <returns>The query variable, not query results; it runs when enumerated.</returns>
    static IEnumerable<string> RunQuery(IEnumerable<string> source, int num)
    {
        // Split each line and sort on field[num]. The field is converted to a number so that
        // values with different digit counts order correctly (as text, "100" sorts below "99").
        // Convert.ToInt32 tolerates the leading space that follows each comma.
        var scoreQuery = from line in source
                         let fields = line.Split(',')
                         orderby Convert.ToInt32(fields[num]) descending
                         select line;

        return scoreQuery;
    }
}
/* Output (if sortField == 1):
   Sorted highest to lowest by field [1]:
    116, 99, 86, 90, 94
    120, 99, 82, 81, 79
    111, 97, 92, 81, 60
    114, 97, 89, 85, 82
    121, 96, 85, 91, 60
    122, 94, 92, 91, 91
    117, 93, 92, 80, 87
    118, 92, 90, 83, 78
    113, 88, 94, 65, 91
    112, 75, 84, 91, 39
    119, 68, 79, 88, 92
    115, 35, 72, 91, 70
*/
2.1.7. 구분된 파일의 필드 다시 정렬후 파일로 저장.
- spreadsheet1.csv라는 일반 텍스트 내용
- 파일을 읽어 쉼표로 구분하고, 3번째 필드로 정렬한 후 다시 파일로 저장
// Load every row of the input spreadsheet into memory.
string[] lines = System.IO.File.ReadAllLines(@"../../../spreadsheet1.csv");

// Build the reordered rows: split each row on commas, sort by the third field (index 2),
// then emit that field first, followed by field 1 and field 0 joined with a space.
IEnumerable<string> query = lines
    .Select(row => row.Split(','))
    .OrderBy(cells => cells[2])
    .Select(cells => cells[2] + ", " + (cells[1] + " " + cells[0]));

// Run the query and save the result as a new file. ToArray is called on the query
// before passing it to WriteAllLines as a string[].
System.IO.File.WriteAllLines(@"../../../spreadsheet2.csv", query.ToArray());
2.1.7. 문자열 컬렉션 결합 및 비교
텍스트 줄이 포함된 파일을 병합한 다음 결과를 정렬하는 방법
특히 두 개의 텍스트 줄 집합에서의 간단한 연결(Concat), 합집합(Union) 및 교집합(Intersect)을 수행
- names1.txt 내용
Bankov, Peter
Holm, Michael
Garcia, Hugo
Potra, Cristina
Noriega, Fabricio
Aw, Kam Foo
Beebe, Ann
Toyoshima, Tim
Guy, Wey Yuan
Garcia, Debra
- names2.txt 내용
Liu, Jinghao
Bankov, Peter
Holm, Michael
Garcia, Hugo
Beebe, Ann
Gilchrist, Beth
Myrcha, Jacek
Giakoumakis, Leo
McLin, Nkenge
El Yassir, Mehdi
/// <summary>
/// Writes a heading, then every item produced by a query, then the number of items.
/// </summary>
/// <param name="query">The query to execute; it is enumerated exactly once.</param>
/// <param name="message">Heading written (after a blank line) before the items.</param>
static void OutputQueryResults(IEnumerable<string> query, string message)
{
    Console.WriteLine(System.Environment.NewLine + message);

    // Count while printing so that the query is not executed a second time for the total.
    int total = 0;
    foreach (string item in query)
    {
        Console.WriteLine(item);
        total++;
    }

    Console.WriteLine("{0} total names in list", total);
}
// Assumes the OutputQueryResults helper is already defined.
// Put the text files in your solution folder.
string[] fileA = System.IO.File.ReadAllLines(@"../../../names1.txt");
string[] fileB = System.IO.File.ReadAllLines(@"../../../names2.txt");

// Simple concatenation and sort. Duplicates are preserved.
IEnumerable<string> concatQuery = fileA.Concat(fileB).OrderBy(s => s);

// Pass the query variable to another function for execution.
OutputQueryResults(concatQuery, "Simple concatenate and sort. Duplicates are preserved:");

// Concatenate and remove duplicate names based on the default string comparer.
// Union merges the two sequences and keeps each distinct name only once.
IEnumerable<string> uniqueNamesQuery =
    fileA.Union(fileB).OrderBy(s => s);
OutputQueryResults(uniqueNamesQuery, "Union removes duplicate names:");

// Find the names that occur in both files (based on the default string comparer).
IEnumerable<string> commonNamesQuery =
    fileA.Intersect(fileB);
OutputQueryResults(commonNamesQuery, "Merge based on intersect:");

// Find the matching fields in each list. Merge the two results by using Concat,
// and then sort using the default string comparer.
string nameMatch = "Garcia";

// Split each line on ',' and keep the lines whose first field equals nameMatch.
IEnumerable<String> tempQuery1 =
    from name in fileA
    let n = name.Split(',')
    where n[0] == nameMatch
    select name;

IEnumerable<string> tempQuery2 =
    from name2 in fileB
    let n2 = name2.Split(',')
    where n2[0] == nameMatch
    select name2;

// Concat only appends the second sequence to the first: it is neither a set union
// (duplicates are kept) nor a join.
IEnumerable<string> nameMatchQuery =
    tempQuery1.Concat(tempQuery2).OrderBy(s => s);
OutputQueryResults(nameMatchQuery, String.Format("Concat based on partial name match \"{0}\":", nameMatch));
2.1.8. 여러 소스로 개체 컬렉션 채우기(두 파일읽어서 조인효과 구현)
- scores.csv
111, 97, 92, 81, 60
112, 75, 84, 91, 39
113, 88, 94, 65, 91
114, 97, 89, 85, 82
115, 35, 72, 91, 70
116, 99, 86, 90, 94
117, 93, 92, 80, 87
118, 92, 90, 83, 78
119, 68, 79, 88, 92
120, 99, 82, 81, 79
121, 96, 85, 91, 60
122, 94, 92, 91, 91
- names.csv
// These data files are defined in How to: Join Content from Dissimilar Files (LINQ)
string[] names = System.IO.File.ReadAllLines(@"../../../names.csv");
string[] scores = System.IO.File.ReadAllLines(@"../../../scores.csv");

// Merge the data sources using a named type.
// var could be used instead of an explicit type.
// NOTE(review): the Student type is not defined in this file; this assumes it exposes
// settable FirstName, LastName, ID and ExamScores members - confirm against its definition.
// Note the dynamic creation of a list of ints for the ExamScores member. We skip 1 because
// the first string in the array is the student ID, not an exam score.
IEnumerable<Student> queryNamesScores =
    from name in names
    let x = name.Split(',')
    from score in scores
    let s = score.Split(',')
    // Join on the student ID: the third column of names.csv and the first column of
    // scores.csv. Both sides are trimmed because fields may carry padding after the comma.
    where x[2].Trim() == s[0].Trim()
    select new Student()
    {
        // names.csv rows are laid out as Last[0], First[1], ID[2].
        FirstName = x[1].Trim(),
        LastName = x[0].Trim(),
        ID = Convert.ToInt32(x[2]), // convert the text field to the numeric ID
        ExamScores = (from scoreAsText in s.Skip(1) // the first value is the ID, so skip it
                      select Convert.ToInt32(scoreAsText)).
                      ToList()
    };

// Optional. Store the newly created student objects in memory for faster access in
// future queries. Could be useful with very large data files.
List<Student> students = queryNamesScores.ToList();

// Display the results and perform one further calculation.
foreach (var student in students)
{
    Console.WriteLine("The average score of {0} {1} is {2}.",
        student.FirstName, student.LastName, student.ExamScores.Average());
}
2.1.9. 그룹을 사용하여 파일을 여러 파일로 분할
- names1.txt
Bankov, Peter
Holm, Michael
Garcia, Hugo
Potra, Cristina
Noriega, Fabricio
Aw, Kam Foo
Beebe, Ann
Toyoshima, Tim
Guy, Wey Yuan
Garcia, Debra
- names2.txt
Liu, Jinghao
Bankov, Peter
Holm, Michael
Garcia, Hugo
Beebe, Ann
Gilchrist, Beth
Myrcha, Jacek
Giakoumakis, Leo
McLin, Nkenge
El Yassir, Mehdi
string[] fileA = System.IO.File.ReadAllLines(@"../../../names1.txt");
string[] fileB = System.IO.File.ReadAllLines(@"../../../names2.txt");

// Concatenate and remove duplicate names based on the default string comparer.
var mergeQuery = fileA.Union(fileB);

// Group the names by the first letter in the last name.
// n[0] is the first column and n[0][0] is its first character. Lines whose first column
// is empty (for example blank lines) are skipped because they have no first character.
var groupQuery = from name in mergeQuery
                 let n = name.Split(',')
                 where n[0].Length > 0
                 group name by n[0][0] into g
                 orderby g.Key
                 select g;

// Create a new file for each group that was created.
// Note that nested foreach loops are required to access
// individual items within each group.
foreach (var g in groupQuery)
{
    // Create the new file name.
    string fileName = @"../../../testFile_" + g.Key + ".txt";

    // Write one file per group.
    using (System.IO.StreamWriter sw = new System.IO.StreamWriter(fileName))
    {
        foreach (var item in g)
        {
            sw.WriteLine(item);
            // Output to console for example purposes.
            Console.WriteLine(" {0}", item);
        }
    }
}
2.1.10. 서로 다른 파일의 콘텐츠 조인
코드만 봐도 될듯
string[] names = System.IO.File.ReadAllLines(@"../../../names.csv");
string[] scores = System.IO.File.ReadAllLines(@"../../../scores.csv");

// Layout of a names.csv row:  Last[0], First[1], ID[2], Grade Level[3]
//     Omelchenko, Svetlana, 11, 2
// Layout of a scores.csv row: StudentID[0], Exam1[1] Exam2[2], Exam3[3], Exam4[4]
//     111, 97, 92, 81, 60

// Pair the rows of the two dissimilar spreadsheets on their common ID value.
// A nested SelectMany is used rather than Join so that the split result of each
// score row is available to both the filter and the projection.
IEnumerable<string> scoreQuery1 = names
    .Select(name => name.Split(','))
    .SelectMany(nameFields => scores
        .Select(id => id.Split(','))
        .Where(scoreFields => nameFields[2] == scoreFields[0])
        .Select(scoreFields => String.Join(",", new string[]
        {
            nameFields[0], scoreFields[1], scoreFields[2], scoreFields[3], scoreFields[4]
        })));

// Hand the query variable to a method, which executes it.
// The query itself is unchanged.
OutputQueryResults(scoreQuery1, "Merge two spreadsheets:");
2.1.10. CSV 텍스트 파일의 열 값 계산
string[] lines = System.IO.File.ReadAllLines(@"../../../scores.csv");

// Parse every row into its exam scores. Skip(1) drops the first field of each row so that
// only the remaining columns are converted. Each row is materialized as an int[] so that
// the per-column lookups below do not re-parse the text.
IEnumerable<int[]> query = from line in lines
                           let x = line.Split(',')
                           let y = x.Skip(1)
                           select (from str in y
                                   select Convert.ToInt32(str)).ToArray();

// Execute and cache the results for performance.
// ToArray could also be used here.
var results = query.ToList();

// Find out how many columns we have (none when the file is empty).
int columnCount = results.Count > 0 ? results[0].Length : 0;

// Perform aggregate calculations on each column.
// One loop for each score column in scores.
// We can use a for loop because the query has already been executed by the call to ToList.
for (int column = 0; column < columnCount; column++)
{
    var res2 = from row in results
               select row[column];

    double average = res2.Average(); // average of this column
    int max = res2.Max();
    int min = res2.Min();

    // 1 is added to column because Exam numbers begin with 1.
    Console.WriteLine("Exam #{0} Average: {1:##.##} High Score: {2} Low Score: {3}",
        column + 1, average, max, min);
}
2.1.1. 지정된 특성 또는 이름을 갖는 파일 쿼리
/// <summary>
/// Returns a FileInfo for every file under <paramref name="path"/>, including subfolders.
/// This method assumes that the application has discovery permissions for all folders
/// under the specified path.
/// </summary>
/// <param name="path">Root folder to scan.</param>
/// <returns>A list holding one FileInfo per file found.</returns>
/// <exception cref="System.IO.DirectoryNotFoundException">The folder does not exist.</exception>
static IEnumerable<System.IO.FileInfo> GetFiles(string path)
{
    if (!System.IO.Directory.Exists(path))
    {
        throw new System.IO.DirectoryNotFoundException();
    }

    // "*.*" with AllDirectories walks the whole tree below the root folder.
    string[] fileNames = System.IO.Directory.GetFiles(path, "*.*", System.IO.SearchOption.AllDirectories);

    List<System.IO.FileInfo> files = new List<System.IO.FileInfo>(fileNames.Length);
    foreach (string name in fileNames)
    {
        files.Add(new System.IO.FileInfo(name));
    }
    return files;
}
string startFolder = @"c:\program files\Microsoft Visual Studio 9.0\";

// Take a snapshot of the file system.
IEnumerable<System.IO.FileInfo> fileList = GetFiles(startFolder);

// Create the query: every .txt file, ordered by file name.
IEnumerable<System.IO.FileInfo> fileQuery =
    from file in fileList
    where file.Extension == ".txt"
    orderby file.Name
    select file;

// Execute the query. This might write out a lot of files!
foreach (System.IO.FileInfo fi in fileQuery)
{
    Console.WriteLine(fi.FullName);
}

// Create and execute a new query by using the previous query as a starting point.
// fileQuery is not executed again until the call to Last(). Ordering by creation
// time ascending puts the newest file last.
// Last() throws InvalidOperationException when no .txt file was found.
var newestFile =
    (from file in fileQuery
     orderby file.CreationTime
     select new { file.FullName, file.CreationTime })
    .Last();

Console.WriteLine("\r\nThe newest .txt file is {0}. Creation time: {1}",
    newestFile.FullName, newestFile.CreationTime);
2.1.2. 확장명에 따라 파일 그룹화
// Root folder whose files are grouped.
string startFolder = @"c:\program files\Microsoft Visual Studio 9.0\Common7";

// Length of the root path; passed to PageOutput below as its rootLength argument.
int trimLength = startFolder.Length;

// Capture the current contents of the folder tree.
IEnumerable<System.IO.FileInfo> fileList = GetFiles(startFolder);

// Group the files by lower-cased extension and order the groups by that key.
// Every file in a group shares the group's Key value.
var queryGroupByExt = fileList
    .GroupBy(file => file.Extension.ToLower())
    .OrderBy(fileGroup => fileGroup.Key);

// Page through the groups (PageOutput is defined just below).
PageOutput(trimLength, queryGroupByExt);
/// <summary>
/// Writes the files of each extension group to the console one page at a time, waiting
/// for a key press after every page. Pressing the End key stops the output.
/// </summary>
/// <param name="rootLength">Number of leading characters removed from each full path.</param>
/// <param name="groupByExtList">Files grouped by extension; the extension is the group key.</param>
private static void PageOutput(int rootLength,
    IEnumerable<System.Linq.IGrouping<string, System.IO.FileInfo>> groupByExtList)
{
    // Flag to break out of paging loop.
    bool goAgain = true;

    // "3" = 1 line for extension + 1 for "Press any key" + 1 for input cursor.
    // At least one line per page is required, otherwise the paging loop would never advance.
    int numLines = Math.Max(1, Console.WindowHeight - 3);

    // Iterate through the outer collection of groups.
    foreach (var filegroup in groupByExtList)
    {
        // Start a new extension at the top of a page.
        int currentLine = 0;

        // Counted once per group rather than on every page.
        int groupSize = filegroup.Count();

        // Output only as many lines of the current group as will fit in the window.
        do
        {
            Console.WriteLine(filegroup.Key == String.Empty ? "[none]" : filegroup.Key);

            // Get 'numLines' number of items starting at number 'currentLine'.
            var resultPage = filegroup.Skip(currentLine).Take(numLines);

            // Execute the resultPage query.
            foreach (var f in resultPage)
            {
                Console.WriteLine("\t{0}", f.FullName.Substring(rootLength));
            }

            // Increment the line counter.
            currentLine += numLines;

            // Give the user a chance to escape.
            Console.WriteLine("Press any key to continue or the 'End' key to break...");
            ConsoleKey key = Console.ReadKey().Key;
            if (key == ConsoleKey.End)
            {
                goAgain = false;
                break;
            }
        } while (currentLine < groupSize);

        if (goAgain == false)
        {
            break;
        }
    }
}
2.1.3. 폴더 집합의 전체 바이트 수 쿼리
/// <summary>
/// Returns the size in bytes of the file at <paramref name="filename"/>.
/// </summary>
/// <param name="filename">Path of the file to measure.</param>
/// <returns>The file length, or 0 when the file cannot be found.</returns>
static long GetFileLength(string filename)
{
    try
    {
        System.IO.FileInfo fi = new System.IO.FileInfo(filename);
        return fi.Length;
    }
    catch (System.IO.FileNotFoundException)
    {
        // If a file is no longer present,
        // just add zero bytes to the total.
        return 0;
    }
}
string startFolder = @"c:\program files\Microsoft Visual Studio 9.0\VC#";

// Capture the path of every file below the start folder.
// Discovery permissions for all folders under the specified path are assumed.
IEnumerable<string> fileList = System.IO.Directory.GetFiles(startFolder, "*.*", System.IO.SearchOption.AllDirectories);

// Map each path to its length in bytes.
var fileQuery = fileList.Select(file => GetFileLength(file));

// Materialize the lengths once so the file system is not queried again below.
long[] fileLengths = fileQuery.ToArray();

// Total number of bytes in all the files under the specified folder.
long totalBytes = fileLengths.Sum();

// Size of the largest file.
long largestFile = fileLengths.Max();

int fileCount = fileList.Count();
Console.WriteLine("There are {0} bytes in {1} files under {2}",
    totalBytes, fileCount, startFolder);
Console.WriteLine("The largest files is {0} bytes.", largestFile);
2.1.3. 두 폴더의 내용 비교
세 가지 방법
- 두 개의 파일 목록이 동일한지 여부를 지정하는 부울 값 쿼리
- 양쪽 파일에 있는 파일을 검색하는 교차 부분 쿼리
- 한 폴더에는 있지만 다른 폴더에는 없는 파일을 검색하는 차집합 쿼리
/// <summary>
/// Compares two FileInfo objects by file name and length, so that files in different
/// folders can be treated as the same file.
/// </summary>
class FileCompare : System.Collections.Generic.IEqualityComparer<System.IO.FileInfo>
{
    public FileCompare() { }

    /// <summary>
    /// Returns true when both arguments are the same reference (including both null), or
    /// when both are non-null and have the same Name and Length.
    /// </summary>
    public bool Equals(System.IO.FileInfo f1, System.IO.FileInfo f2)
    {
        if (object.ReferenceEquals(f1, f2))
        {
            return true;
        }
        if (f1 == null || f2 == null)
        {
            return false;
        }
        return (f1.Name == f2.Name && f1.Length == f2.Length); // compare file name and length
    }

    /// <summary>
    /// Returns a hash that reflects the comparison criteria. According to the rules for
    /// IEqualityComparer&lt;T&gt;, if Equals is true, then the hash codes must also be equal.
    /// Because equality as defined here is a simple value equality, not reference identity,
    /// it is possible that two or more objects will produce the same hash code.
    /// </summary>
    /// <exception cref="System.ArgumentNullException"><paramref name="fi"/> is null.</exception>
    public int GetHashCode(System.IO.FileInfo fi)
    {
        if (fi == null)
        {
            throw new System.ArgumentNullException("fi");
        }
        string s = String.Format("{0}{1}", fi.Name, fi.Length);
        return s.GetHashCode();
    }
}
위의 FileCompare 클래스가 미리 정의되어 있다고 가정한다.
string pathA = @"C:\TestDir";
string pathB = @"C:\TestDir2";

// Take a snapshot of the file system.
IEnumerable<System.IO.FileInfo> list1 = GetFiles(pathA);
IEnumerable<System.IO.FileInfo> list2 = GetFiles(pathB);

// A custom file comparer (the FileCompare class).
FileCompare myFileCompare = new FileCompare();

// This query determines whether the two folders contain identical file lists, based on
// the custom file comparer that is defined in the FileCompare class.
// The query executes immediately because it returns a bool.
bool areIdentical = list1.SequenceEqual(list2, myFileCompare);

if (areIdentical)
{
    Console.WriteLine("the two folders are the same");
}
else
{
    Console.WriteLine("The two folders are not the same");
}

// Find the common files. The query produces a sequence and is executed each time
// it is enumerated: once by Any() and once by the foreach statement.
var queryCommonFiles = list1.Intersect(list2, myFileCompare);

if (queryCommonFiles.Any())
{
    Console.WriteLine("The following files are in both folders:");
    foreach (var v in queryCommonFiles)
    {
        Console.WriteLine(v.FullName); // shows which items end up in result list
    }
}
else
{
    Console.WriteLine("There are no common files in the two folders.");
}

// Find the set difference between the two folders.
// For this example we only check one way.
var queryList1Only = (from file in list1 select file).Except(list2, myFileCompare);

Console.WriteLine("The following files are in list1 but not list2:");
foreach (var v in queryList1Only)
{
    Console.WriteLine(v.FullName);
}
2.1.4. 디렉터리 트리에서 가장 큰 파일을 하나 이상 쿼리
파일 크기(바이트)와 관련된 5개의 쿼리방법
- 가장 큰 파일의 크기(바이트)를 검색하는 방법
- 가장 작은 파일의 크기(바이트)를 검색하는 방법
- 지정한 루트 폴더에 있는 하나 이상의 폴더에서 FileInfo 개체의 가장 큰 파일이나 가장 작은 파일을 검색하는 방법
- 가장 큰 10개 파일과 같은 시퀀스를 검색하는 방법
- 지정한 크기보다 작은 파일을 무시하여 파일 크기(바이트)에 따라 파일을 그룹으로 정렬하는 방법
string startFolder = @"c:\program files\Microsoft Visual Studio 9.0\";

// Take a snapshot of the file system.
// fileList is an IEnumerable<System.IO.FileInfo>
var fileList = GetFiles(startFolder);

// NOTE: the GetFileLength helper defined in this file takes a path string, so each
// query passes file.FullName rather than the FileInfo itself.

// Return the size of the largest file.
long maxSize =
    (from file in fileList
     let len = GetFileLength(file.FullName)
     select len).Max();

Console.WriteLine("The length of the largest file under {0} is {1}",
    startFolder, maxSize);

// Return the FileInfo object for the largest file by sorting and selecting
// from the beginning of the list.
System.IO.FileInfo longestFile =
    (from file in fileList
     let len = GetFileLength(file.FullName)
     where len > 0
     orderby len descending
     select file).First();

Console.WriteLine("The largest file under {0} is {1} with a length of {2} bytes",
    startFolder, longestFile.FullName, longestFile.Length);

// Return the FileInfo of the smallest file (files with a length of 0 are ignored).
System.IO.FileInfo smallestFile =
    (from file in fileList
     let len = GetFileLength(file.FullName)
     where len > 0
     orderby len ascending
     select file).First();

Console.WriteLine("The smallest file under {0} is {1} with a length of {2} bytes",
    startFolder, smallestFile.FullName, smallestFile.Length);

// Return the FileInfos for the 10 largest files.
// queryTenLargest is an IEnumerable<System.IO.FileInfo>
var queryTenLargest =
    (from file in fileList
     let len = GetFileLength(file.FullName)
     orderby len descending
     select file).Take(10);

Console.WriteLine("The 10 largest files under {0} are:", startFolder);

foreach (var v in queryTenLargest)
{
    Console.WriteLine("{0}: {1} bytes", v.FullName, v.Length);
}

// Group the files according to their size, leaving out files that are less than
// 200000 bytes. The group key is the length divided by 100000 (integer division).
var querySizeGroups =
    from file in fileList
    let len = GetFileLength(file.FullName)
    where len > 0
    group file by (len / 100000) into fileGroup
    where fileGroup.Key >= 2
    orderby fileGroup.Key descending
    select fileGroup;

foreach (var filegroup in querySizeGroups)
{
    Console.WriteLine(filegroup.Key.ToString() + "00000");
    foreach (var item in filegroup)
    {
        Console.WriteLine("\t{0}: {1}", item.Name, item.Length);
    }
}
2.1.5. 디렉터리 트리의 중복 파일 쿼리
첫 번째 쿼리에서는 간단한 키를 사용하여 일치하는 항목을 확인(이름은 같지만 내용은 다를 수 있는 파일임)
두 번째 쿼리에서는 복합 키를 사용하여 FileInfo 개체의 세 가지 속성에 대한 일치 여부를 확인
/// <summary>
/// Finds duplicate files in a directory tree: first by file name alone, then by a
/// compound key made of name, creation time and length.
/// </summary>
class QueryDuplicateFileNames
{
    static void Main(string[] args)
    {
        // Uncomment QueryDuplicates2 to run that query.
        QueryDuplicates();
        // QueryDuplicates2();

        // Keep the console window open in debug mode.
        Console.WriteLine("Press any key to exit.");
        Console.ReadKey();
    }

    /// <summary>
    /// Groups files by name and pages through the groups that contain more than one file.
    /// </summary>
    static void QueryDuplicates()
    {
        // Change the root drive or folder if necessary.
        string startFolder = @"c:\program files\Microsoft Visual Studio 9.0\";

        // Take a snapshot of the file system.
        IEnumerable<System.IO.FileInfo> fileList = GetFiles(startFolder);

        // Used in WriteLine to keep the lines shorter.
        int charsToSkip = startFolder.Length;

        // var can be used for convenience with groups.
        var queryDupNames =
            from file in fileList
            group file.FullName.Substring(charsToSkip) by file.Name into fileGroup
            where fileGroup.Count() > 1
            select fileGroup;

        // Pass the query to a method that will
        // output one page at a time.
        PageOutput<string, string>(queryDupNames);
    }

    // A group key that can be passed to a separate method.
    // Equals and GetHashCode are overridden to define equality for the key.
    // ToString is overridden to provide a friendly name for Key.ToString().
    class PortableKey
    {
        public string Name { get; set; }
        public DateTime CreationTime { get; set; }
        public long Length { get; set; }

        public override bool Equals(object obj)
        {
            // 'as' yields null for null or for an object of another type, which is then
            // reported as "not equal" instead of throwing.
            PortableKey other = obj as PortableKey;
            if (other == null)
            {
                return false;
            }
            return other.CreationTime == this.CreationTime &&
                   other.Length == this.Length &&
                   other.Name == this.Name;
        }

        public override int GetHashCode()
        {
            string str = String.Format("{0}{1}{2}", this.CreationTime, this.Length, this.Name);
            return str.GetHashCode();
        }

        public override string ToString()
        {
            return String.Format("{0} {1} {2}", this.Name, this.Length, this.CreationTime);
        }
    }

    /// <summary>
    /// Groups files by name, creation time and length, and pages through the groups that
    /// contain more than one file.
    /// </summary>
    static void QueryDuplicates2()
    {
        // Change the root drive or folder if necessary.
        string startFolder = @"c:\program files\Microsoft Visual Studio 9.0\Common7";

        // Make the lines shorter for the console display.
        int charsToSkip = startFolder.Length;

        // Take a snapshot of the file system.
        IEnumerable<System.IO.FileInfo> fileList = GetFiles(startFolder);

        // Note the use of a compound key. Files that match
        // all three properties belong to the same group.
        // A named type is used to enable the query to be
        // passed to another method. Anonymous types can also be used
        // for composite keys but cannot be passed across method boundaries.
        var queryDupFiles =
            from file in fileList
            group file.FullName.Substring(charsToSkip) by
                new PortableKey { Name = file.Name, CreationTime = file.CreationTime, Length = file.Length } into fileGroup
            where fileGroup.Count() > 1
            select fileGroup;

        PageOutput<PortableKey, string>(queryDupFiles);
    }

    // A generic method to page the output of the QueryDuplicates methods.
    // Here the type of the group must be specified explicitly. "var" cannot
    // be used in method signatures. This method does not display more than one
    // group per page.
    private static void PageOutput<K, V>(IEnumerable<System.Linq.IGrouping<K, V>> groupByExtList)
    {
        // Flag to break out of paging loop.
        bool goAgain = true;

        // "3" = 1 line for the key + 1 for "Press any key" + 1 for input cursor.
        // At least one line per page is required, otherwise the paging loop would never advance.
        int numLines = Math.Max(1, Console.WindowHeight - 3);

        // Iterate through the outer collection of groups.
        foreach (var filegroup in groupByExtList)
        {
            // Start a new group at the top of a page.
            int currentLine = 0;

            // Counted once per group rather than on every page.
            int groupSize = filegroup.Count();

            // Output only as many lines of the current group as will fit in the window.
            do
            {
                Console.WriteLine("Filename = {0}", filegroup.Key.ToString() == String.Empty ? "[none]" : filegroup.Key.ToString());

                // Get 'numLines' number of items starting at number 'currentLine'.
                var resultPage = filegroup.Skip(currentLine).Take(numLines);

                // Execute the resultPage query.
                foreach (var fileName in resultPage)
                {
                    Console.WriteLine("\t{0}", fileName);
                }

                // Increment the line counter.
                currentLine += numLines;

                // Give the user a chance to escape.
                Console.WriteLine("Press any key to continue or the 'End' key to break...");
                ConsoleKey key = Console.ReadKey().Key;
                if (key == ConsoleKey.End)
                {
                    goAgain = false;
                    break;
                }
            } while (currentLine < groupSize);

            if (goAgain == false)
            {
                break;
            }
        }
    }

    // This method assumes that the application has discovery
    // permissions for all folders under the specified path.
    static IEnumerable<System.IO.FileInfo> GetFiles(string path)
    {
        if (!System.IO.Directory.Exists(path))
        {
            throw new System.IO.DirectoryNotFoundException();
        }

        List<System.IO.FileInfo> files = new List<System.IO.FileInfo>();
        foreach (string name in System.IO.Directory.GetFiles(path, "*.*", System.IO.SearchOption.AllDirectories))
        {
            files.Add(new System.IO.FileInfo(name));
        }
        return files;
    }
}
2.1.6. 폴더의 파일 내용 쿼리
string startFolder = @"c:\program files\Microsoft Visual Studio 9.0\";
// Take a snapshot of the file system.
IEnumerable<System.IO.FileInfo> fileList = GetFiles(startFolder);
string searchTerm = @"Visual Studio";
// Search the contents of each file.
// A regular expression created with the RegEx class
// could be used instead of the Contains method.
// queryMatchingFiles is an IEnumerable<string>.
var queryMatchingFiles =
from file in fileList
where file.Extension == ".htm"
let fileText = GetFileText(file.FullName)
where fileText.Contains(searchTerm)
select file.FullName;
// Execute the query.
Console.WriteLine("The term \"{0}\" was found in:", searchTerm);
foreach (string filename in queryMatchingFiles)